Example #1
def return_to_wet_date(wet_season_filter_data, broad_filter_data, wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number):
    search_index = None
    max_wet_peak_mag = max(broad_filter_data[20:])
    max_wet_peak_index = find_index(broad_filter_data, max_wet_peak_mag)
    min_wet_peak_mag = min(broad_filter_data[:max_wet_peak_index])
    maxarray_wet, _ = peakdet(
        wet_season_filter_data, peak_sensitivity_wet)

    """Loop through peaks to find starting point of search"""
    for index, value in enumerate(maxarray_wet):
        if len(maxarray_wet) == 1:
            if maxarray_wet[0][0] == 0:
                search_index = max_wet_peak_index
                break
            else:
                search_index = int(maxarray_wet[0][0])
                break
        else:
            if (maxarray_wet[index][1]-min_wet_peak_mag)/(max_wet_peak_mag-min_wet_peak_mag) > peak_detect_perc:
                search_index = int(maxarray_wet[index][0])
                break
    """Loop backwards from max flow index to beginning, to search for wet season"""
    if search_index is None:
        return None
    for index, value in enumerate(reversed(wet_season_filter_data[:search_index])):
        if index == len(wet_season_filter_data[:search_index]) - 1:
            return None
        elif (value - min_wet_peak_mag) / (max_wet_peak_mag - min_wet_peak_mag) < wet_threshold_perc:
            """If value percentage falls below wet_threshold_perc"""
            return_date = search_index - index
            return return_date
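
For orientation, a minimal call sketch for this variant. Everything here is illustrative: the random series stands in for one water year of daily flow, and the threshold values are typical of the fall parameters used in later examples, not taken from this one; `find_index` and `peakdet` are assumed to be importable project helpers.

# Hypothetical usage sketch -- data and parameter values are illustrative only.
import numpy as np
from scipy.ndimage import gaussian_filter1d

flow_data = np.abs(np.random.randn(366)) * 10  # stand-in for one water year of daily flow
wet_season_filter_data = gaussian_filter1d(flow_data, 2)   # light smoothing
broad_filter_data = gaussian_filter1d(flow_data, 15)       # heavy smoothing
return_date = return_to_wet_date(wet_season_filter_data, broad_filter_data,
                                 wet_threshold_perc=0.2, peak_detect_perc=0.3,
                                 peak_sensitivity_wet=0.005, column_number=0)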
Example #2
def _get_result_arrays(self, fixed_df, current_gauge_column_index):
    current_gauge_class, current_gauge_number, year_ranges, flow_matrix, julian_dates = convert_raw_data_to_matrix(
        fixed_df, current_gauge_column_index, self.start_date)
    self.general_info(current_gauge_class, current_gauge_number)
    if int(current_gauge_number) in gauge_reference:
        start_year_index = find_index(
            year_ranges,
            int(gauge_reference[int(current_gauge_number)]['start']))
        end_year_index = find_index(
            year_ranges,
            int(gauge_reference[int(current_gauge_number)]['end']) + 1)
    else:
        print('Gauge {} Not Found'.format(current_gauge_number))
        return  # without reference years, the Gauge object below cannot be built
    current_gauge = Gauge(current_gauge_class, current_gauge_number,
                          year_ranges, flow_matrix, julian_dates,
                          self.start_date, start_year_index,
                          end_year_index)
    self.get_result_arrays(current_gauge)
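
`gauge_reference` is never defined in these snippets; from its usage it is evidently a dict keyed by (integer) gauge number whose values carry 'start' and 'end' water years. A hedged sketch of that shape and of the lookup done above, with an invented gauge number:

# Assumed shape of gauge_reference, inferred from usage only; the real gauge
# numbers and years live in the project's reference module, not here.
gauge_reference = {
    11274500: {'start': 1952, 'end': 2015},  # illustrative entry
}

def lookup_year_indices(year_ranges, gauge_number):
    """Hypothetical helper: (start_year_index, end_year_index), or (None, None)."""
    ref = gauge_reference.get(int(gauge_number))
    if ref is None:
        return None, None
    return (find_index(year_ranges, int(ref['start'])),
            find_index(year_ranges, int(ref['end']) + 1))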
Example #3
def return_to_wet_date(flow_data, wet_season_filter_data, broad_filter_data,
                       slope_detection_data, wet_threshold_perc,
                       peak_detect_perc, peak_sensitivity_wet, column_number,
                       slope_sensitivity):
    search_index = None
    max_wet_peak_mag = max(broad_filter_data[20:])
    max_wet_peak_index = find_index(broad_filter_data, max_wet_peak_mag)

    if broad_filter_data[:max_wet_peak_index].size == 0:
        return None
    min_wet_peak_mag = min(broad_filter_data[:max_wet_peak_index])
    maxarray_wet, _ = peakdet(wet_season_filter_data, peak_sensitivity_wet)
    """Get the derivative of smoothed data for rate of change requirement"""
    x_axis = list(range(len(slope_detection_data)))
    spl = ip.UnivariateSpline(x_axis, slope_detection_data, k=3, s=3)
    spl_first = spl.derivative(n=1)
    """Loop through peaks to find starting point of search"""
    for index, value in enumerate(maxarray_wet):
        if len(maxarray_wet) == 1:
            if maxarray_wet[0][0] == 0:
                search_index = max_wet_peak_index
                break
            else:
                search_index = int(maxarray_wet[0][0])
                break
        else:
            if (maxarray_wet[index][1] - min_wet_peak_mag) / (
                    max_wet_peak_mag - min_wet_peak_mag) > peak_detect_perc:
                search_index = int(maxarray_wet[index][0])
                break
    """Loop backwards from max flow index to beginning, to search for wet season"""
    if search_index is None:
        return None
    for index, value in enumerate(
            reversed(wet_season_filter_data[:search_index])):
        if index == len(wet_season_filter_data[:search_index]) - 1:
            return None
        elif (value - min_wet_peak_mag) / (max_wet_peak_mag - min_wet_peak_mag) < wet_threshold_perc \
                and abs(spl_first(search_index - index)) < max_wet_peak_mag / slope_sensitivity:
            """If value percentage falls below wet_threshold_perc"""
            return_date = search_index - index

            # plt.figure()
            # plt.plot(flow_data, '-', slope_detection_data, '--')
            # if return_date is not None:
            #     plt.axvline(return_date, color='blue')
            # plt.axhline((max_wet_peak_mag-min_wet_peak_mag)*.2, color='orange')
            # plt.text(364,max(flow_data),str(abs(spl_first(search_index - index))))
            # plt.savefig('post_processedFiles/Boxplots/{}.png'.format(column_number))

            return return_date
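
The new ingredient in this variant is the rate-of-change requirement built on scipy's UnivariateSpline derivative. A standalone sketch of just that step; the threshold expression max_wet_peak_mag / slope_sensitivity comes from the function body, while the series and the value 300 are made up:

# Isolated sketch of the slope check above; data and sensitivity illustrative.
import numpy as np
import scipy.interpolate as ip

slope_detection_data = np.sin(np.linspace(0, 6, 366)) + 2.0  # stand-in series
x_axis = list(range(len(slope_detection_data)))
spl = ip.UnivariateSpline(x_axis, slope_detection_data, k=3, s=3)
spl_first = spl.derivative(n=1)  # first derivative of the fitted spline

day = 120
slope_flat_enough = abs(spl_first(day)) < slope_detection_data.max() / 300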
Example #4
def return_to_wet_date(wet_filter_data, wet_threshold_perc):
    max_wet_peak_mag = max(wet_filter_data[20:])
    max_wet_peak_index = find_index(wet_filter_data, max_wet_peak_mag)
    min_wet_peak_mag = min(wet_filter_data[:max_wet_peak_index])
    """Loop backwards from max flow index to beginning, to search for wet season"""
    for index, value in enumerate(
            reversed(wet_filter_data[:max_wet_peak_index])):
        if index == len(wet_filter_data[:max_wet_peak_index]) - 1:
            return None
        elif (value - min_wet_peak_mag) / (
                max_wet_peak_mag - min_wet_peak_mag) < wet_threshold_perc:
            """If value percentage falls below wet_threshold_perc"""
            return_date = max_wet_peak_index - index
            return return_date
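
All of these variants lean on a `find_index` helper that is never shown; every call site passes an array and a value and expects the position of that value back. A minimal stand-in under that assumption:

# Hypothetical stand-in for the project's find_index helper, inferred from its
# call sites: return the first index at which `value` occurs in `data`.
def find_index(data, value):
    for index, element in enumerate(data):
        if element == value:
            return index
    return None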
Example #5
def return_to_wet_date(wet_season_filter_data, broad_filter_data, wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number):
    search_index = None
    max_wet_peak_mag = max(broad_filter_data[20:])
    max_wet_peak_index = find_index(broad_filter_data, max_wet_peak_mag)

    if broad_filter_data[:max_wet_peak_index].size == 0:
        return None
    min_wet_peak_mag = min(broad_filter_data[:max_wet_peak_index])
    maxarray_wet, _ = peakdet(
        wet_season_filter_data, peak_sensitivity_wet)
    #
    # plt.figure()
    # plt.plot(wet_season_filter_data, '-', broad_filter_data, ':')
    # for data in maxarray_wet:
    #     plt.plot(data[0], data[1], '^')
    # plt.savefig('post_processedFiles/Boxplots/{}.png'.format(column_number))

    """Loop through peaks to find starting point of search"""
    for index, value in enumerate(maxarray_wet):
        if len(maxarray_wet) == 1:
            if maxarray_wet[0][0] == 0:
                search_index = max_wet_peak_index
                break
            else:
                search_index = int(maxarray_wet[0][0])
                break
        else:
            if (maxarray_wet[index][1]-min_wet_peak_mag)/(max_wet_peak_mag-min_wet_peak_mag) > peak_detect_perc:
                search_index = int(maxarray_wet[index][0])
                break
    """Loop backwards from max flow index to beginning, to search for wet season"""
    if search_index is None:
        return None
    for index, value in enumerate(reversed(wet_season_filter_data[:search_index])):
        if index == len(wet_season_filter_data[:search_index]) - 1:
            return None
        elif (value - min_wet_peak_mag) / (max_wet_peak_mag - min_wet_peak_mag) < wet_threshold_perc:
            """If value percentage falls below wet_threshold_perc"""
            return_date = search_index - index
            return return_date
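
The `.size == 0` guard that distinguishes this variant protects the `min()` call for the degenerate case where the detected annual maximum sits at index 0. A quick illustration with made-up numbers:

# Illustrative only: when the max is at index 0, the slice before it is empty
# and min() would raise ValueError without the guard.
import numpy as np

broad_filter_data = np.array([9.0, 3.0, 2.0, 1.0])  # max at index 0
max_wet_peak_index = int(np.argmax(broad_filter_data))
print(broad_filter_data[:max_wet_peak_index].size)  # 0 -> guard returns None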
Example #6
def dim_hydrograph_plotter_layer(start_date, directory_name, end_with, class_number, gauge_numbers, plot):
    for root,dirs,files in os.walk(directory_name):
        for file in files:
            if file.endswith(end_with):
                fixed_df = pd.read_csv('{}/{}'.format(directory_name, file), sep=',', encoding='latin1', dayfirst=False, header=None).dropna(axis=1, how='all')
                step = is_multiple_date_data(fixed_df)

                current_gauge_column_index = 1

                while current_gauge_column_index <= (len(fixed_df.iloc[1,:]) - 1):
                    if gauge_numbers:
                        if int(fixed_df.iloc[1, current_gauge_column_index]) in gauge_numbers:
                            current_gauge_class, current_gauge_number, year_ranges, flow_matrix, julian_dates = convert_raw_data_to_matrix(fixed_df, current_gauge_column_index, start_date)
                            start_year_index = find_index(year_ranges, int(gauge_reference[int(current_gauge_number)]['start']))
                            end_year_index = find_index(year_ranges, int(gauge_reference[int(current_gauge_number)]['end']))
                            flow_matrix = flow_matrix[:,start_year_index:end_year_index]

                            _plotter(flow_matrix, julian_dates, current_gauge_number, plot, start_date)

                    elif not class_number and not gauge_numbers:
                        current_gauge_class, current_gauge_number, year_ranges, flow_matrix, julian_dates = convert_raw_data_to_matrix(fixed_df, current_gauge_column_index, start_date)
                        start_year_index = find_index(year_ranges, int(gauge_reference[int(current_gauge_number)]['start']))
                        end_year_index = find_index(year_ranges, int(gauge_reference[int(current_gauge_number)]['end']))
                        flow_matrix = flow_matrix[:,start_year_index:end_year_index]

                        _plotter(flow_matrix, julian_dates, current_gauge_number, plot, start_date)

                    elif int(fixed_df.iloc[0, current_gauge_column_index]) == int(class_number):
                        current_gauge_class, current_gauge_number, year_ranges, flow_matrix, julian_dates = convert_raw_data_to_matrix(fixed_df, current_gauge_column_index, start_date)
                        start_year_index = find_index(year_ranges, int(gauge_reference[int(current_gauge_number)]['start']))
                        end_year_index = find_index(year_ranges, int(gauge_reference[int(current_gauge_number)]['end']))
                        flow_matrix = flow_matrix[:,start_year_index:end_year_index]

                        _plotter(flow_matrix, julian_dates, current_gauge_number, plot, start_date)

                    current_gauge_column_index = current_gauge_column_index + step
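
`is_multiple_date_data` is not among these snippets; all that can be read off here is that it returns the column stride used to walk the gauge columns. A clearly hypothetical guess at its behavior: 1 when every gauge shares one date column, 2 when each gauge carries its own date column.

# Hypothetical reconstruction, inferred only from how the returned step
# advances current_gauge_column_index above; the probe cell is arbitrary.
import pandas as pd

def is_multiple_date_data(df):
    try:
        pd.to_datetime(df.iloc[5, 2], dayfirst=False)  # does column 2 also hold dates?
        return 2
    except (ValueError, TypeError, IndexError):
        return 1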
Example #7
def calc_fall_flush_timings_durations(flow_matrix, summer_timings):
    max_zero_allowed_per_year = fall_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = fall_params['max_nan_allowed_per_year']
    min_flow_rate = fall_params['min_flow_rate']
    sigma = fall_params['sigma']  # Smaller filter to find fall flush peak
    wet_sigma = fall_params[
        'wet_sigma']  # Larger filter to find wet season peak
    peak_sensitivity = fall_params['peak_sensitivity']  # smaller value detects more peaks
    max_flush_duration = fall_params[
        'max_flush_duration']  # Maximum duration from start to end, for fall flush peak
    wet_threshold_perc = fall_params[
        'wet_threshold_perc']  # Return to wet season flow must be certain percentage of that year's max flow
    flush_threshold_perc = fall_params[
        'flush_threshold_perc']  # Size of flush peak, from rising limb to top of peak, has great enough change
    min_flush_threshold = fall_params['min_flush_threshold']
    date_cutoff = fall_params[
        'date_cutoff']  # Latest accepted date for fall flush, in Julian Date counting from Oct 1st = 0. (i.e. Dec 15th = 75)

    start_dates = []
    wet_dates = []
    durations = []
    mags = []

    for column_number, column_flow in enumerate(flow_matrix[0]):

        start_dates.append(None)
        wet_dates.append(None)
        durations.append(None)
        mags.append(None)
        """Check to see if water year has more than allowed nan or zeros"""
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year \
                or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year \
                or max(flow_matrix[:, column_number]) < min_flow_rate:
            continue
        """Get flow data"""
        flow_data = flow_matrix[:, column_number]
        x_axis = list(range(len(flow_data)))
        """Interpolate between None values"""
        flow_data = replace_nan(flow_data)
        """Return to Wet Season"""
        wet_filter_data = gaussian_filter1d(flow_data, wet_sigma)
        return_date = return_to_wet_date(wet_filter_data, wet_threshold_perc)
        if return_date is not None:
            wet_dates[-1] = return_date + 10
        """Filter noise data with small sigma to find fall flush hump"""
        filter_data = gaussian_filter1d(flow_data, sigma)
        """Fit spline"""
        x_axis = list(range(len(filter_data)))
        spl = ip.UnivariateSpline(x_axis, filter_data, k=3, s=3)
        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(spl(x_axis), mean_flow * peak_sensitivity)
        """Find max and min of filtered flow data"""
        max_flow = max(filter_data[20:])
        max_flow_index = find_index(filter_data[20:], max_flow) + 20
        min_flow = min(wet_filter_data[:max_flow_index])
        """If could not find any max and find"""
        if not list(maxarray) or not list(
                minarray) or minarray[0][0] > max_flow_index:
            continue
        """Get flow magnitude threshold from previous summer's baseflow"""
        baseflows = []
        if column_number == 0:
            wet_date = wet_dates[0]
            baseflow = list(flow_matrix[:wet_date, column_number])
            bs_mean = np.mean(baseflow)
            bs_med = np.nanpercentile(baseflow, 50)
        else:
            summer_date = summer_timings[column_number - 1]
            if wet_dates[column_number]:
                if wet_dates[column_number] > 20:
                    wet_date = wet_dates[column_number] - 20
                else:
                    wet_date = wet_dates[column_number]
                baseflow = list(flow_matrix[summer_date:, column_number - 1]) + \
                    list(flow_matrix[:wet_date, column_number])
            else:
                baseflow = list(flow_matrix[summer_date:, column_number - 1])
            bs_mean = np.mean(baseflow)
            bs_med = np.nanpercentile(baseflow, 50)
        """Get fall flush peak"""
        counter = 0
        half_duration = int(max_flush_duration / 2)  # Only test duration for first half of fall flush peak
        if bs_med > 25:
            min_flush_magnitude = bs_med * 1.5  # if median baseflow is large (>25), magnitude threshold is 50% above median baseflow of previous summer
        else:
            min_flush_magnitude = bs_med * 2  # otherwise magnitude threshold is 100% above median baseflow of previous summer
        if min_flush_magnitude < min_flush_threshold:
            min_flush_magnitude = min_flush_threshold
        for flow_index in maxarray:

            if counter == 0:
                if flow_index[0] < half_duration and flow_index[0] != 0 \
                        and flow_index[1] > wet_filter_data[int(flow_index[0])] \
                        and flow_index[1] > min_flush_magnitude:
                    """if index found is before the half duration allowed"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
                elif ((flow_index[1] - spl(maxarray[counter][0] - half_duration)) / flow_index[1] > flush_threshold_perc
                      or minarray[counter][0] - maxarray[counter][0] < half_duration) \
                        and flow_index[1] > wet_filter_data[int(flow_index[0])] \
                        and flow_index[1] > min_flush_magnitude:
                    """If peak and valley are separated by half duration, or half duration to the left is less than 30% of its value"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
            elif counter == len(minarray):
                start_dates[-1] = None
                mags[-1] = None
                break
            elif (minarray[counter][0] - maxarray[counter][0] < half_duration
                  or maxarray[counter][0] - minarray[counter - 1][0] < half_duration) \
                    and flow_index[1] > wet_filter_data[int(flow_index[0])] \
                    and flow_index[1] > min_flush_magnitude \
                    and flow_index[0] <= date_cutoff:
                """valley and peak are distanced by less than half duration from either side"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            elif (spl(flow_index[0] - half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc \
                    and (spl(flow_index[0] + half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc \
                    and flow_index[1] > wet_filter_data[int(flow_index[0])] \
                    and flow_index[1] > min_flush_magnitude \
                    and flow_index[0] <= date_cutoff:
                """both sides of the flow value at the peak +/- half duration fall below flush_threshold_perc"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            counter = counter + 1
        """Check to see if last start_date falls behind the max_allowed_date"""
        if wet_dates[-1] and (start_dates[-1] is None or start_dates[-1] > wet_dates[-1]):
            start_dates[-1] = None
            mags[-1] = None
        """Get duration of each fall flush"""
        current_duration, left, right = calc_fall_flush_durations_2(
            filter_data, start_dates[-1])
        durations[-1] = current_duration
        _plotter(x_axis, flow_data, filter_data, wet_filter_data, start_dates,
                 wet_dates, column_number, left, right, maxarray, minarray,
                 min_flush_magnitude)

    return start_dates, mags, wet_dates, durations
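
Every example above depends on `peakdet(series, delta)`, which returns parallel arrays of (index, value) pairs for peaks and valleys. That contract matches Eli Billauer's classic peak-detection routine, so here is a compact sketch under that assumption:

# Hedged sketch of peakdet in the style of Billauer's algorithm: a candidate
# becomes a peak (valley) once the series has dropped (risen) by more than
# `delta` after it.
import numpy as np

def peakdet(series, delta):
    maxarray, minarray = [], []
    min_val, max_val = np.inf, -np.inf
    min_pos = max_pos = None
    looking_for_max = True
    for i, value in enumerate(series):
        if value > max_val:
            max_val, max_pos = value, i
        if value < min_val:
            min_val, min_pos = value, i
        if looking_for_max and value < max_val - delta:
            maxarray.append((max_pos, max_val))
            min_val, min_pos = value, i
            looking_for_max = False
        elif not looking_for_max and value > min_val + delta:
            minarray.append((min_pos, min_val))
            max_val, max_pos = value, i
            looking_for_max = True
    return np.array(maxarray), np.array(minarray)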
Example #8
def calc_fall_flush_timings_durations(flow_matrix, summer_timings, class_number, fall_params=def_fall_params):

    params = set_user_params(fall_params, def_fall_params)

    max_zero_allowed_per_year, max_nan_allowed_per_year, min_flow_rate, sigma, broad_sigma, wet_season_sigma, peak_sensitivity, peak_sensitivity_wet, max_flush_duration, min_flush_percentage, wet_threshold_perc, peak_detect_perc, flush_threshold_perc, min_flush_threshold, date_cutoff, slope_sensitivity = params.values()

    start_dates = []
    wet_dates = []
    durations = []
    mags = []

    for column_number, _ in enumerate(flow_matrix[0]):

        start_dates.append(None)
        wet_dates.append(None)
        durations.append(None)
        mags.append(None)

        """Check to see if water year has more than allowed nan or zeros"""
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year or max(flow_matrix[:, column_number]) < min_flow_rate:
            continue

        """Get flow data"""
        flow_data = flow_matrix[:, column_number]
        x_axis = list(range(len(flow_data)))

        """Interpolate between None values"""
        flow_data = replace_nan(flow_data)

        """Return to Wet Season"""
        if class_number in (3, 4, 5, 6, 7, 8):
            wet_season_filter_data = gaussian_filter1d(flow_data, 6)
        else:
            wet_season_filter_data = gaussian_filter1d(flow_data, wet_season_sigma)
        broad_filter_data = gaussian_filter1d(flow_data, broad_sigma)
        if class_number in (1, 2, 9):
            slope_detection_data = gaussian_filter1d(flow_data, 7)
        elif class_number in (3, 4, 5, 6, 7, 8):
            slope_detection_data = gaussian_filter1d(flow_data, 1)
        else:
            slope_detection_data = gaussian_filter1d(flow_data, 4)

        return_date = return_to_wet_date(flow_data, wet_season_filter_data, broad_filter_data, slope_detection_data, 
                                         wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number, slope_sensitivity)
        if return_date:
            wet_dates[-1] = return_date

        """Filter noise data with small sigma to find fall flush hump"""
        filter_data = gaussian_filter1d(flow_data, sigma)

        """Fit spline"""
        x_axis = list(range(len(filter_data)))
        spl = ip.UnivariateSpline(x_axis, filter_data, k=3, s=3)

        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(spl(x_axis), mean_flow * peak_sensitivity)

        """Find max and min of filtered flow data"""
        max_flow = max(filter_data[20:])
        max_flow_index = find_index(filter_data[20:], max_flow) + 20

        min_flow = min(broad_filter_data[:max_flow_index])

        """If could not find any max and find"""
        if not list(maxarray) or not list(minarray) or minarray[0][0] > max_flow_index:
            continue

        """Get flow magnitude threshold from previous summer's baseflow"""
        if column_number == 0:
            wet_date = wet_dates[0]
            baseflow = list(flow_matrix[:wet_date, column_number])
            # bs_mean = np.mean(baseflow)
            bs_med = np.nanpercentile(baseflow, 50)
        else:
            summer_date = summer_timings[column_number - 1]
            if wet_dates[column_number]:
                if wet_dates[column_number] > 20:
                    wet_date = wet_dates[column_number] - 20
                else:
                    wet_date = wet_dates[column_number]
                baseflow = list(flow_matrix[summer_date:, column_number - 1]) + list(flow_matrix[:wet_date, column_number])
                # bs_mean = np.mean(baseflow)
            else:
                baseflow = list(flow_matrix[summer_date:, column_number - 1])
            bs_med = np.nanpercentile(baseflow, 50)

        """Get fall flush peak"""
        counter = 0
        # Only test duration for first half of fall flush peak
        half_duration = int(max_flush_duration/2)
        if bs_med > 25:
            # if median baseflow is large (>25), magnitude threshold is 50% above median baseflow of previous summer
            min_flush_magnitude = bs_med * 1.5
        else:
            # otherwise magnitude threshold is 100% above median baseflow of previous summer
            min_flush_magnitude = bs_med * 2
        if min_flush_magnitude < min_flush_threshold:
            min_flush_magnitude = min_flush_threshold
        for flow_index in maxarray:

            if counter == 0:
                if flow_index[0] < half_duration and flow_index[0] != 0 and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                    """if index found is before the half duration allowed"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
                elif bool((flow_index[1] - spl(maxarray[counter][0] - half_duration)) / flow_index[1] > flush_threshold_perc or minarray[counter][0] - maxarray[counter][0] < half_duration) and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                    """If peak and valley is separted by half duration, or half duration to the left is less than 30% of its value"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
            elif counter == len(minarray):
                start_dates[-1] = None
                mags[-1] = None
                break
            elif bool(minarray[counter][0] - maxarray[counter][0] < half_duration or maxarray[counter][0] - minarray[counter-1][0] < half_duration) and bool(flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff):
                """valley and peak are distanced by less than half dur from either side"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            elif (spl(flow_index[0] - half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc and (spl(flow_index[0] + half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                """both side of flow value at the peak + half duration index fall below flush_threshold_perc"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            counter = counter + 1

        """Check to see if last start_date falls behind the max_allowed_date"""
        if wet_dates[-1]:
            if start_dates[-1] is None or start_dates[-1] > wet_dates[-1]:
                start_dates[-1] = None
                mags[-1] = None

        """Get duration of each fall flush"""
        current_duration, left, right = calc_fall_flush_durations_2(
            filter_data, start_dates[-1])
        durations[-1] = current_duration
        # _plotter(x_axis, flow_data, filter_data, broad_filter_data, start_dates, wet_dates, column_number, left, right, maxarray, minarray, min_flush_magnitude, slope_detection_data)
        
    return start_dates, mags, wet_dates, durations
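
`set_user_params` is also not shown. The tuple unpacking at the top of this example requires that it return the defaults' keys in their original order with user overrides applied, so a sketch is fairly constrained, though still an assumption:

# Hypothetical sketch of set_user_params: overlay user-supplied values onto
# the defaults while preserving the defaults' key order, which the positional
# unpacking of params.values() above depends on.
def set_user_params(user_params, def_params):
    params = dict(def_params)
    for key, value in user_params.items():
        if key in params:
            params[key] = value
    return params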
Example #9
def calc_start_of_summer(matrix):
    """Set adjustable parameters for start of summer date detection"""
    max_zero_allowed_per_year = summer_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = summer_params['max_nan_allowed_per_year']
    sigma = summer_params[
        'sigma']  # determines amount of smoothing for summer timing detection
    sensitivity = summer_params[
        'sensitivity']  # increased sensitivity returns smaller threshold for derivative
    peak_sensitivity = summer_params[
        'peak_sensitivity']  # identifies last major peak after which to search for start date
    max_peak_flow_date = summer_params[
        'max_peak_flow_date']  # max search date for the peak flow date
    min_summer_flow_percent = summer_params[
        'min_summer_flow_percent']  # require that summer start is below this flow threshold

    start_dates = []
    for column_number, flow_data in enumerate(matrix[0]):
        start_dates.append(None)
        """Check if data has too many zeros or NaN, and if so skip to next water year"""
        if np.isnan(matrix[:, column_number]).sum() > max_nan_allowed_per_year \
                or np.count_nonzero(matrix[:, column_number] == 0) > max_zero_allowed_per_year:
            continue
        """Append each column with 30 more days from next column, except the last column"""
        if column_number != len(matrix[0]) - 1:
            flow_data = list(matrix[:, column_number]) + list(
                matrix[:100, column_number + 1])
        else:
            flow_data = matrix[:, column_number]
        """Replace any NaNs with previous day's flow"""
        flow_data = replace_nan(flow_data)
        """Smooth out the timeseries"""
        smooth_data = gaussian_filter1d(flow_data, 4)
        smooth_data2 = gaussian_filter1d(flow_data, 12)
        x_axis = list(range(len(smooth_data)))
        """Find spline fit equation for smoothed timeseries, and find derivative of spline"""
        spl = ip.UnivariateSpline(x_axis, smooth_data, k=3, s=3)
        spl_first = spl.derivative(1)

        max_flow_data = max(smooth_data[:366])
        max_flow_index = find_index(smooth_data, max_flow_data)
        """Find the major peaks of the filtered data"""
        mean_flow = np.nanmean(flow_data)
        maxarray, minarray = peakdet(smooth_data, mean_flow * peak_sensitivity)
        """Set search range after last smoothed peak flow"""
        for flow_index in reversed(maxarray):
            if int(flow_index[0]) < max_peak_flow_date:
                max_flow_index = int(flow_index[0])
                break
        """Set a magnitude threshold below which start of summer can begin"""
        min_flow_data = min(smooth_data[max_flow_index:366])
        threshold = min_flow_data + (smooth_data[max_flow_index] -
                                     min_flow_data) * min_summer_flow_percent

        current_sensitivity = 1 / sensitivity
        start_dates[-1] = None
        for index, data in enumerate(smooth_data):
            if index == len(smooth_data) - 2:
                break
            """Search criteria: derivative is under rate of change threshold, date is after last major peak, and flow is less than specified percent of smoothed max flow"""
            if abs(spl_first(index)) < max_flow_data * current_sensitivity \
                    and index > max_flow_index and data < threshold:
                start_dates[-1] = index
                break

        _summer_baseflow_plot(x_axis, column_number, flow_data, spl, spl_first,
                              start_dates, threshold, max_flow_index, maxarray,
                              smooth_data, smooth_data2)

    return start_dates
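
`replace_nan` is pinned down by the comments above ("Replace any NaNs with previous day's flow"), so a sketch is relatively safe; only the treatment of a NaN on the first day is a guess:

# Sketch of replace_nan per the comments in these examples: forward-fill each
# NaN with the previous day's flow. The 0.0 fallback for a leading NaN is an
# assumption.
import numpy as np

def replace_nan(flow_data):
    flow_data = np.array(flow_data, dtype=float)
    for i, value in enumerate(flow_data):
        if np.isnan(value):
            flow_data[i] = 0.0 if i == 0 else flow_data[i - 1]
    return flow_data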
Example #10
def calc_spring_transition_timing_magnitude(flow_matrix):
    max_zero_allowed_per_year = spring_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = spring_params['max_nan_allowed_per_year']
    max_peak_flow_date = spring_params[
        'max_peak_flow_date']  # max search date for the peak flow date
    search_window_left = spring_params[
        'search_window_left']  # left side of search window set around max peak
    search_window_right = spring_params[
        'search_window_right']  # right side of search window set around max peak
    peak_sensitivity = spring_params[
        'peak_sensitivity']  # smaller => more peaks detected
    peak_filter_percentage = spring_params[
        'peak_filter_percentage']  # Relative flow (Q-Qmin) of start of spring must be certain percentage of peak relative flow (Qmax-Qmin)
    min_max_flow_rate = spring_params['min_max_flow_rate']
    window_sigma = spring_params[
        'window_sigma']  # Heavy filter to identify major peaks in entire water year
    fit_sigma = spring_params[
        'fit_sigma']  # Smaller filter to identify small peaks in windowed data (smaller sigma val => less filter)
    sensitivity = spring_params[
        'sensitivity']  # 0.1 - 10, 0.1 being the most sensitive
    min_percentage_of_max_flow = spring_params[
        'min_percentage_of_max_flow']  # the detected date's flow must be a certain percentage of the max flow in that region
    lag_time = spring_params['lag_time']

    timings = []
    magnitudes = []
    for column_number, column_flow in enumerate(flow_matrix[0]):
        current_sensitivity = sensitivity / 1000

        timings.append(None)
        magnitudes.append(None)
        """Check to see if water year has more than allowed nan or zeros"""
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year \
                or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year:
            continue
        """Get flow data and interpolate between None values"""
        flow_data = flow_matrix[:, column_number]
        flow_data = replace_nan(flow_data)
        x_axis = list(range(
            len(flow_data)))  # Extract for use in optional plotting
        """Using Gaussian with heavy sigma to smooth the curve"""
        filter_data = gaussian_filter1d(flow_data, window_sigma)
        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(
            filter_data, mean_flow * peak_sensitivity
        )  # Returns array with the index and flow magnitude for each peak and valley
        """Find the max flow in the curve and determine flow range requirements"""
        max_flow = np.nanmax(filter_data)
        max_flow_index = find_index(filter_data, max_flow)
        min_flow = np.nanmin(filter_data)
        flow_range = max_flow - min_flow
        """Identify rightmost peak that fulfills date and magnitude requirements"""
        for flow_index in reversed(maxarray):
            if int(flow_index[0]) < max_peak_flow_date and (
                    flow_index[1] -
                    min_flow) / flow_range > peak_filter_percentage:
                max_flow_index = int(flow_index[0])
                break

        if np.nanmax(filter_data) < min_max_flow_rate:
            """Set start of spring index to the max flow index, when the annual max flow is below certain threshold.
            This is used for extremely low flows where data appears to be stepwise
            """
            max_filter_data = np.nanmax(flow_data)
            timings[-1] = find_index(flow_data, max_filter_data)
            magnitudes[-1] = max_filter_data
        else:
            if max_flow_index < search_window_left:
                current_search_window_left = 0
            else:
                current_search_window_left = search_window_left
            if max_flow_index > 366 - search_window_right:
                current_search_window_right = 366 - max_flow_index
            else:
                current_search_window_right = search_window_right
            """Get indices of windowed data"""
            max_flow_index_window = max(
                flow_data[max_flow_index - current_search_window_left:
                          max_flow_index + current_search_window_right])
            timings[-1] = find_index(flow_data, max_flow_index_window)
            magnitudes[-1] = max_flow_index_window
            """Gaussian filter again on the windowed data (smaller filter this time)"""
            x_axis_window = list(
                range(max_flow_index - current_search_window_left,
                      max_flow_index + current_search_window_right))
            flow_data_window = gaussian_filter1d(
                flow_data[max_flow_index - current_search_window_left:
                          max_flow_index + current_search_window_right], fit_sigma)
            """Fit a spline on top of the Gaussian curve"""
            if len(flow_data_window) < 50:
                continue

            spl = ip.UnivariateSpline(x_axis_window,
                                      flow_data_window,
                                      k=3,
                                      s=3)
            """Calculate the first derivative of the spline"""
            spl_first_deriv = spl.derivative(1)
            """Find where the derivative of the spline crosses zero"""
            index_zeros = crossings_nonzero_all(spl_first_deriv(x_axis_window))
            """Offset the new index"""
            new_index = []
            for index in index_zeros:
                new_index.append(max_flow_index - current_search_window_left + index)
            """Loop through the indices where derivative=0, from right to left"""
            for i in reversed(new_index):
                threshold = max(spl_first_deriv(x_axis_window))
                max_flow_window = max(spl(x_axis_window))
                min_flow_window = min(spl(x_axis_window))
                range_window = max_flow_window - min_flow_window
                """Set spring timing as index which fulfills the following requirements"""
                if spl(i) - spl(i - 1) > threshold * current_sensitivity * 1 \
                        and spl(i - 1) - spl(i - 2) > threshold * current_sensitivity * 2 \
                        and spl(i - 2) - spl(i - 3) > threshold * current_sensitivity * 3 \
                        and spl(i - 3) - spl(i - 4) > threshold * current_sensitivity * 4 \
                        and (spl(i) - min_flow_window) / range_window > min_percentage_of_max_flow:
                    timings[-1] = i
                    break
            """Check if timings is before max flow index"""
            if timings[-1] < max_flow_index:
                timings[-1] = max_flow_index + lag_time
            """Find max flow 4 days before and 7 days ahead. Assign as new start date"""
            if len(flow_data[timings[-1] - 4:timings[-1] + 7]) > 10:
                max_flow_window_new = max(flow_data[timings[-1] -
                                                    4:timings[-1] + 7])
                new_timings = find_index(
                    flow_data[timings[-1] - 4:timings[-1] + 7],
                    max_flow_window_new)
                timings[-1] = timings[-1] - 4 + new_timings + lag_time
                magnitudes[-1] = max_flow_window_new

            # _spring_transition_plotter(x_axis, flow_data, filter_data, x_axis_window, spl_first_deriv, new_index, max_flow_index, timings, search_window_left, search_window_right, spl, column_number, maxarray)

    return timings, magnitudes
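
`crossings_nonzero_all` takes the sampled spline derivative and returns the indices where it changes sign, i.e. candidate local extrema. A common numpy idiom fits that contract; assumed, not confirmed:

# Hedged sketch of crossings_nonzero_all: indices where the signal crosses
# zero, found by comparing the sign of adjacent samples.
import numpy as np

def crossings_nonzero_all(data):
    pos = data > 0
    npos = ~pos
    return ((pos[:-1] & npos[1:]) | (npos[:-1] & pos[1:])).nonzero()[0]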
Example #11
def calc_spring_transition_timing_magnitude(flow_matrix,
                                            class_number,
                                            summer_timings,
                                            spring_params=def_spring_params):
    max_zero_allowed_per_year = spring_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = spring_params['max_nan_allowed_per_year']
    # max search date for the peak flow date
    max_peak_flow_date = spring_params['max_peak_flow_date']
    # left side of search window set around max peak
    search_window_left = spring_params['search_window_left']
    # right side of search window set around max peak
    search_window_right = spring_params['search_window_right']
    # smaller => more peaks detected
    peak_sensitivity = spring_params['peak_sensitivity']
    # Relative flow (Q-Qmin) of start of spring must be certain percentage of peak relative flow (Qmax-Qmin)
    peak_filter_percentage = spring_params['peak_filter_percentage']
    min_max_flow_rate = spring_params['min_max_flow_rate']
    # Heavy filter to identify major peaks in entire water year
    window_sigma = spring_params['window_sigma']
    # Smaller filter to identify small peaks in windowed data (smaller sigma val => less filter)
    fit_sigma = spring_params['fit_sigma']
    # 0.1 - 10, 0.1 being the most sensitive
    sensitivity = spring_params['sensitivity']
    # the detected date's flow must be a certain percentage of the max flow in that region
    min_percentage_of_max_flow = spring_params['min_percentage_of_max_flow']
    lag_time = spring_params['lag_time']
    timing_cutoff = spring_params['timing_cutoff']
    # Don't calculate flow metrics if max flow is below this value.
    min_flow_rate = spring_params['min_flow_rate']

    timings = []
    magnitudes = []

    for column_number, _ in enumerate(flow_matrix[0]):

        timings.append(None)
        magnitudes.append(None)
        """Check to see if water year has more than allowed nan or zeros"""
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year \
                or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year \
                or max(flow_matrix[:, column_number]) < min_flow_rate:
            continue
        """Get flow data and interpolate between None values"""
        flow_data = flow_matrix[:, column_number]
        flow_data = replace_nan(flow_data)
        # Extract for use in optional plotting
        x_axis = list(range(len(flow_data)))

        current_sensitivity = sensitivity / 1000
        """Reduce sensitivity in rain-dominated gages"""
        if class_number in (4, 6, 7, 8):
            max_peak_flow_date = 255
        """Use specialized smoothing sigma values for rain-dominated classes"""
        if class_number == 7:
            window_sigma = 2
        if class_number == 6:
            window_sigma = 2.5
        if class_number == 8:
            window_sigma = 2.5
        if class_number == 4:
            window_sigma = 2.5
        """Reduce the minimum flow magnitude requirement for rain-dominated classes"""
        if class_number in (4, 6, 7, 8):
            min_percentage_of_max_flow = .05
        """Using Gaussian with heavy sigma to smooth the curve"""
        filter_data = gaussian_filter1d(flow_data, window_sigma)
        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        # Returns array with the index and flow magnitude for each peak and valley
        maxarray, minarray = peakdet(filter_data, mean_flow * peak_sensitivity)
        """Find the max flow in the curve and determine flow range requirements"""
        max_flow = np.nanmax(filter_data)
        max_flow_index = find_index(filter_data, max_flow)
        min_flow = np.nanmin(filter_data)
        flow_range = max_flow - min_flow
        """Use specialized relative peak magnitude requirements for rain-dominated classes"""
        if class_number == 7:
            peak_filter_percentage = 0.05
            min_percentage_of_max_flow = 0.05
        if class_number == 6:
            peak_filter_percentage = .12
            min_percentage_of_max_flow = 0.12
        if class_number == 8:
            peak_filter_percentage = .15
            min_percentage_of_max_flow = .15
        if class_number == 4:
            peak_filter_percentage = .10
            min_percentage_of_max_flow = .10
        """Identify rightmost peak that fulfills date and magnitude requirements"""
        for counter, flow_index in enumerate(reversed(maxarray)):
            if int(flow_index[0]) < max_peak_flow_date and (
                    flow_index[1] -
                    min_flow) / flow_range > peak_filter_percentage:
                max_flow_index = int(flow_index[0])
                break

        if np.nanmax(filter_data) < min_max_flow_rate:
            """Set start of spring index to the max flow index, when the annual max flow is below certain threshold.
            This is used for extremely low flows where seasonal timings are harder to find
            """
            max_filter_data = np.nanmax(flow_data)
            timings[-1] = find_index(flow_data, max_filter_data)
            magnitudes[-1] = max_filter_data
        else:
            if max_flow_index < search_window_left:
                current_search_window_left = 0
            else:
                current_search_window_left = search_window_left
            if max_flow_index > 366 - search_window_right:
                current_search_window_right = 366 - max_flow_index
            else:
                current_search_window_right = search_window_right
            """Get indices of windowed data"""
            max_flow_index_window = max(
                flow_data[max_flow_index -
                          current_search_window_left:max_flow_index +
                          current_search_window_right])
            timings[-1] = find_index(flow_data, max_flow_index_window)
            magnitudes[-1] = max_flow_index_window
            """Gaussian filter again on the windowed data (smaller filter this time)"""
            x_axis_window = list(
                range(max_flow_index - current_search_window_left,
                      max_flow_index + current_search_window_right))
            flow_data_window = gaussian_filter1d(
                flow_data[max_flow_index -
                          current_search_window_left:max_flow_index +
                          current_search_window_right], fit_sigma)
            """If search window is too small, move on to next value in maxarray. If it is the last value in maxarray, proceed inside loop"""
            if len(flow_data_window) < 50:
                if counter != len(maxarray) - 1:
                    continue
            """Fit a spline on top of the Gaussian curve"""
            spl = ip.UnivariateSpline(x_axis_window,
                                      flow_data_window,
                                      k=3,
                                      s=3)
            """Calculate the first derivative of the spline"""
            spl_first_deriv = spl.derivative(1)
            """Find where the derivative of the spline crosses zero"""
            index_zeros = crossings_nonzero_all(spl_first_deriv(x_axis_window))
            """Offset the new index"""
            new_index = []
            for index in index_zeros:
                new_index.append(max_flow_index - current_search_window_left +
                                 index)
            """Loop through the indices where derivative=0, from right to left"""
            for i in reversed(new_index):
                threshold = max(spl_first_deriv(x_axis_window))
                max_flow_window = max(spl(x_axis_window))
                min_flow_window = min(spl(x_axis_window))
                range_window = max_flow_window - min_flow_window
                """Set spring timing as index which fulfills the following requirements"""
                if summer_timings[column_number] is not None \
                        and i < summer_timings[column_number] \
                        and i > timing_cutoff \
                        and spl(i) - spl(i - 1) > threshold * current_sensitivity * 1 \
                        and spl(i - 1) - spl(i - 2) > threshold * current_sensitivity * 2 \
                        and spl(i - 2) - spl(i - 3) > threshold * current_sensitivity * 3 \
                        and spl(i - 3) - spl(i - 4) > threshold * current_sensitivity * 4 \
                        and (spl(i) - min_flow_window) / range_window > min_percentage_of_max_flow:
                    timings[-1] = i
                    break
            """Check if timings is before max flow index"""
            if timings[-1] < max_flow_index:  # replace max flow index with cutoff date
                timings[-1] = max_flow_index + lag_time
            """Find max flow 4 days before and 7 days ahead. Assign as new start date"""
            if len(flow_data[timings[-1] - 4:timings[-1] + 7]) > 10:
                max_flow_window_new = max(flow_data[timings[-1] -
                                                    4:timings[-1] + 7])
                new_timings = find_index(
                    flow_data[timings[-1] - 4:timings[-1] + 7],
                    max_flow_window_new)
                timings[-1] = timings[-1] - 4 + new_timings + lag_time
                magnitudes[-1] = max_flow_window_new

            if summer_timings[column_number] is not None and timings[-1] > summer_timings[column_number]:
                timings[-1] = None

        # _spring_transition_plotter(x_axis, flow_data, filter_data, x_axis_window, spl_first_deriv, new_index, max_flow_index, timings, current_search_window_left, current_search_window_right, spl, column_number, maxarray)

    return timings, magnitudes
Example #12
def calc_start_of_summer(matrix,
                         class_number,
                         summer_params=def_summer_params):
    """Set adjustable parameters for start of summer date detection"""
    params = set_user_params(summer_params, def_summer_params)

    max_zero_allowed_per_year, max_nan_allowed_per_year, sigma, sensitivity, peak_sensitivity, max_peak_flow_date, min_summer_flow_percent, min_flow_rate = params.values(
    )

    start_dates = []
    for column_number, flow_data in enumerate(matrix[0]):
        start_dates.append(None)
        """Check if data has too many zeros or NaN, and if so skip to next water year"""
        if pd.isnull(matrix[:, column_number]).sum() > max_nan_allowed_per_year \
                or np.count_nonzero(matrix[:, column_number] == 0) > max_zero_allowed_per_year \
                or max(matrix[:, column_number]) < min_flow_rate:
            continue
        """Append each column with 30 more days from next column, except the last column"""
        if column_number != len(matrix[0]) - 1:
            flow_data = list(matrix[:, column_number]) + \
                list(matrix[:100, column_number+1])
        else:
            flow_data = matrix[:, column_number]
        """Replace any NaNs with previous day's flow"""
        flow_data = replace_nan(flow_data)
        """Set specific parameters for rain-dominated classes"""
        if class_number in (4, 6, 7, 8):
            sensitivity = 1100
            peak_sensitivity = .1
            sigma = 4
        """Smooth out the timeseries"""
        smooth_data = gaussian_filter1d(flow_data, sigma)
        x_axis = list(range(len(smooth_data)))
        """Find spline fit equation for smoothed timeseries, and find derivative of spline"""
        spl = ip.UnivariateSpline(x_axis, smooth_data, k=3, s=3)
        spl_first = spl.derivative(1)

        max_flow_data = max(smooth_data[:366])
        max_flow_index = find_index(smooth_data, max_flow_data)
        """Find the major peaks of the filtered data"""
        mean_flow = np.nanmean(flow_data)
        maxarray, minarray = peakdet(smooth_data, mean_flow * peak_sensitivity)
        """Set search range after last smoothed peak flow"""
        for flow_index in reversed(maxarray):
            if int(flow_index[0]) < max_peak_flow_date:
                max_flow_index = int(flow_index[0])
                break
        """Set a magnitude threshold below which start of summer can begin"""
        min_flow_data = min(smooth_data[max_flow_index:366])
        threshold = min_flow_data + \
            (smooth_data[max_flow_index] - min_flow_data) * \
            min_summer_flow_percent

        current_sensitivity = 1 / sensitivity
        start_dates[-1] = None
        for index, data in enumerate(smooth_data):
            if index == len(smooth_data) - 2:
                break
            """Search criteria: derivative is under rate of change threshold, date is after last major peak, and flow is less than specified percent of smoothed max flow"""
            if abs(spl_first(index)) < max_flow_data * current_sensitivity \
                    and index > max_flow_index and data < threshold:
                start_dates[-1] = index
                break

        # _summer_baseflow_plot(x_axis, column_number, flow_data, spl, spl_first, start_dates, threshold, max_flow_index, maxarray)

    return start_dates
Example #13
def dim_hydrograph_plotter_agg(start_date, directory_name, end_with,
                               class_number, gauge_numbers, plot):
    aggregate_matrix = np.zeros(
        (366, 7))  # change to 5 for matrix without min/max
    counter = 0
    for root, dirs, files in os.walk(directory_name):
        for file in files:
            if file.endswith(end_with):
                fixed_df = pd.read_csv('{}/{}'.format(directory_name, file),
                                       sep=',',
                                       encoding='latin1',
                                       dayfirst=False,
                                       header=None).dropna(axis=1, how='all')
                step = is_multiple_date_data(fixed_df)
                current_gauge_column_index = 1
                while current_gauge_column_index <= (len(fixed_df.iloc[1, :]) - 1):
                    if gauge_numbers:
                        if int(fixed_df.iloc[1, current_gauge_column_index]) in gauge_numbers:
                            current_gauge_class, current_gauge_number, year_ranges, flow_matrix, julian_dates = convert_raw_data_to_matrix(
                                fixed_df, current_gauge_column_index,
                                start_date)
                            start_year_index = find_index(
                                year_ranges,
                                int(gauge_reference[int(current_gauge_number)]
                                    ['start']))
                            end_year_index = find_index(
                                year_ranges,
                                int(gauge_reference[int(current_gauge_number)]
                                    ['end']))
                            flow_matrix = flow_matrix[:, start_year_index:end_year_index]
                            aggregate_matrix = np.add(
                                aggregate_matrix, _getAggMatrix(flow_matrix))
                            counter = counter + 1

                    elif int(fixed_df.iloc[0, current_gauge_column_index]) == int(class_number):
                        current_gauge_class, current_gauge_number, year_ranges, flow_matrix, julian_dates = convert_raw_data_to_matrix(
                            fixed_df, current_gauge_column_index, start_date)
                        start_year_index = find_index(
                            year_ranges,
                            int(gauge_reference[int(current_gauge_number)]
                                ['start']))
                        end_year_index = find_index(
                            year_ranges,
                            int(gauge_reference[int(current_gauge_number)]
                                ['end']))
                        flow_matrix = flow_matrix[:, start_year_index:end_year_index]
                        aggregate_matrix = np.add(aggregate_matrix,
                                                  _getAggMatrix(flow_matrix))
                        counter = counter + 1

                    current_gauge_column_index = current_gauge_column_index + step

        final_aggregate = aggregate_matrix / counter

        if class_number:
            np.savetxt(
                "post_processedFiles/Hydrographs/Class_{}_aggregate.csv".
                format(int(current_gauge_class)),
                final_aggregate,
                delimiter=",",
                fmt="%s")
        else:
            np.savetxt(
                "post_processedFiles/Hydrographs/plot_data_{}.csv".format(
                    int(current_gauge_number)),
                final_aggregate,
                delimiter=",",
                fmt="%s")
        """To output plot, uncomment line below"""
        _plotter(final_aggregate, start_date)
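
`_getAggMatrix` is not included either. The 366x7 aggregate shape and the "change to 5 for matrix without min/max" note suggest five per-day summary statistics bracketed by min and max; the specific percentiles below are purely a guess:

# Speculative sketch of _getAggMatrix: per-calendar-day summary columns.
# The choice of 10/25/50/75/90 percentiles is an assumption; only the 366x7
# shape and the min/max note are given above.
import numpy as np

def _getAggMatrix(flow_matrix):
    agg = np.zeros((366, 7))
    for day in range(366):
        row = flow_matrix[day, :]
        agg[day] = [np.nanmin(row),
                    np.nanpercentile(row, 10), np.nanpercentile(row, 25),
                    np.nanpercentile(row, 50), np.nanpercentile(row, 75),
                    np.nanpercentile(row, 90), np.nanmax(row)]
    return agg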