def analyzeSinglePoint(x, y):
    """Run the Mann-Kendall test on one series and pack the outcome in a dict.

    ``x`` and ``y`` are forwarded unchanged as the first two positional
    arguments of ``mkt.test``, which is called with ``eps=1E-3``,
    ``alpha=0.05`` and ``Ha="upordown"``.

    Returns
    -------
    dict
        'zmk' : first value returned by ``mkt.test``
        'ha'  : 0 when the result string returned by ``mkt.test`` starts
                with 'rej', otherwise 1
        'm'   : slope of the linear fit to the data
        'c'   : intercept of the linear fit to the data
        'p'   : p-value of the Z-score statistic of the Mann-Kendall test
    """
    ALPHA = 0.05

    # NOTE(review): this call site expects five return values from
    # mkt.test (Z-score first) -- confirm against the installed mkt version.
    z_score, verdict, slope, intercept, p_value = mkt.test(
        x, y, eps=1E-3, alpha=ALPHA, Ha="upordown")

    return {
        'zmk': z_score,
        'ha': 0 if verdict.startswith('rej') else 1,
        'm': slope,
        'c': intercept,
        'p': p_value,
    }
def show_examples():
    """
    Plot the MK test results for artificial data.

    Four noisy linear time series (slopes 0.01, 0.001, -0.001 and -0.01)
    are generated, each is passed to ``mkt.test`` and drawn together with
    the fitted line in its own panel of a 2x2 figure. The figure is shown
    on screen; nothing is returned.
    """
    # create artificial time series with trend
    n = 1000
    C = [0.01, 0.001, -0.001, -0.01]
    e = 1.00
    t = np.linspace(0., 500, n)

    # set up figure
    nrows, ncols = 2, 2
    _, axes = pl.subplots(nrows=nrows, ncols=ncols, figsize=[16.00, 9.00])

    # loop through various values of correlation
    ALPHA = 0.01
    for idx, (trend, ax) in enumerate(zip(C, axes.flatten())):
        # axes.flatten() is row-major, so the grid position follows from the
        # running index. This replaces Axes.is_first_row()/is_last_row()/
        # is_first_col(), which newer Matplotlib releases no longer provide.
        row, col = divmod(idx, ncols)

        # estimate the measurements 'x'
        x = trend * t + e * np.random.randn(n)
        x = np.round(x, 2)

        # get the slope, intercept and pvalues from the mklt module
        # (the intercept is kept in `c`; the loop variable is named `trend`
        # so that it is no longer overwritten by this unpacking)
        MK, m, c, p = mkt.test(t, x, eps=1E-3, alpha=ALPHA, Ha="upordown")

        # plot results
        ax.plot(t, x, "k.-", label="Sampled time series")
        ax.plot(t, m * t + c, "r-", label="Linear fit")
        ax.set_title(MK.upper() + "\np=%.3f, alpha = %.2f" % (p, ALPHA),
                     fontweight="bold", fontsize=10)

        # prettify
        if row == nrows - 1:
            ax.legend(loc="upper right")
            ax.set_xlabel("Time")
        if col == 0:
            ax.set_ylabel(r"Measurements $x$")
        if row == 0:
            ax.legend(loc="upper left")

    # save/show plot
    # pyplot.show() takes no figure argument; it displays all open figures.
    pl.show()
    return None
def trendTest(self, time_scale, least_records, target_alpha, plot=False):
    """Test the monthly or yearly mean discharge for a monotonic trend.

    The daily record in ``self.data`` (loaded through
    ``self.getDailyDischarge()`` when it is still ``None``) is averaged per
    month or per year. If enough aggregated records exist, the Theil slope
    (``stats.theilslopes``) and the Mann-Kendall test (``mkt.test``) are
    computed on them.

    The code assumes the record has no date gaps; the aggregated values are
    simply numbered 0, 1, 2, ... for the regression.

    Parameters
    ----------
    time_scale : str
        'M' for a monthly trend, 'Y' for a yearly trend.
    least_records : int
        Minimum number of aggregated records (months or years) required
        to run the test.
    target_alpha : float
        Significance level. Used as ``alpha`` for ``mkt.test`` and as
        ``1 - target_alpha`` for the Theil slope confidence interval.
    plot : bool, optional
        When True, draw the aggregated series (plus the Theil line when a
        trend was detected).

    Returns
    -------
    trend_result : int or float
        1 (increasing), -1 (decreasing), 0 (no trend), or ``np.nan`` when
        the test could not be run.
    slope_result : float
        Theil slope when a trend was detected, 0 when not, ``np.nan`` when
        the test could not be run.
    R_TS : tuple or float
        Result of ``stats.theilslopes`` (medslope, medintercept, lo_slope,
        up_slope), or ``np.nan``.
        https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.mstats.theilslopes.html
    R_MK : tuple or float
        Result of ``mkt.test`` (MK, m, c, p), or ``np.nan``.
        https://up-rs-esp.github.io/mkt/_modules/mkt.html
    reason : str
        "no issues" when the test ran, "data shortage" when fewer than
        ``least_records`` aggregated records were available.

    Raises
    ------
    ValueError
        If ``time_scale`` is neither 'M' nor 'Y'.
    RuntimeError
        If ``plot`` is True but the test could not be run.
    """
    # HKM added / Jul.30.2020
    if self.data is None:
        self.data = self.getDailyDischarge()
    t_Q = self.data.rename(
        columns={'Flow ({})'.format(self.getUnit()): 'Flow'})
    t_Q.Date = pd.to_datetime(t_Q.Date)

    # strftime pattern used as the grouping key, and the unit named in the
    # data-shortage message, for each supported time scale
    if time_scale == 'M':  # Monthly trend
        date_format, period_name = '%Y-%m', 'months'
    elif time_scale == 'Y':  # Yearly trend
        date_format, period_name = '%Y', 'years'
    else:
        raise ValueError(
            'Invalid time scale. Please select M (monthly trend) or Y (yearly trend)'
        )

    # If we have a date gap, we should modify these lines.
    # Here, it is assumed that USGS provides continuous data.
    t_Q_aggr = t_Q.groupby(t_Q.Date.dt.strftime(date_format)).Flow.agg(
        ['mean'])

    reason = "no issues"
    valid_flag = len(t_Q_aggr) >= least_records
    if not valid_flag:
        # Keep this reason all the way to the return value: data shortage is
        # the only cause that prevents the analysis here, so it must not be
        # replaced by a generic "other issues" message.
        reason = "data shortage"
        print(
            f' Data at this gage has records shorter than your defined {least_records} {period_name}.\n'
        )

    if valid_flag:
        x = np.arange(len(t_Q_aggr))
        y = t_Q_aggr.to_numpy().ravel()

        # Theil slopes: (medslope, medintercept, lo_slope, up_slope)
        R_TS = stats.theilslopes(y, x, alpha=1 - target_alpha)

        # Mann-Kendall trend test: (MK, m, c, p)
        R_MK = mkt.test(x, y, 1, target_alpha, "upordown")

        significant = R_MK[3] < target_alpha
        if significant and R_MK[1] > 0:
            trend_result = 1  # increasing trend
            slope_result = R_TS[0]
        elif significant and R_MK[1] < 0:
            trend_result = -1  # decreasing trend
            slope_result = R_TS[0]
        else:
            trend_result = 0  # no trend
            slope_result = 0
    else:
        # The trend analysis could not be conducted
        trend_result = np.nan
        slope_result = np.nan
        R_TS = np.nan
        R_MK = np.nan

    if plot:
        if not valid_flag:
            raise RuntimeError('Not enough data to plot')

        # x axis of the plot: the grouping keys converted back to dates
        t_aggr_date = pd.to_datetime(t_Q_aggr.index, format=date_format)

        # monthly or yearly plot, with the regression line when a trend exists
        fig, ax = plt.subplots(figsize=(15, 5))
        if trend_result == 0:
            ax.plot(t_aggr_date, y, linewidth=2)
        else:
            ax.plot(t_aggr_date,
                    y,
                    t_aggr_date,
                    R_TS[0] * x + R_TS[1],
                    'r--',
                    linewidth=2)
        ax.set_xlabel('Date', fontsize=12)
        ax.set_ylabel('Discharge {}'.format(self.getUnit()), fontsize=12)
        ax.set_title('Discharge at USGS {}'.format(self.id), fontsize=16)
        if trend_result == 0:
            plt.text(t_aggr_date[round(len(t_aggr_date) / 2)],
                     (max(y) - min(y)) / 2,
                     "No Trend",
                     size=50,
                     rotation=30.,
                     ha="center",
                     va="center",
                     bbox=dict(
                         boxstyle="round",
                         ec=(1., 0.5, 0.5),
                         fc=(1., 0.8, 0.8),
                     ))

    return trend_result, slope_result, R_TS, R_MK, reason
def trendTest(self, time_scale, target_alpha):
    """Test the monthly or yearly mean discharge for a monotonic trend.

    The daily record in ``self.data`` (loaded through
    ``self.getDailyDischarge()`` when it is still ``None``) is averaged per
    month ('M') or per year ('Y'). At least 120 monthly or 10 yearly
    aggregated records are required. The record is assumed to have no date
    gaps.

    Returns
    -------
    (trend_result, slope_result, R_TS, R_MK)
        trend_result : 1 (increasing), -1 (decreasing) or 0 (no trend)
        slope_result : Theil slope when a trend was detected, otherwise 0
        R_TS : result of ``stats.theilslopes``
            (medslope, medintercept, lo_slope, up_slope)
            https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.mstats.theilslopes.html
        R_MK : result of ``mkt.test`` (MK, m, c, p)
            https://up-rs-esp.github.io/mkt/_modules/mkt.html

        All four values are ``False`` when the analysis cannot be run
        (unknown ``time_scale`` or too few records). Note that
        ``False == 0`` in Python, so callers must use ``is False`` to tell
        "not tested" apart from "no trend".
    """
    # HKM added / Jul.30.2020
    if self.data is None:
        self.data = self.getDailyDischarge()
    flow_column = 'Flow ({})'.format(self.getUnit())
    t_Q = self.data.rename(columns={flow_column: 'Flow'})

    # Grouping pattern and minimum record count (10 years of data) per scale
    if time_scale == 'M':  # Monthly trend
        date_format, min_records = '%Y-%m', 120
    elif time_scale == 'Y':  # Yearly trend
        date_format, min_records = '%Y', 10
    else:
        print('Please select M (monthly trend) or Y (yearly trend)')
        return False, False, False, False

    # If we have a date gap, these lines should be modified.
    # Here, it is assumed that USGS provides continuous data.
    period_keys = t_Q.Date.dt.strftime(date_format)
    t_Q_aggr = t_Q.groupby(period_keys).Flow.agg(['mean'])
    if len(t_Q_aggr) < min_records:
        # Too short a record: the trend analysis cannot be conducted
        return False, False, False, False

    x = np.arange(len(t_Q_aggr))
    y = t_Q_aggr.to_numpy().ravel()

    # Theil slopes: (medslope, medintercept, lo_slope, up_slope)
    R_TS = stats.theilslopes(y, x, alpha=1 - target_alpha)

    # Mann-Kendall trend test: (MK, m, c, p)
    R_MK = mkt.test(x, y, 1, target_alpha, "upordown")

    significant = R_MK[3] < target_alpha
    if significant and R_MK[1] > 0:
        return 1, R_TS[0], R_TS, R_MK  # increasing trend
    if significant and R_MK[1] < 0:
        return -1, R_TS[0], R_TS, R_MK  # decreasing trend
    return 0, 0, R_TS, R_MK  # no trend