def construct_IDF(self):
    """Fill ``self.idf`` with GEV return levels for every column of the AMS table.

    For each column of ``self.reformatted_ams`` a GEV distribution is fitted
    with ``gev.fit`` and evaluated with ``gev.isf`` at ``self.quantiles``.

    * ``self.ci`` falsy: ``self.idf[col]`` receives the return levels of a
      single fit on the full column.
    * ``self.ci`` truthy: the column is resampled with replacement
      ``self.number_bootstrap`` times (each resample has ``len`` of the table
      rows), every resample is fitted, and ``self.idf[col]`` receives the
      concatenation ``[lower..., median..., upper...]`` of the per-quantile
      bootstrap percentiles, where lower/upper bracket the central
      ``self.alpha`` fraction of the bootstrap distribution.

    The bootstrap draws from NumPy's global random state, so results are
    reproducible only if the caller seeds ``np.random`` beforehand.
    """
    ams = self.reformatted_ams

    def return_levels(sample):
        # Fit (shape, loc, scale), then invert the survival function.
        params = gev.fit(sample)
        return gev.isf(self.quantiles, c=params[0], loc=params[1],
                       scale=params[2])

    if not self.ci:
        for name in ams.columns:
            self.idf[name] = return_levels(ams[name])
        return

    # Phase 1: bootstrap the return levels, one row per resample.
    n_rows = len(ams)
    boot_levels = {}
    for name in ams.columns:
        boot_levels[name] = np.asarray([
            return_levels(
                np.random.choice(ams[name].values, replace=True, size=n_rows))
            for _ in range(self.number_bootstrap)
        ])

    # Phase 2: percentiles across resamples (axis 0), kept per quantile.
    tail = (1.0 - self.alpha) / 2.0
    percentiles = [tail * 100, 50, (self.alpha + tail) * 100]
    for name in ams.columns:
        # Rows come back as lower, median, upper; flattening row-major gives
        # the lower block, then the median block, then the upper block.
        self.idf[name] = np.percentile(
            boot_levels[name], percentiles, axis=0).ravel()
def extreme_values(weighted_residuals, confidence_interval):
    '''
    This function uses extreme value theory to calculate the number of
    standard deviations away from the mean at which we should expect to bracket
    *all* of our n data points at a certain confidence level.

    It then uses that value to identify which (if any) of the data points
    lie outside that region, and calculates the corresponding probabilities
    of finding a data point at least that many standard deviations away.

    Parameters
    ----------
    weighted_residuals : 1D array-like of floats
        Residuals weighted by the square root of their variances
        wr_i = r_i/sqrt(var_i). Any sequence accepted by ``np.asarray``
        (list, tuple, ndarray) may be passed.
    confidence_interval : float
        Probability at which all the weighted residuals lie
        within the confidence bounds

    Returns
    -------
    confidence_bound : float
        Number of standard deviations at which we should expect to encompass
        all data at the user-defined confidence interval.
    indices : list of ints
        Indices of weighted residuals whose absolute value exceeds
        confidence_bound
    probabilities : array of floats
        The probabilities that the extreme data point of the distribution lies
        further from the mean than the observed position wr_i for each i in
        the "indices" output array.

    Notes
    -----
    The location, scale and shape of the extreme value distribution are
    approximations that are good for more than 10 data points. Both scale and
    shape divide by a power of log(n), so n must be greater than 1 for them
    to be finite.
    '''
    # Coerce once so that plain lists/tuples can be masked below; only the
    # magnitude of each residual matters for the two-sided test.
    magnitudes = np.abs(np.asarray(weighted_residuals))

    n = len(weighted_residuals)
    log_n = np.log(n)
    mean = norm.isf(1./n)
    scale = 0.8/np.power(log_n, 1./2.)  # good approximation for > 10 data points
    c = 0.33/np.power(log_n, 3./4.)  # good approximation for > 10 data points

    # We now need a 1-tailed probability from the given confidence_interval
    # p_total = 1. - confidence_interval = p_upper + p_lower - p_upper*p_lower
    # p_total = 1. - confidence_interval = 2p - p^2, therefore:
    p = 1. - np.sqrt(confidence_interval)
    confidence_bound = genextreme.isf(p, c, loc=mean, scale=scale)

    outliers = magnitudes > confidence_bound
    indices = np.flatnonzero(outliers).tolist()

    # Convert back to 2-tailed probabilities: 1 - cdf^2 = 2*sf - sf^2
    probabilities = 1. - np.power(
        genextreme.sf(magnitudes[outliers], c, loc=mean, scale=scale) - 1.,
        2.)

    return confidence_bound, indices, probabilities
def extreme_values(weighted_residuals, confidence_interval):
    '''
    This function uses extreme value theory to calculate the number of
    standard deviations away from the mean at which we should expect to bracket
    *all* of our n data points at a certain confidence level.

    It then uses that value to identify which (if any) of the data points
    lie outside that region, and calculates the corresponding probabilities
    of finding a data point at least that many standard deviations away.

    Parameters
    ----------
    weighted_residuals : 1D array-like of floats
        Residuals weighted by the square root of their variances
        wr_i = r_i/sqrt(var_i). Any sequence accepted by ``np.asarray``
        (list, tuple, ndarray) may be passed.
    confidence_interval : float
        Probability at which all the weighted residuals lie
        within the confidence bounds

    Returns
    -------
    confidence_bound : float
        Number of standard deviations at which we should expect to encompass
        all data at the user-defined confidence interval.
    indices : list of ints
        Indices of weighted residuals whose absolute value exceeds
        confidence_bound
    probabilities : array of floats
        The probabilities that the extreme data point of the distribution lies
        further from the mean than the observed position wr_i for each i in
        the "indices" output array.

    Notes
    -----
    The location, scale and shape of the extreme value distribution are
    approximations that are good for more than 10 data points. Both scale and
    shape divide by a power of log(n), so n must be greater than 1 for them
    to be finite.
    '''
    # Coerce once so that plain lists/tuples can be masked below; only the
    # magnitude of each residual matters for the two-sided test.
    magnitudes = np.abs(np.asarray(weighted_residuals))

    n = len(weighted_residuals)
    log_n = np.log(n)
    mean = norm.isf(1./n)
    scale = 0.8/np.power(log_n, 1./2.)  # good approximation for > 10 data points
    c = 0.33/np.power(log_n, 3./4.)  # good approximation for > 10 data points

    # We now need a 1-tailed probability from the given confidence_interval
    # p_total = 1. - confidence_interval = p_upper + p_lower - p_upper*p_lower
    # p_total = 1. - confidence_interval = 2p - p^2, therefore:
    p = 1. - np.sqrt(confidence_interval)
    confidence_bound = genextreme.isf(p, c, loc=mean, scale=scale)

    outliers = magnitudes > confidence_bound
    indices = np.flatnonzero(outliers).tolist()

    # Convert back to 2-tailed probabilities: 1 - cdf^2 = 2*sf - sf^2
    probabilities = 1. - np.power(
        genextreme.sf(magnitudes[outliers], c, loc=mean, scale=scale) - 1.,
        2.)

    return confidence_bound, indices, probabilities
def plot_return_values(annual_max, station_id):
    """Plot a fitted return-level curve against the empirical annual maxima.

    A new 20x6 figure is created with ``plt.subplots``. The red line is the
    return level for return periods 1..499 evaluated with ``genextreme.isf``;
    the blue dots are the sorted observations plotted at the empirical return
    period ``n / rank`` (rank 1 = largest value).

    Parameters
    ----------
    annual_max : sequence of floats
        Annual maximum values, in any order.
    station_id : str
        Used verbatim as the axes title.

    Returns
    -------
    None. The figure is left as the current pyplot figure.
    """
    fig, axes = plt.subplots(figsize=(20, 6))

    ranked = sorted(annual_max)
    # The positional 0 is only the optimiser's starting guess for the shape.
    mle = genextreme.fit(ranked, 0)
    mu = mle[1]
    sigma = mle[2]
    # NOTE(review): the fitted shape mle[0] is not used; the curve below is
    # drawn with shape 0 (Gumbel) together with the loc/scale of the
    # free-shape fit. Confirm this is intended before changing it.

    T = np.r_[1:500]
    sT = genextreme.isf(1. / T, 0, mu, sigma)
    axes.semilogx(T, sT, 'r')

    # Float ranks keep n / rank a true division on Python 2 as well, where
    # integer arrays would floor the empirical return periods.
    n_years = len(annual_max)
    ranks = np.arange(1, n_years + 1, dtype=float)
    axes.plot(n_years / ranks, ranked[::-1], 'bo')

    axes.set_title(station_id)
    axes.set_xlabel('Return Period (yrs)')
    axes.set_ylabel('Wind Speed (m/s)')
    axes.grid(True)
def plot_return_values(annual_max, station_id):
    """Plot a fitted return-level curve against the empirical annual maxima.

    A new 20x6 figure is created with ``plt.subplots``. The red line is the
    return level for return periods 1..499 evaluated with ``genextreme.isf``;
    the blue dots are the sorted observations plotted at the empirical return
    period ``n / rank`` (rank 1 = largest value).

    Parameters
    ----------
    annual_max : sequence of floats
        Annual maximum values, in any order.
    station_id : str
        Used verbatim as the axes title.

    Returns
    -------
    None. The figure is left as the current pyplot figure.
    """
    fig, axes = plt.subplots(figsize=(20, 6))

    ranked = sorted(annual_max)
    # The positional 0 is only the optimiser's starting guess for the shape.
    mle = genextreme.fit(ranked, 0)
    mu = mle[1]
    sigma = mle[2]
    # NOTE(review): the fitted shape mle[0] is not used; the curve below is
    # drawn with shape 0 (Gumbel) together with the loc/scale of the
    # free-shape fit. Confirm this is intended before changing it.

    T = np.r_[1:500]
    sT = genextreme.isf(1. / T, 0, mu, sigma)
    axes.semilogx(T, sT, 'r')

    # Float ranks keep n / rank a true division on Python 2 as well, where
    # integer arrays would floor the empirical return periods.
    n_years = len(annual_max)
    ranks = np.arange(1, n_years + 1, dtype=float)
    axes.plot(n_years / ranks, ranked[::-1], 'bo')

    axes.set_title(station_id)
    axes.set_xlabel('Return Period (yrs)')
    axes.set_ylabel('Wind Speed (m/s)')
    axes.grid(True)
# <codecell>

Image(url='http://tidesandcurrents.noaa.gov/est/images/color_legend.png')

# <markdowncell>

# <script type="text/javascript">
#         $('div.input').show();
# </script>

# <codecell>

# Return-level curve (red) versus empirical annual maxima (blue dots).
fig = plt.figure(figsize=(20, 6))
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
T = np.r_[1:250]
sT = genextreme.isf(1. / T, 0, mu, sigma)
axes.semilogx(T, sT, 'r')
N = np.r_[1:len(annual_max_levels) + 1]
Nmax = max(N)
# float() forces true division on Python 2, where int / int-array would
# floor the empirical return periods.
axes.plot(float(Nmax) / N, sorted(annual_max_levels)[::-1], 'bo')
title = s["long_name"][0]
axes.set_title(title)
axes.set_xlabel('Return Period (yrs)')
axes.set_ylabel('Meters above MHHW')
axes.set_xticklabels([0, 1, 10, 100, 1000])
axes.set_xlim([0, 260])
axes.set_ylim([0, 1.8])
axes.grid(True)

# <markdowncell>
sigma_wis = mle_wis[2]
xi_wis = mle_wis[0]
# Single parenthesised argument: prints identically on Python 2 and 3.
print("The mean, sigma, and shape parameters are %s, %s, and %s, resp." % (
    mu_wis, sigma_wis, xi_wis))

# <markdowncell>

# ### Return Value Plot

# <codecell>

fig, axes = plt.subplots(2, 1, figsize=(20, 12))
# fig = plt.figure()
# axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
T = np.r_[1:500]

# Top panel: curve from mu/sigma versus the annual_max observations.
sT = genextreme.isf(1. / T, 0, mu, sigma)
axes[0].semilogx(T, sT, 'r')
N = np.r_[1:len(annual_max) + 1]
Nmax = max(N)
# float() forces true division on Python 2, where int / int-array would
# floor the empirical return periods.
axes[0].plot(float(Nmax) / N, sorted(annual_max)[::-1], 'bo')
title = station_longName
axes[0].set_title(title)
axes[0].set_xlabel('Return Period (yrs)')
axes[0].set_ylabel('Significant Wave Height (m)')
axes[0].grid(True)

# Bottom panel: curve from mu_wis/sigma_wis versus the wis_maximums data.
sT_wis = genextreme.isf(1. / T, 0, mu_wis, sigma_wis)
axes[1].semilogx(T, sT_wis, 'r')
N = np.r_[1:len(wis_maximums) + 1]
Nmax = max(N)
axes[1].plot(float(Nmax) / N, sorted(wis_maximums)[::-1], 'bo')
mu_wis = mle_wis[1]
sigma_wis = mle_wis[2]
xi_wis = mle_wis[0]
# Single parenthesised argument: prints identically on Python 2 and 3.
print("The mean, sigma, and shape parameters are %s, %s, and %s, resp." % (mu_wis, sigma_wis, xi_wis))

# <markdowncell>

# ### Return Value Plot

# <codecell>

fig, axes = plt.subplots(2, 1, figsize=(20, 12))
# fig = plt.figure()
# axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
T = np.r_[1:500]

# Top panel: curve from mu/sigma versus the annual_max observations.
sT = genextreme.isf(1. / T, 0, mu, sigma)
axes[0].semilogx(T, sT, 'r')
N = np.r_[1:len(annual_max) + 1]
Nmax = max(N)
# float() forces true division on Python 2, where int / int-array would
# floor the empirical return periods.
axes[0].plot(float(Nmax) / N, sorted(annual_max)[::-1], 'bo')
title = station_longName
axes[0].set_title(title)
axes[0].set_xlabel('Return Period (yrs)')
axes[0].set_ylabel('Significant Wave Height (m)')
axes[0].grid(True)

# Bottom panel: curve from mu_wis/sigma_wis versus the wis_maximums data.
sT_wis = genextreme.isf(1. / T, 0, mu_wis, sigma_wis)
axes[1].semilogx(T, sT_wis, 'r')
N = np.r_[1:len(wis_maximums) + 1]
Nmax = max(N)
axes[1].plot(float(Nmax) / N, sorted(wis_maximums)[::-1], 'bo')