def transform_observation_CDF(self, COEFF, OBS, distribution):
    """Evaluate each sample's fitted CDF at its paired observation.

    Parameters
    ----------
    COEFF : sequence
        One parameter row per sample: ``[shape, mu, sigma]`` for ``'gev'``
        and ``[mu, sigma]`` for ``'TN'`` / ``'LN'``.
    OBS : sequence
        Observations, indexed in step with ``COEFF``.
    distribution : str
        ``'gev'``, ``'TN'`` (``TN_CDF`` with bounds ``a=0.0``, ``b=inf``) or
        ``'LN'`` (standard normal CDF of the standardised log-observation).

    Returns
    -------
    list
        One CDF value per row of ``COEFF``; an empty list when
        ``distribution`` is not one of the recognised names.
    """
    cdf_collection = []
    if distribution == 'gev':
        for sample_index, row in enumerate(COEFF):
            shape = row[0]
            mu = row[1]
            sigma = row[2]
            cdf_collection.append(
                gev.cdf(OBS[sample_index], shape, loc=mu, scale=sigma))
    # A membership test is required here: ``distribution == 'TN' or 'LN'``
    # is always truthy because the non-empty string 'LN' is truthy.
    elif distribution in ('TN', 'LN'):
        for sample_index, row in enumerate(COEFF):
            mu = row[0]
            sigma = row[1]
            if distribution == 'TN':
                cdf = TN_CDF(OBS[sample_index], mu, sigma, a=0.0, b=np.inf)
            else:
                # Machine epsilon keeps log() finite for a zero observation.
                cdf = norm.cdf(
                    (np.log(OBS[sample_index] + sys.float_info.epsilon) - mu)
                    / sigma)
            cdf_collection.append(cdf)
    return cdf_collection
def CalThresholdCDF(self, COEFF, distribution, threshold):
    """Return per-sample exceedance and non-exceedance probabilities.

    Parameters
    ----------
    COEFF : sequence
        One parameter row per sample: ``[shape, mu, sigma]`` for ``'gev'``,
        ``[mu, sigma]`` for ``'LN'`` and ``'TN'``.
    distribution : str
        ``'gev'``, ``'LN'`` or ``'TN'``.
    threshold : float
        Value at which every sample's CDF is evaluated.

    Returns
    -------
    tuple of list
        ``(P(X > threshold), P(X <= threshold))``, one entry per sample.
        For any other ``distribution`` both lists keep their placeholder
        empty-list entries.
    """
    n_samples = len(COEFF)
    above = [[]] * n_samples
    below = [[]] * n_samples

    if distribution == 'gev':
        def cdf_at_threshold(row):
            shape, mu, sigma = row[0], row[1], row[2]
            return gev.cdf(threshold, shape, loc=mu, scale=sigma)
    elif distribution == 'LN':
        def cdf_at_threshold(row):
            mu, sigma = row[0], row[1]
            # Machine epsilon keeps log() finite for a zero threshold.
            return norm.cdf(
                (np.log(threshold + sys.float_info.epsilon) - mu) / sigma)
    elif distribution == 'TN':
        def cdf_at_threshold(row):
            mu, sigma = row[0], row[1]
            return TN_CDF(threshold, mu, sigma, a=0.0, b=np.inf)
    else:
        return above, below

    for k in range(n_samples):
        above[k] = 1.0 - cdf_at_threshold(COEFF[k])
        below[k] = 1.0 - above[k]
    return above, below
def gev_CRPS(x, mu, sigma, shape_para):
    """Evaluate the closed-form CRPS expression for a GEV forecast.

    Parameters
    ----------
    x : float
        Observed value.
    mu, sigma : float
        Location and scale, passed to ``gev.cdf`` as ``loc`` and ``scale``.
    shape_para : float
        Shape, passed to ``gev.cdf`` as ``c``.

    Returns
    -------
    float
        The score. A separate formula (using ``ei``) is used when
        ``shape_para`` is exactly ``0.0``.

    Notes
    -----
    The CDF is evaluated once, from the inputs as given; the later
    adjustments of ``x`` and ``shape_para`` do not re-evaluate it.
    """
    eps = sys.float_info.epsilon
    prob = gev.cdf(x, c=shape_para, loc=mu, scale=sigma)
    # Keep the probability strictly inside the unit interval.
    if prob < 0.0:
        prob = 0.0 + eps
    elif prob > 1.0:
        prob = 1.0 - eps

    if shape_para == 0.0:
        euler_mascheroni = 0.577215664901532
        return mu - x + sigma * (euler_mascheroni - np.log(2.0)
                                 ) - 2.0 * sigma * (ei(np.log(prob)))

    # Move x onto the boundary when 1 + shape * (x - mu) / sigma is not positive.
    if 1.0 + shape_para * (x - mu) / sigma <= 0.0:
        x = -1.0 * sigma / shape_para + mu
    # Clamp the shape to just inside (-0.95, 0.95).
    if shape_para < -0.95:
        shape_para = -0.95 + eps
    elif shape_para > 0.95:
        shape_para = 0.95 - eps

    gamma_term = special.gamma(1.0 - shape_para)
    incomplete = special.gammainc(1.0 - shape_para, -1.0 * np.log(prob))
    sub = np.power(2.0, shape_para) * gamma_term - 2.0 * gamma_term * incomplete
    return (mu - x - sigma / shape_para) * (1.0 - 2.0 * prob) - (
        sigma / shape_para) * sub
def extreme_value_prob(params, NPM, perc):
    """Score each row of ``NPM`` by the GEV probability of its tail mean.

    For every row the absolute values are sorted, the largest
    ``round(n_columns * perc)`` are kept, the top 10% of those are dropped,
    and the rest are averaged. The averages are passed to
    ``genextreme.cdf(m, *params)``.

    Parameters
    ----------
    params : sequence
        Arguments forwarded to ``genextreme.cdf`` after the data;
        ``params[0]`` is the shape.
    NPM : numpy.ndarray
        2-D array, one row per scored item.
    perc : float
        Fraction of each row's columns kept as the upper tail.

    Returns
    -------
    numpy.ndarray
        The CDF when ``params[0] <= 0``, otherwise ``1 - CDF``.
    """
    row_count = NPM.shape[0]
    col_count = NPM.shape[1]
    tail_len = int(round(col_count * perc))

    def _tail_mean(row):
        tail = np.sort(np.abs(row))[col_count - tail_len:]
        kept = int(np.floor(0.90 * tail.shape[0]))
        return np.mean(tail[0:kept])

    m = np.zeros(row_count)
    for row_idx in range(row_count):
        m[row_idx] = _tail_mean(NPM[row_idx, :])

    shape = params[0]
    if shape <= 0:  # if the shape is right tailed for extreme values
        probs = genextreme.cdf(m, *params)
    elif shape > 0:  # if the shape is left tailed for extreme values
        probs = 1 - genextreme.cdf(m, *params)
    return probs
def gevfit(sr):
    """Fit a GEV distribution to ``sr``, print a summary and plot the fit.

    The fitted parameters, median, mean, standard deviation and 95% interval
    are printed. A two-panel figure is then shown: the fitted PDF over a
    density histogram on the left, and the fitted CDF over a cumulative
    density histogram on the right.

    Parameters
    ----------
    sr : pandas.Series
        Sample to fit. ``sr.name`` is used in the panel titles.

    Returns
    -------
    None
    """
    c, mu, sigma = gev.fit(sr)
    # Arguments follow the order of the labels (shape, location, scale).
    print("""
    GEV Fit Parameters:
        shape parameter c: %s
        location parameter mu: %s
        scale parameter sigma: %s
    """ % (c, mu, sigma))
    print("Median", gev.median(c, mu, sigma))
    print("Mean", gev.mean(c, mu, sigma))
    print("Std dev", gev.std(c, mu, sigma))
    print("95% interval: ", gev.interval(0.95, c, mu, sigma))

    # Evaluate the fitted curves slightly beyond the observed range.
    x = np.linspace(np.min(sr) - 5, np.max(sr) + 5, 500)
    gev_pdf = gev.pdf(x, c, mu, sigma)
    gev_cdf = gev.cdf(x, c, mu, sigma)

    plt.figure(figsize=(12, 6))

    ax1 = plt.subplot(1, 2, 1)
    # ``density`` replaces the ``normed`` keyword removed from matplotlib.
    plt.hist(sr, density=True, alpha=0.2, label='Raw Data', bins='auto')
    plt.plot(x, gev_pdf, 'r--', label='GEV Fit')
    plt.legend(loc='upper left')
    ax1.set_title('%s_Probability Density Fraction' % (sr.name))
    ax1.set_xlabel('Predicted Fatigue Limit (MPa)')
    ax1.set_ylabel('Probability')
    ax1.grid()

    ax2 = plt.subplot(1, 2, 2)
    plt.hist(sr, density=True, alpha=0.2, label='Raw Data', cumulative=True,
             bins='auto')
    plt.plot(x, gev_cdf, 'r--', label='GEV Fit')
    plt.legend(loc='upper left')
    ax2.set_title('%s_Cumulative Density Fraction' % (sr.name))
    ax2.set_xlabel('Predicted Fatigue Limit (MPa)')
    ax2.set_ylabel('Density')
    ax2.grid()

    plt.show()
def prob(self, x, estimador):
    """Return the fitted GEV CDF evaluated at ``x``.

    If ``self.shape``, ``self.loc`` or ``self.scale`` is not set yet, the
    estimator method named ``estimador`` is called once and the parameters
    are read again.

    Parameters
    ----------
    x : float or array-like
        Value(s) at which to evaluate the CDF.
    estimador : str
        Name of the estimator method to run when the parameters are missing;
        must be listed in ``self.estimadores``.

    Returns
    -------
    float or numpy.ndarray
        ``genextreme.cdf(x, c=self.shape, loc=self.loc, scale=self.scale)``.

    Raises
    ------
    ValueError
        If the parameters are missing and ``estimador`` is not in
        ``self.estimadores``.
    AttributeError
        If the estimator ran but still did not set the parameters.
    """
    try:
        params = (self.shape, self.loc, self.scale)
    except AttributeError:
        if estimador not in self.estimadores:
            raise ValueError('Estimador não existe')
        # Plain attribute lookup instead of eval() on a built string.
        getattr(self, estimador)()
        # Read once more rather than recursing: an estimator that fails to
        # set the parameters now surfaces as AttributeError instead of
        # recursing without end.
        params = (self.shape, self.loc, self.scale)
    return genextreme.cdf(x, c=params[0], loc=params[1], scale=params[2])
def EstimaProbabilidade(self, Magnitude, Parametros):
    """Return the CDF of ``Magnitude`` for the configured series type.

    Parameters
    ----------
    Magnitude : float or array-like
        Value(s) at which to evaluate the CDF.
    Parametros : sequence
        ``[shape, loc, scale]`` of the fitted distribution.

    Returns
    -------
    float or numpy.ndarray
        ``genpareto.cdf`` when ``self.tipoSerie == 'Parcial'``,
        ``genextreme.cdf`` when ``self.tipoSerie == 'Anual'``.

    Raises
    ------
    ValueError
        If ``self.tipoSerie`` is neither ``'Parcial'`` nor ``'Anual'``.
    """
    if self.tipoSerie == 'Parcial':
        distribuicao = genpareto
    elif self.tipoSerie == 'Anual':
        distribuicao = genextreme
    else:
        # Previously this fell through to an unbound local variable.
        raise ValueError(
            "tipoSerie must be 'Parcial' or 'Anual', got %r"
            % (self.tipoSerie,))
    return distribuicao.cdf(Magnitude, Parametros[0], loc=Parametros[1],
                            scale=Parametros[2])
def test_gev_cdf():
    """Check the custom ``gev_cdf`` against scipy over a parameter grid.

    Every combination of the shapes, locations, scales and evaluation points
    below must agree with ``gev.cdf`` to four decimals.
    """
    shapes = [-10, -0.001, 0.0, 0.001, 10]
    locs = [10, 500]
    scales = [0.01, 100]
    points = [-1000, 0, 100]
    grid = [(shape, loc, scale, x)
            for shape in shapes
            for loc in locs
            for scale in scales
            for x in points]
    for shape, loc, scale, x in grid:
        estimate_mine = gev_cdf(x=x, shape=shape, loc=loc, scale=scale)
        # scipy's ``c`` has the opposite sign to the shape used here.
        estimate_scipy = gev.cdf(x=x, c=-shape, loc=loc, scale=scale)
        assert_almost_equal(estimate_mine, estimate_scipy, decimal=4)
def extreme_value_prob(params, NPM, perc):
    """Return the GEV CDF of each row's trimmed upper-tail mean.

    For every row of ``NPM`` the absolute values are sorted, the largest
    ``round(n_columns * perc)`` are kept, the top 10% of those are dropped,
    and the remainder is averaged. The averages are passed to
    ``genextreme.cdf(m, *params)``.

    Parameters
    ----------
    params : sequence
        Arguments forwarded to ``genextreme.cdf`` after the data.
    NPM : numpy.ndarray
        2-D array, one row per scored item.
    perc : float
        Fraction of each row's columns kept as the upper tail.

    Returns
    -------
    numpy.ndarray
        One probability per row of ``NPM``.
    """
    n_rows, n_cols = NPM.shape[0], NPM.shape[1]
    first_kept = n_cols - int(round(n_cols * perc))

    tail_means = []
    for row in range(n_rows):
        tail = np.sort(np.abs(NPM[row, :]))[first_kept:]
        upper = int(np.floor(0.90 * tail.shape[0]))
        tail_means.append(np.mean(tail[0:upper]))

    m = np.array(tail_means, dtype=float)
    return genextreme.cdf(m, *params)
def extreme_value_prob(params, NPM, perc):
    """Return the GEV CDF of each row's 5%-trimmed upper-tail mean.

    For every row of ``NPM`` the absolute values are sorted, the largest
    ``round(n_columns * perc)`` are kept, and ``trim_mean(..., 0.05)`` of
    that tail is taken. The results are passed to
    ``genextreme.cdf(m, *params)``.

    Parameters
    ----------
    params : sequence
        Arguments forwarded to ``genextreme.cdf`` after the data.
    NPM : numpy.ndarray
        2-D array, one row per scored item.
    perc : float
        Fraction of each row's columns kept as the upper tail.

    Returns
    -------
    numpy.ndarray
        One probability per row of ``NPM``.
    """
    row_count = NPM.shape[0]
    col_count = NPM.shape[1]
    tail_start = col_count - int(round(col_count * perc))

    m = np.zeros(row_count)
    row = 0
    while row < row_count:
        ordered = np.sort(np.abs(NPM[row, :]))
        m[row] = trim_mean(ordered[tail_start:], 0.05)
        row += 1
    return genextreme.cdf(m, *params)
def gev_fit(var_fit):
    """Fit a GEV to ``var_fit`` and return a return-period curve.

    The distribution is fitted with ``-0.1`` as the starting shape, its CDF
    is evaluated on 200 points over ``[0, 10]``, and only points whose CDF
    exceeds 0.1 are kept.

    Parameters
    ----------
    var_fit : array-like
        Sample to fit.

    Returns
    -------
    ts : numpy.ndarray
        ``1 / (1 - CDF)`` at the retained grid points.
    vv : numpy.ndarray
        The retained grid points.
    """
    initial_shape = -0.1
    grid = np.linspace(0, 10, 200)
    fitted = genextreme.fit(var_fit, initial_shape)
    sha_g, loc_g, sca_g = fitted
    cdf_on_grid = genextreme.cdf(grid, sha_g, loc_g, sca_g)
    keep = cdf_on_grid > 0.1
    # TODO gev params 95% confidence intervals
    return 1 / (1 - cdf_on_grid[keep]), grid[keep]
def graAcumulado(self, dados, forma, posicao, escala):
    """Plot the GEV cumulative probability of the data and show the figure.

    Parameters
    ----------
    dados : list
        Sample values; sorted in place before plotting.
    forma, posicao, escala : float
        Shape, location and scale passed positionally to ``gev.cdf``.

    Returns
    -------
    None
    """
    # In-place sort: the caller's list is reordered as well.
    dados.sort()
    probabilidades = gev.cdf(dados, forma, posicao, escala)
    legenda = 'Forma: %s\nPosicao: %s\nEscala: %s' % (forma, posicao, escala)

    plt.plot(dados, probabilidades, '-r', label=legenda)
    plt.ylabel('probabilidade de não excedência')
    plt.xlabel('Vazão(m³/s)')
    plt.legend(numpoints=1, loc="best")
    plt.show()
def test_flood_probability():
    """Check the model's exceedance probability against a scipy reference.

    The reference uses ``loc=420`` and ``scale=42.0``.
    NOTE(review): these presumably follow from the model inputs as
    250 + 2 * (2100 - 2015) and 0.1 * 420 -- confirm against FloodModel.
    """
    height = 500
    model = FloodModel(
        loc_base=250,
        loc_trend=2,
        coeff_var=0.1,
        shape=0.2,
        zero_time=2015,
        scale_min=1e-3,
    )
    # scipy's ``c`` is the negative of the model's shape parameter.
    expected = 1.0 - gev.cdf(x=height, c=-0.2, loc=420, scale=42.0)
    actual = model.calc_exceedance_prob(2100, height)
    assert_almost_equal(actual, expected, decimal=4)
def plotCDF(x, gevfit, e, xLabel, Title, EventFlow=None, EventT=None,
            EventLabel=None, fname=None):
    '''
    Plot the fitted GEV CDF of ``x`` together with the recorded data.

    The fitted curve is drawn as a line, the recorded data as a scatter of
    ``1 - e``, and a dashed horizontal line marks probability 1. Marker
    colour comes from the module-level ``colors`` sequence.
    -------------------------------------------------------------------------
    Input:
        x:          Pandas series
        gevfit:     Tuple with the three fitted GEV parameters, passed
                    positionally to genextreme.cdf
        e:          Numpy array with exceedance probabilities
        xLabel:     Str label to use for x-axis
        Title:      Str chart title
        EventFlow:  (Optional) Flow of the event to highlight as a separate
                    marker
        EventT:     (Optional) Return period of that event; it is plotted at
                    1 - 1/EventT
        EventLabel: (Optional) Legend label of that event
        fname:      (Optional) Full path to filename to save the figure in
                    *.png format; when omitted the figure is shown instead

    The event marker is drawn only when EventFlow, EventT and EventLabel are
    all truthy.
    '''
    fig, ax = plt.subplots(1, 1)
    upper = max(x)
    shape, location, scale = gevfit[0], gevfit[1], gevfit[2]

    ax.hlines(1, 0, upper + 250, colors='k', linestyles='--')
    fitted = genextreme.cdf(x, shape, location, scale)
    ax.plot(x, fitted, color='k', label='Fit')
    ax.scatter(x, 1 - e, color=colors[0], label='Recorded data', s=15)

    highlight_event = EventFlow and EventT and EventLabel
    if highlight_event:
        ax.scatter(EventFlow, 1 - (1 / EventT), c='g', s=100,
                   label=EventLabel)

    ax.yaxis.grid()
    ax.set_xlabel(xLabel)
    ax.set_ylabel('CDF [-]')
    ax.set_xlim(0, upper + 100)
    ax.set_ylim(0, 1)
    ax.set_title(Title)
    ax.grid(True, which='both')
    ax.legend(loc='lower right')

    if not fname:
        plt.show()
    else:
        plt.savefig(fname, dpi=600.)
def EstimaFrequencias(self, Parametros):
    """Return the fitted CDF evaluated at the series' peaks.

    For a ``'Parcial'`` series the peaks come from ``serieMaxParcial`` with
    the limit returned by ``AchaLimite(2)`` and are evaluated with
    ``genpareto.cdf``. For an ``'Anual'`` series they come from
    ``serieMaxAnual`` and are evaluated with ``genextreme.cdf``. In both
    cases the peaks are sorted in descending order and printed first.

    Parameters
    ----------
    Parametros : sequence
        ``[shape, loc, scale]`` of the fitted distribution.

    Returns
    -------
    numpy.ndarray
        CDF values, in the same (descending-peak) order as the peaks.

    Raises
    ------
    ValueError
        If ``self.tipoSerie`` is neither ``'Parcial'`` nor ``'Anual'``.
    """
    if self.tipoSerie == 'Parcial':
        limite = lp.LimiteParcial(self.dadoSerie).AchaLimite(2)
        Parciais = se.Series(self.dadoSerie).serieMaxParcial(limite)
        datasP, PicosParciais = se.Series(Parciais).separaDados()
        PicosParciais.sort(reverse=True)
        print(PicosParciais)
        frequencias = genpareto.cdf(PicosParciais, Parametros[0],
                                    loc=Parametros[1], scale=Parametros[2])
    elif self.tipoSerie == 'Anual':
        Anuais = se.Series(self.dadoSerie).serieMaxAnual()
        datasA, PicosAnuais = se.Series(Anuais).separaDados()
        PicosAnuais.sort(reverse=True)
        print(PicosAnuais)
        frequencias = genextreme.cdf(PicosAnuais, Parametros[0],
                                     loc=Parametros[1], scale=Parametros[2])
    else:
        # Previously this fell through to an unbound local variable.
        raise ValueError(
            "tipoSerie must be 'Parcial' or 'Anual', got %r"
            % (self.tipoSerie,))
    return frequencias
# Script fragment: i, RR_L, RR_SA, L_rain and SA_rain are defined before this
# excerpt.
print(i, RR_L, RR_SA)
# Divide the reference value 40 by each region's RR.
# NOTE(review): presumably RR_* are risk ratios and 40 is a return period in
# years -- confirm against the code that computes them.
L_return = 40 / RR_L
SA_return = 40 / RR_SA
#L_rain = pd.read_csv('/Users/Jasper/Lesotho-ERA5.csv')
#SA_rain = pd.read_csv('/Users/Jasper/SA-ERA5.csv')
# Evenly spaced values k / (N + 1) for k = 1..N with N = len(L_rain).
# NOTE(review): despite the name, these values lie strictly between 0 and 1.
return_period = np.linspace(1, len(L_rain), len(L_rain))
return_period = return_period / (len(return_period) + 1)
# Order both tables by their 'JFM_prec' column.
L_rain = L_rain.sort_values(by=['JFM_prec'])
SA_rain = SA_rain.sort_values(by=['JFM_prec'])
# Fit a GEV to the SA series and tabulate 1 / CDF on 1000 points in
# [100, 1000].
shape_SA, loc_SA, scale_SA = gev.fit(SA_rain['JFM_prec'])
xx_SA = np.linspace(100, 1000, 1000)
yy_SA = 1 / (gev.cdf(xx_SA, shape_SA, loc_SA, scale_SA))
# Same for the L series.
shape_L, loc_L, scale_L = gev.fit(L_rain['JFM_prec'])
xx_L = np.linspace(100, 1000, 1000)
yy_L = 1 / (gev.cdf(xx_L, shape_L, loc_L, scale_L))
### find the index
# Grid value whose 1 / CDF is closest to the rescaled target of each region.
id_SA_return1 = (np.abs(yy_SA - SA_return)).argmin()
val_SA_return = xx_SA[id_SA_return1]
id_L_return1 = (np.abs(yy_L - L_return)).argmin()
val_L_return = xx_L[id_L_return1]
### find the index
# Grid value whose 1 / CDF is closest to the unscaled reference value 40.
id_SA_return2 = (np.abs(yy_SA - 40)).argmin()
val_SA_return_ACT = xx_SA[id_SA_return2]
def extremal_distribution_fit(data, var_name, sample, threshold, fit_type,
                              x_min, x_max, n_points, loc=None, scale=None,
                              cumulative=True):
    """Fit an extreme-value model to ``sample`` and evaluate it on a grid.

    Parameters
    ----------
    data : pandas.DataFrame
        Full record; ``data[var_name].index.year`` is used to count years.
    var_name : str
        Column of ``data`` that holds the variable.
    sample : sequence
        Extreme values to fit.
    threshold : float
        Threshold passed to the generalized Pareto fit.
    fit_type : str
        ``'gpd'``, ``'coles'``, ``'gev'`` or ``'poisson'``.
    x_min, x_max, n_points :
        Definition of the evaluation grid (``np.linspace``). Not used by
        ``'coles'``, whose grid is the integers 1..500.
    loc, scale : optional
        Forwarded to the fitting helpers.
    cumulative : bool, optional
        Evaluate the CDF (and the return-period curve) when True, the PDF
        otherwise.

    Returns
    -------
    tuple
        ``(param, x, y, y_rp)``. Entries that a branch does not compute stay
        ``None``; an unrecognised ``fit_type`` returns four ``None`` values.
        ``param`` is ``[shape, loc, scale]`` except for ``'poisson'``, where
        it is ``[shape, scale, loc]``.
    """
    # Initialization of the output variables
    param = None
    x = None
    y = None
    y_rp = None

    if fit_type == 'gpd':
        # Fit the exceedances over threshold to Generalized Pareto distribution
        param = generalized_pareto_distribution_fit(sample, threshold, loc,
                                                    scale)

        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)
        if cumulative:
            y = genpareto.cdf(x, param[0], param[1], param[2])
            # Calculate the number of extreme peaks per year
            n_peaks_year = len(sample) / len(
                data[var_name].index.year.unique())
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genpareto.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'coles':
        # Fit the exceedances over threshold to Generalized Pareto distribution
        param = generalized_pareto_distribution_fit(sample, threshold, loc,
                                                    scale)
        x = np.arange(1, 501)
        u = param[1]
        sigma = param[2]
        xi = param[0]

        # Mean number of data points in a year
        n_y = len(data[var_name]) / len(data[var_name].index.year.unique())
        # Fraction of the record that is a peak over threshold
        z_u = len(sample) / len(data[var_name])
        # n_y * z_u is the number of peaks over threshold per year
        y_rp = u + (sigma / xi) * (((x * n_y * z_u)**xi) - 1)

    elif fit_type == 'gev':
        param = generalized_extreme_value_distribution_fit(sample, loc, scale)

        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)
        if cumulative:
            y = genextreme.cdf(x, param[0], param[1], param[2])
            # Calculate the number of extreme peaks per year
            n_peaks_year = 1
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            # The density must come from the distribution that was fitted
            # (GEV), not from the generalized Pareto.
            y = genextreme.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'poisson':
        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        # Fit the exceedances over threshold to Generalized Pareto distribution
        gpd_param = generalized_pareto_distribution_fit(
            sample, threshold, loc, scale)

        # Poisson parameter: number of events in the sample per year
        rate = len(sample) / len(data[var_name].index.year.unique())

        # Poisson-Pareto parameters, taken from the [shape, loc, scale] fit
        xi = gpd_param[0]
        sigma = gpd_param[2]
        u = gpd_param[1]

        # Equivalent GEV parameters, stored as [shape, scale, loc]
        param = [0, 0, 0]
        param[0] = -xi
        param[1] = sigma * (rate**xi)
        param[2] = u + ((sigma / xi) * ((rate**xi) - 1))

        if cumulative:
            y = genextreme.cdf(x, param[0], param[2], param[1])
            # Calculate the number of extreme peaks per year
            n_peaks_year = 1
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genextreme.pdf(x, param[0], param[2], param[1])

    return param, x, y, y_rp
def StatisticalProperties(self, PathNodes, PathTS, StartDate, WarmUpPeriod,
                          SavePlots, SavePath, SeparateFiles=False,
                          Filter=False, Distibution="GEV",
                          EstimateParameters=False, Quartile=0,
                          RIMResults=False, SignificanceLevel=0.1):
    """
    =============================================================================
      StatisticalProperties(PathNodes, PathTS, StartDate, WarmUpPeriod,
                            SavePlots, SavePath, SeparateFiles=False,
                            Filter=False, Distibution="GEV",
                            EstimateParameters=False, Quartile=0,
                            RIMResults=False, SignificanceLevel=0.1)
    =============================================================================

    StatisticalProperties method reads the SWIM output file (.dat file) that
    contains the time series of discharge for some computational nodes and
    calculates some statistical properties.

    The code assumes that the time series are of a daily temporal resolution,
    and that the hydrological year is 1-Nov/31-Oct (Petrow and Merz, 2009,
    JoH).

    Parameters
    ----------
        1-PathNodes : [String]
            the name of the file which contains the ID of the computational
            nodes you want to do the statistical analysis for, the
            ObservedFile should contain the discharge time series of these
            nodes in order.
        2-PathTS : [String]
            the name of the SWIM result file (the .dat file), or the
            directory holding one "<node id>.txt" file per node when
            SeparateFiles is True.
        3-StartDate : [String]
            the beginning date of the time series, in "%Y-%m-%d" format.
        4-WarmUpPeriod : [Integer]
            the number of days you want to neglect at the beginning of the
            simulation (warm up period).
        5-SavePlots : [Bool]
            if True, the pdf/cdf figure and the probability plot of every
            node are saved under SavePath/Figures.
        6-SavePath : [String]
            the path where you want to save the statistical properties.
        7-SeparateFiles : [Bool]
            if the discharge data are stored in separate files, not all in
            one file, SeparateFiles should be True, default [False].
        8-Filter : [Bool/Number]
            observed or RIM result data can have gaps filled with a specific
            value; give that value here to drop it from the annual maxima.
            Any non-bool value switches the filter on, default [False].
        9-Distibution : [String]
            "GEV" fits scipy's genextreme; any other value fits a Gumbel
            distribution, default ["GEV"].
        10-EstimateParameters : [Bool]
            if True the Gumbel parameters are estimated with
            Gumbel.EstimateParameter instead of scipy's fit, default [False].
        11-Quartile : [Number]
            quantile of the annual maxima (np.quantile) used as threshold
            when EstimateParameters is True, default [0].
        12-RIMResults : [Bool]
            if the files are results from RIM or observed, as the format
            differs between the two, default [False].
        13-SignificanceLevel : [Number]
            passed to the ProbapilityPlot helpers and used to label the
            confidence limits, default [0.1].

    Returns
    -------
        1-Statistical Properties.csv:
            file containing some statistical properties like mean, std, min,
            5%, 25%, median, 75%, 95%, max, t_beg, t_end, nyr, q1.5, q2, q5,
            q10, q25, q50, q100, q200, q500, q1000. The same table is stored
            in self.StatisticalPr.
        2-DistributionProperties.csv:
            file containing the fitted distribution parameters of every
            node. The same table is stored in self.DistributionPr.

    Raises
    ------
        NotImplementedError
            when EstimateParameters is True together with Distibution="GEV".
    """
    ComputationalNodes = np.loadtxt(PathNodes, dtype=np.uint16)
    # hydrographs
    if SeparateFiles:
        TS = pd.DataFrame()
        if RIMResults:
            for i in range(len(ComputationalNodes)):
                TS.loc[:, int(ComputationalNodes[i])] = self.ReadRIMResult(
                    PathTS + "/" + str(int(ComputationalNodes[i])) + '.txt')
        else:
            for i in range(len(ComputationalNodes)):
                TS.loc[:, int(ComputationalNodes[i])] = np.loadtxt(
                    PathTS + "/" + str(int(ComputationalNodes[i])) +
                    '.txt')  # ,skiprows = 0

        StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
        EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
        ind = pd.date_range(StartDate, EndDate)
        TS.index = ind
    else:
        TS = pd.read_csv(PathTS, delimiter=r'\s+', header=None)
        StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
        EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
        TS.index = pd.date_range(StartDate, EndDate, freq="D")
        # delete the first two columns
        del TS[0], TS[1]
        TS.columns = ComputationalNodes

    # neglect the warm up period at the beginning of the time series
    TS = TS.loc[StartDate + dt.timedelta(days=WarmUpPeriod):EndDate, :]

    # List of the table output, including some general data and the return
    # periods.
    col_csv = [
        'mean', 'std', 'min', '5%', '25%', 'median', '75%', '95%', 'max',
        't_beg', 't_end', 'nyr'
    ]
    rp_name = [
        'q1.5', 'q2', 'q5', 'q10', 'q25', 'q50', 'q100', 'q200', 'q500',
        'q1000'
    ]
    col_csv = col_csv + rp_name

    # Declare a dataframe for the output file, with as index the gauge
    # numbers and as columns all the output names.
    StatisticalPr = pd.DataFrame(np.nan, index=ComputationalNodes,
                                 columns=col_csv)
    StatisticalPr.index.name = 'ID'
    DistributionPr = pd.DataFrame(np.nan, index=ComputationalNodes,
                                  columns=['loc', 'scale'])
    DistributionPr.index.name = 'ID'

    # Required return periods, one per entry of rp_name and in the same
    # order. A duplicated 50 here used to shift every quantile from q100
    # onwards by one position.
    T = [1.5, 2, 5, 10, 25, 50, 100, 200, 500, 1000]
    T = np.array(T)
    # Non-exceedance probabilities of the chosen return periods.
    F = 1 - (1 / T)

    # Iteration over all the gauge numbers.
    for i in ComputationalNodes:
        QTS = TS.loc[:, i]
        # The time series is resampled to the annual maxima, and turned into
        # a numpy array.
        # The hydrological year is 1-Nov/31-Oct (from Petrow and Merz, 2009,
        # JoH).
        amax = QTS.resample('A-OCT').max().values

        if not isinstance(Filter, bool):
            amax = amax[amax != Filter]

        if EstimateParameters:
            # estimate the parameters through an optimization
            threshold = np.quantile(amax, Quartile)
            if Distibution == "GEV":
                # Fail explicitly; this combination used to print a note and
                # then crash on an unbound variable.
                raise NotImplementedError(
                    "Parameter estimation through optimization is not "
                    "implemented for the GEV distribution yet")
            else:
                param = Gumbel.EstimateParameter(amax, Gumbel.ObjectiveFn,
                                                 threshold)
                param_dist = [param[1], param[2]]
        else:
            # estimate the parameters through the maximum likelihood method
            if Distibution == "GEV":
                param_dist = genextreme.fit(amax)
            else:
                # A gumbel distribution is fitted to the annual maxima
                param_dist = gumbel_r.fit(amax)

        if Distibution == "GEV":
            DistributionPr.loc[i, 'c'] = param_dist[0]
            DistributionPr.loc[i, 'loc'] = param_dist[1]
            DistributionPr.loc[i, 'scale'] = param_dist[2]
        else:
            DistributionPr.loc[i, 'loc'] = param_dist[0]
            DistributionPr.loc[i, 'scale'] = param_dist[1]

        # Return periods from the fitted distribution are stored.
        # get the discharge corresponding to the return periods
        if Distibution == "GEV":
            Qrp = genextreme.ppf(F, param_dist[0], loc=param_dist[1],
                                 scale=param_dist[2])
        else:
            Qrp = gumbel_r.ppf(F, loc=param_dist[0], scale=param_dist[1])

        # to get the non-exceedance probability for a specific value
        # sort the amax
        amax.sort()
        # empirical probabilities from ST.Weibul
        cdf_Weibul = ST.Weibul(amax)
        # ProbapilityPlot returns the theoretical values for the fitted
        # parameters together with the confidence limits
        if Distibution == "GEV":
            Qth, Qupper, Qlower = GEV.ProbapilityPlot(
                param_dist, cdf_Weibul, amax, SignificanceLevel)

            # to calculate the F theoretical
            Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
            # loc is param_dist[1]; the scale used to be passed here too.
            pdf_fitted = genextreme.pdf(Qx, param_dist[0], loc=param_dist[1],
                                        scale=param_dist[2])
            cdf_fitted = genextreme.cdf(Qx, param_dist[0], loc=param_dist[1],
                                        scale=param_dist[2])
        else:
            Qth, Qupper, Qlower = Gumbel.ProbapilityPlot(
                param_dist, cdf_Weibul, amax, SignificanceLevel)

            # to calculate the F theoretical
            Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
            pdf_fitted = gumbel_r.pdf(Qx, loc=param_dist[0],
                                      scale=param_dist[1])
            cdf_fitted = gumbel_r.cdf(Qx, loc=param_dist[0],
                                      scale=param_dist[1])
        # the return period follows as T = 1/(1-F)

        if SavePlots:
            fig = plt.figure(60, figsize=(20, 10))
            gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)
            # Plot the histogram and the fitted distribution, save it for
            # each gauge.
            ax1 = fig.add_subplot(gs[0, 0])
            ax1.plot(Qx, pdf_fitted, 'r-')
            ax1.hist(amax, density=True)
            ax1.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
            ax1.set_ylabel('pdf', fontsize=15)

            ax2 = fig.add_subplot(gs[0, 1])
            ax2.plot(Qx, cdf_fitted, 'r-')
            ax2.plot(amax, cdf_Weibul, '.-')
            ax2.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
            ax2.set_ylabel('cdf', fontsize=15)

            plt.savefig(SavePath + "/" + "Figures/" + str(i) + '.png',
                        format='png')
            plt.close()

            fig = plt.figure(70, figsize=(10, 8))
            plt.plot(Qth, amax, 'd', color='#606060', markersize=12,
                     label='Gumbel Distribution')
            plt.plot(Qth, Qth, '^-.', color="#3D59AB",
                     label="Weibul plotting position")
            if Distibution != "GEV":
                plt.plot(Qth, Qlower, '*--', color="#DC143C", markersize=12,
                         label='Lower limit (' + str(int(
                             (1 - SignificanceLevel) * 100)) + " % CI)")
                plt.plot(Qth, Qupper, '*--', color="#DC143C", markersize=12,
                         label='Upper limit (' + str(int(
                             (1 - SignificanceLevel) * 100)) + " % CI)")

            plt.legend(fontsize=15, framealpha=1)
            plt.xlabel('Theoretical Annual Discharge(m3/s)', fontsize=15)
            plt.ylabel('Annual Discharge(m3/s)', fontsize=15)
            plt.savefig(SavePath + "/" + "Figures/F-" + str(i) + '.png',
                        format='png')
            plt.close()

        StatisticalPr.loc[i, 'mean'] = QTS.mean()
        StatisticalPr.loc[i, 'std'] = QTS.std()
        StatisticalPr.loc[i, 'min'] = QTS.min()
        StatisticalPr.loc[i, '5%'] = QTS.quantile(0.05)
        StatisticalPr.loc[i, '25%'] = QTS.quantile(0.25)
        StatisticalPr.loc[i, 'median'] = QTS.quantile(0.50)
        StatisticalPr.loc[i, '75%'] = QTS.quantile(0.75)
        StatisticalPr.loc[i, '95%'] = QTS.quantile(0.95)
        StatisticalPr.loc[i, 'max'] = QTS.max()
        StatisticalPr.loc[i, 't_beg'] = QTS.index.min()
        StatisticalPr.loc[i, 't_end'] = QTS.index.max()
        StatisticalPr.loc[i, 'nyr'] = (
            StatisticalPr.loc[i, 't_end'] -
            StatisticalPr.loc[i, 't_beg']).days / 365.25
        for irp, irp_name in zip(Qrp, rp_name):
            StatisticalPr.loc[i, irp_name] = irp

        # Print for prompt and check progress.
        print("Gauge", i, "done.")

    # Output file
    StatisticalPr.to_csv(SavePath + "/" + "Statistical Properties.csv")
    self.StatisticalPr = StatisticalPr
    DistributionPr.to_csv(SavePath + "/" + "DistributionProperties.csv")
    self.DistributionPr = DistributionPr
# Example fragment: ``c`` (the shape), ``ax`` (a matplotlib Axes) and ``plt``
# are defined before this excerpt.

# Display the probability density function (``pdf``):
x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6,
        label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))
# True

# Generate random numbers:
r = genextreme.rvs(c, size=1000)

# And compare the histogram (``density`` replaces the ``normed`` keyword
# removed from matplotlib):
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()