def gev(self, x):
    """Estimate GEV parameters for ``x`` from its sample L-moments.

    NaN entries are removed before anything else. If the remaining values
    sum to exactly zero (which includes the case where nothing remains),
    no fit is attempted and a fixed fallback parameter array is returned.

    :param x: NumPy array of observations; may contain NaN.
    :return: the result of ``lmoments.pelgev`` for the NaN-free sample, or
        the hard-coded fallback ``numpy.ndarray`` of three values.
    """
    finite_values = x[~np.isnan(x)]
    if np.sum(finite_values) != 0.0:
        return lmoments.pelgev(lmoments.samlmu(finite_values))
    # Degenerate sample: hand back the fixed placeholder parameters.
    return np.array([7.15350690e-07, 5.15304206e-06, -9.92961972e-01])
def test_lm():
    """Print-based smoke check of the L-moment GEV fitting helpers.

    Nothing is asserted; the function only prints intermediate results for
    two hard-coded samples so they can be inspected by eye.
    """
    import lmoments
    # First sample: print the initial parameters from the helper, then the mean.
    x = [360.228515625, 513.506103515625, 273.85031127929688, 340.94839477539062, 244.13925170898438, 283.414306640625, 394.42819213867188, 284.3604736328125, 281.26956176757812, 241.46173095703125, 489.75482177734375, 236.31536865234375, 407.55133056640625, 244.6295166015625, 432.40670776367188, 260.501953125, 517.23052978515625, 317.6553955078125, 407.61935424804688, 275.0709228515625, 330.369140625, 285.92086791992188, 247.9954833984375, 344.34811401367188, 379.55596923828125, 330.80569458007812, 312.35330200195312, 251.79550170898438, 372.66928100585938, 239.72474670410156]
    print(get_initial_params_using_lm(x))
    print(np.mean(x))
    # NOTE(review): this value is overwritten by the pelgev() result below
    # before it is ever read.
    pars = [ 128.28104749, 578.4927539 , 0.62410911]
    # Second sample: fit a GEV from the first three of five sample L-moments.
    data = [588.4747314453125, 693.6640625, 519.03155517578125, 716.58013916015625, 686.29168701171875, 432.65786743164062, 682.72113037109375, 730.12603759765625, 698.971923828125, 491.75332641601562, 597.258544921875, 487.13619995117188, 482.33123779296875, 573.57861328125, 801.67169189453125, 616.41668701171875, 690.954833984375, 671.31646728515625, 680.87554931640625, 534.18414306640625, 427.86019897460938, 236.22953796386719, 691.40972900390625, 599.84637451171875, 545.3563232421875, 553.059814453125, 549.1295166015625, 658.3983154296875, 719.122802734375, 636.84906005859375]
    the_moments = lmoments.samlmu(sorted(data),5)
    pars = lmoments.pelgev(the_moments[0:3])
    print(pars)
    mu, sigma, xi = pars
    # The objective is called with the first two values swapped and the third
    # negated; presumably it uses a different parameter order and shape sign
    # convention - TODO confirm against objective_function_stationary_high.
    print(objective_function_stationary_high([sigma, mu, -xi], data))
def func(data):
    """Produce one bootstrap replicate of the GEV fit and its quantiles.

    A random sample with as many values as ``data`` is drawn via
    ``lmoments.randgev`` using the ``gevfit`` parameters found in the
    enclosing scope, a GEV is refitted to that sample, and the refitted
    distribution's quantiles are evaluated at probabilities ``1 - 1/T`` for
    ``T = np.arange(0.1, 999.1, 0.1) + 1``.

    :param data: observed sample; only its length is used.
    :return: tuple holding the refitted parameters followed by the quantiles.
    """
    return_periods = np.arange(0.1, 999.1, 0.1) + 1
    synthetic = lmoments.randgev(len(data), gevfit)
    refit = lmoments.pelgev(lmoments.samlmu(synthetic))
    quantiles = lmoments.quagev(1.0 - 1. / return_periods, refit)
    return tuple(refit) + tuple(quantiles.tolist())
def pearsonfit(data):
    """Transform ``data`` to standard-normal scores via a Pearson type 3 fit.

    The Pearson type 3 parameters are estimated from the first three sample
    L-moments of the non-zero values only; the fitted CDF is then evaluated
    at every value of ``data`` (zeros included) and mapped through the
    inverse standard-normal CDF.

    :param data: sequence of numeric observations.
    :return: ``numpy.ndarray`` of normal scores, one per input value.
    """
    data = np.array(data)
    # Zeros are left out of the moment estimate but still get a score below.
    para = lm.pelpe3(lm.samlmu(data[data != 0], 3))
    probabilities = np.array([lm.cdfpe3(value, para) for value in data])
    return stats.norm.ppf(probabilities)
def glo(data):
    """Map ``data`` to normal scores through a fitted Generalized Logistic CDF.

    The distribution is fitted from four sample L-moments of ``data``. Each
    value's fitted CDF probability is converted with the inverse
    standard-normal CDF, and the resulting scores are then passed through
    ``lm.cdfnor`` with parameters ``[0, 1]`` and the inverse normal once more.

    :param data: iterable of numeric observations.
    :return: array of normal scores, one per input value.
    """
    params = lm.pelglo(lm.samlmu(data, 4))
    probabilities = np.array([lm.cdfglo(value, params) for value in data])
    scores = stats.norm.ppf(probabilities.astype(float))
    # Second pass: normal CDF followed by its inverse, as in the original flow.
    return stats.norm.ppf(lm.cdfnor(scores, [0, 1]))
def fit(self, data):
    """Fit a GEV distribution by L-moments and compute return levels.

    Reads ``self.nodata`` (fill value), ``self.intervals`` (return periods)
    and ``self.minrecords`` (minimum number of non-zero values required
    before a fit is attempted).

    :param data: ``numpy.ndarray`` of values to fit.
    :return: tuple ``(w, loc, scale, shp)`` where ``w`` is an array of return
        levels, one per entry of ``self.intervals``, and ``loc``, ``scale``,
        ``shp`` are the fitted parameters. Every element that could not be
        computed is ``self.nodata``. Return levels are only computed when
        the shape parameter is strictly positive.
    """
    # Start from "no fit"; the fill value must come from the instance
    # (a bare `nodata` name is not defined in this scope).
    loc, scale, shp = [self.nodata] * 3
    w = self.nodata * np.ones(len(self.intervals))

    if data.max() > 0.:
        ii = np.flatnonzero(data)  # indices of the non-zero elements

        # Only fit when the non-zero values are not all equal and there are
        # at least `self.minrecords` of them.
        if data[ii].min() != data[ii].max() and len(ii) >= self.minrecords:
            # NOTE(review): the moments are taken over all of `data`, zeros
            # included, although the checks above look at non-zero values
            # only - confirm this is intended.
            l1, l2, l3 = lmom.samlmu(data, 3)
            t3 = l3 / l2  # L-skewness (Hosking 1990)

            if (l2 <= 0.) or (np.abs(t3) >= 1.):
                # A non-positive second L-moment or |L-skewness| >= 1 is
                # rejected as invalid.
                log.debug("Invalid l-moments")
            else:
                loc, scale, shp = np.array(lmom.pelgev([l1, l2, t3]))
                # Keep the fit only if the location parameter is finite.
                if not np.isfinite(loc):
                    loc, scale, shp = [self.nodata] * 3

    for i, t in enumerate(self.intervals):
        if shp <= 0.:
            # No valid fit (or non-positive shape): no return level.
            w[i] = self.nodata
        else:
            w[i] = np.transpose(
                loc + (scale / shp)
                * (1. - np.power(-1. * np.log(1. - (1. / t)), shp)))
            # Replace any non-finite result with the fill value.
            if not np.isfinite(w[i]):
                w[i] = self.nodata

    return w, loc, scale, shp
def pearsonfit(data):
    """Transform ``data`` to standard-normal scores via a Pearson type 3 fit.

    The Pearson type 3 parameters are estimated from the first three sample
    L-moments of the non-zero values only; the fitted CDF is then evaluated
    at every value of ``data`` (zeros included) and mapped through the
    inverse standard-normal CDF. No zero-probability adjustment is applied.

    :param data: sequence of numeric observations.
    :return: ``numpy.ndarray`` of normal scores, one per input value.
    """
    data = np.array(data)
    # Zeros are left out of the moment estimate but still get a score below.
    para = lm.pelpe3(lm.samlmu(data[data != 0], 3))
    probabilities = np.array([lm.cdfpe3(value, para) for value in data])
    return stats.norm.ppf(probabilities)
def gammafit(Data):
    """Convert ``Data`` to normal scores using a gamma fit mixed with zeros.

    A gamma distribution is fitted (two L-moments) to the non-zero values.
    Fitted probabilities are combined with the observed fraction of zeros
    ``pze`` and mapped through the inverse standard-normal CDF; results
    below -4 are clipped to -4. Several intermediate values are printed.

    :param Data: sequence of numeric observations; it is copied, so the
        caller's object is not modified.
    :return: ``numpy.ndarray`` of normal scores, one per input value.
    """
    data = np.array(Data)
    # Positions of zeros / non-zeros are recorded BEFORE any replacement below.
    index1 = np.where(data == 0)[0]
    index2 = np.where(data != 0)[0]
    # Fraction of observations that are exactly zero.
    pze = float(len(index1)) / len(data)
    print(pze)
    if pze >= (1 / 16.):
        # With this many zeros, the first zero is replaced by 0.001 so it
        # enters the fit below. It still counts as a zero in index1.
        # NOTE(review): if `data` has an integer dtype the assignment would
        # store 0 again - confirm inputs are floats.
        indx = np.where(data == 0)[0]
        data[indx[0]] = 0.001
    ## nozero=len(data.nonzero()[0])
    ## pze=1-float(nozero)/len(data)
    #print pze
    #data2=ma.masked_values(data.values, 0)
    # Gamma parameters from the first two L-moments of the non-zero values.
    para = lm.pelgam(lm.samlmu(data[data != 0], 2))
    #global pze
    print(para, pze)
    print(data)
    # Fitted gamma CDF at every value, then converted to normal scores.
    gam = np.array([lm.cdfgam(i, para) for i in data])
    #gam[index1]=0
    #global cdf
    ## gam[index2]=np.ma.array(gam)
    gam = stats.norm.ppf(gam.astype(float))
    cdf = np.zeros( shape=len(gam)) #([pze+(1-pze)*lm.cdfnor(i,[0,1]) for i in gam])
    # Original zeros get probability pze; the rest get pze + (1 - pze) * p.
    cdf[index1] = pze
    cdf[index2] = np.array( [pze + (1 - pze) * lm.cdfnor(i, [0, 1]) for i in gam[index2]])
    print(cdf)
    ## print para
    pn = stats.norm.ppf(cdf.astype(float))
    # Clip the lower tail of the scores at -4.
    pn[np.where(pn < -4)[0]] = -4
    del data
    return pn
def gamma_parameters(summed_monthly_values, scale_months):
    """Fit gamma parameters by L-moments for each calendar month.

    Values are taken with stride 12, so element ``i`` of the input is
    treated as belonging to calendar month ``i % 12``. For every month, NaN
    values are dropped and the gamma distribution is fitted to the
    remaining non-zero values.

    :param summed_monthly_values: 1-D array-like of values.
    :param scale_months: not used by this function; kept for interface
        compatibility.
    :return: ``numpy.ndarray`` of shape ``(12, 2)``. A row holds the fitted
        parameters; ``0.0`` in both columns when more than 10% of the
        month's values are zero or the fit raises; NaN in both columns when
        the month has no non-NaN values at all.
    """
    values = np.asarray(summed_monthly_values)

    # Rows stay NaN unless one of the branches below fills them in.
    fitted_parameters = np.full((12, 2), np.nan, dtype=np.float64)

    for month in range(12):
        month_sums = values[month::12]
        month_sums = month_sums[~np.isnan(month_sums)]

        number_of_sums = month_sums.shape[0]
        if number_of_sums == 0:
            # Nothing to fit and no meaningful zero fraction: leave NaN
            # rather than dividing by zero below.
            continue

        nonzero_sums = month_sums[np.nonzero(month_sums)]
        number_of_zeros = number_of_sums - nonzero_sums.shape[0]
        probability_of_zero = number_of_zeros / number_of_sums

        if probability_of_zero > 0.1:
            # Too many zeros for a fit.
            fitted_parameters[month, :] = 0.0
            continue

        try:
            fitted_parameters[month, :] = lmoments.pelgam(
                lmoments.samlmu(nonzero_sums))
        except Exception:
            # Best effort: a failed fit is reported as zeros. Unlike a bare
            # except, KeyboardInterrupt/SystemExit still propagate.
            fitted_parameters[month, :] = 0.0

    return fitted_parameters
def do_it(data, title):
    """Fit a GEV to ``data`` and show a return-level plot with a CI band.

    The fitted quantile curve, the sorted observations and the two
    confidence-interval curves returned by ``ci_bootstrap`` are drawn on a
    log-scaled x axis, and the figure is displayed with ``plt.show()``.

    :param data: sample of observations.
    :param title: text appended to the plot title.
    :return: None.
    """
    print("AAAA")  # progress marker
    lmom = lmoments.samlmu(data)
    print("BBBB")  # progress marker
    gevfit = lmoments.pelgev( lmom) # the parameters of the GEV distribution as estimated on data
    print(gevfit)
    # return periods T from 1.1 upwards in steps of 0.1, and their quantiles
    T = np.arange(0.1, 999.1, 0.1) + 1
    sT = lmoments.quagev(1.0 - 1. / T, gevfit)
    # prepare index for obs
    N = np.r_[1:len(data) + 1] * 1.0 #must *1.0 to convert int to float
    Nmax = max(N)
    # get confidence intervals
    bootout = ci_bootstrap(data, gevfit)
    ci_Td = bootout["ci_Td"]
    ci_Tu = bootout["ci_Tu"]
    params_ci = bootout["params_ci"]  # not used below
    fig, ax = plt.subplots()
    # NOTE(review): called before anything has been plotted on `ax` -
    # confirm this styling actually applies to any line.
    plt.setp(ax.lines, linewidth=2, color='magenta')
    base_x = 10
    subs_x = [2, 3, 4, 5, 6, 7, 8, 9]  # not used below
    ax.set_title("Execution Time GEV Distribution - " + title, fontsize=18)
    ax.set_xlabel("Expected Frequency of Extrema", fontsize=14)
    ax.set_ylabel("Maximum Execution Time", fontsize=14)
    # NOTE(review): newer Matplotlib releases spell this keyword `base`
    # rather than `basex` - confirm against the installed version.
    ax.semilogx(T, sT, basex=base_x)
    #ax.plot(T, sT)
    # Observations sorted in descending order, plotted against Nmax / rank.
    ax.scatter(Nmax / N, np.sort(data)[::-1], color='orangered')
    ax.semilogx(T, ci_Td, '--', basex=base_x)
    #ax.plot(T, ci_Td, '--')
    ax.semilogx(T, ci_Tu, '--', basex=base_x)
    #ax.plot(T, ci_Tu, '--')
    ax.fill_between(T, ci_Td, ci_Tu, color='0.75', alpha=0.5)
    plt.show()
def _wak(self, dat, l):
    """Fit a Wakeby distribution to ``dat`` from its sample L-moments.

    :param dat: sample to fit.
    :param l: second argument forwarded to ``lm.samlmu`` (presumably the
        number of L-moments to compute - verify against the library).
    :return: tuple ``(para, cdf, pdf)`` - the fitted parameters and
        ``lm.cdfwak`` / ``lm.pdfwak`` with ``para`` already bound.
    """
    sample_moments = lm.samlmu(dat, l)
    fitted = lm.pelwak(sample_moments)
    return (
        fitted,
        partial(lm.cdfwak, para=fitted),
        partial(lm.pdfwak, para=fitted),
    )
def _gno(self, dat, l):
    """Fit a Generalized Normal distribution to ``dat`` from its L-moments.

    :param dat: sample to fit.
    :param l: second argument forwarded to ``lm.samlmu`` (presumably the
        number of L-moments to compute - verify against the library).
    :return: tuple ``(para, cdf, pdf)`` - the fitted parameters and
        ``lm.cdfgno`` / ``lm.pdfgno`` with ``para`` already bound.
    """
    sample_moments = lm.samlmu(dat, l)
    fitted = lm.pelgno(sample_moments)
    return (
        fitted,
        partial(lm.cdfgno, para=fitted),
        partial(lm.pdfgno, para=fitted),
    )
check = 0 for i in range(0,len(inp)): if round(inp[i],roundnum) != round(outp[i],roundnum): print("ERROR found in "+name) print("EXPECTED VALUE: "+str(outp[i])) print("ACTUAL VALUE: "+str(inp[i])) check = 1 if check == 0: print(name+" Function SUCCESS") pass except: print("ERROR found in "+name) testdata = [2.0,3.0,4.0,2.4,5.5,1.2,5.4,2.2,7.1,1.3,1.5] LMU = lmoments.samlmu(testdata) correctLMU = [3.23636364, 1.14181818, 0.27388535, 0.02335456, -0.04246285] correctLMU = [3.23636364, 1.14181818, 0.27388535, 0.02335456, -0.04246285] check = 0 for i in range(0,len(correctLMU)): if round(LMU[i],6) != round(correctLMU[i],6): print("ERROR found in SAMLMU") print("EXPECTED VALUE: "+str(correctLMU[i])) print("ACTUAL VALUE: "+str(LMU[i])) check = 1 if check == 0: print("SAMLMU Function SUCCESS") print("#######################################") ####################################### ##EXP
def _pe3(self, dat, l):
    """Fit a Pearson type 3 distribution to ``dat`` from its L-moments.

    :param dat: sample to fit.
    :param l: second argument forwarded to ``lm.samlmu`` (presumably the
        number of L-moments to compute - verify against the library).
    :return: tuple ``(para, cdf, pdf)`` - the fitted parameters and
        ``lm.cdfpe3`` / ``lm.pdfpe3`` with ``para`` already bound.
    """
    sample_moments = lm.samlmu(dat, l)
    fitted = lm.pelpe3(sample_moments)
    return (
        fitted,
        partial(lm.cdfpe3, para=fitted),
        partial(lm.pdfpe3, para=fitted),
    )
max_wind_30.append(max(windslice)) i = i + 1 df1 = pd.DataFrame({'Max U10': max_wind_30}) #we want the highest wind speeds to get the highest rank df1['Ranked'] = df1['Max U10'].rank(ascending=0) df1 = df1.sort_values(by=['Ranked']) ranklist1 = df1['Ranked'].tolist() windlist1 = df1['Max U10'].tolist() windlist1 = windlist1[::-1] ratio = len(windlist1) / 38. #fit the different extreme value distributions try: LMU = lmoments.samlmu(windlist1) gevfit = lmoments.pelgev(LMU) expfit = lmoments.pelexp(LMU) gumfit = lmoments.pelgum(LMU) weifit = lmoments.pelwei(LMU) gpafit = lmoments.pelgpa(LMU) gevST = lmoments.quagev(1.0 - 1. / T, gevfit) expST = lmoments.quaexp(1.0 - 1. / T, expfit) gumST = lmoments.quagum(1.0 - 1. / T, gumfit) weiST = lmoments.quawei(1.0 - 1. / T, weifit) gpaST = lmoments.quagpa(1.0 - 1. / T, gpafit) ratiolist.append(ratio) for t, tl in zip(range(0, len(bootstrapgev)), tlist): bootstrapgev[t].append(np.interp(tl, T, gevST))
def estimateEVD(v, years, missingValue=-9999., minRecords=50, yrspersim=1):
    """
    Calculate extreme value distribution parameters using the Lmoments module.

    A GEV distribution is fitted to the non-zero values of ``v`` and return
    period values are computed for each entry of ``years``. When no valid
    fit is obtained, every output is set to ``missingValue``.

    :param v: array of data values.
    :type v: :class:`numpy.ndarray`
    :param years: array of years for which to calculate return period values.
    :type years: :class:`numpy.ndarray`
    :param float missingValue: value to insert if fit does not converge.
    :param int minRecords: minimum number of non-zero observations required
                           to perform fitting.
    :param int yrspersim: data represent block maxima - this gives the length
                          of each block in years.

    :return: tuple ``(w, loc, scale, shp)``: the array of return period
             values followed by the location, scale and shape parameters.
    """
    # Convert to float to prevent integer division & ensure consistent data
    # types for output variables
    yrspersim = float(yrspersim)
    missingValue = float(missingValue)
    years = np.array(years)

    # Start from "no fit".
    loc, scale, shp = [missingValue, missingValue, missingValue]
    w = missingValue * np.ones(len(years))
    # Track fit success explicitly instead of comparing the shape parameter
    # with a hard-coded -9999, which breaks for any other missingValue.
    fitted = False

    if v.max() > 0.:
        ii = np.flatnonzero(v)
        # Only calculate l-moments where the non-zero values are not all
        # equal and there are at least `minRecords` of them.
        if v[ii].min() != v[ii].max() and len(ii) >= minRecords:
            l1, l2, l3 = lmom.samlmu(v[ii], 3)
            t3 = l3 / l2  # L-skewness

            if (l2 <= 0.) or (np.abs(t3) >= 1.):
                # A non-positive second L-moment or |L-skewness| >= 1 is
                # rejected as invalid.
                log.debug("Invalid l-moments")
            else:
                # Parameter estimation returns location, scale and shape.
                loc, scale, shp = np.array(lmom.pelgev([l1, l2, t3]))
                # Keep the fit only if the location parameter is finite.
                if np.isfinite(loc):
                    fitted = True
                else:
                    loc, scale, shp = [missingValue, missingValue,
                                       missingValue]

    if not fitted:
        # `w` is already filled with missingValue.
        return w, loc, scale, shp

    for i, t in enumerate(years):
        w[i] = np.transpose(
            loc + (scale / shp)
            * (1. - np.power(-1. * np.log(1. - (yrspersim / t)), shp)))
        # Replace any non-finite numbers with the missing value:
        if not np.isfinite(w[i]):
            w[i] = missingValue

    return w, loc, scale, shp
def _nor(self, dat, l):
    """Fit a Normal distribution to ``dat`` from its sample L-moments.

    :param dat: sample to fit.
    :param l: second argument forwarded to ``lm.samlmu`` (presumably the
        number of L-moments to compute - verify against the library).
    :return: tuple ``(para, cdf, pdf)`` - the fitted parameters and
        ``lm.cdfnor`` / ``lm.pdfnor`` with ``para`` already bound.
    """
    sample_moments = lm.samlmu(dat, l)
    fitted = lm.pelnor(sample_moments)
    return (
        fitted,
        partial(lm.cdfnor, para=fitted),
        partial(lm.pdfnor, para=fitted),
    )
def gevfit(data, intervals, nodata=-9999., minrecords=50, yrspersim=1):
    """
    Fit a GEV distribution by L-moments and evaluate return period values.

    Return period values are only calculated when the shape parameter is
    strictly positive; otherwise they are left at ``nodata``.

    :param data: array of data values.
    :type data: :class:`numpy.ndarray`
    :param intervals: return periods (years) for which to calculate values.
    :param float nodata: fill value for anything that could not be computed.
    :param int minrecords: minimum number of non-zero observations required
                           before a fit is attempted.
    :param int yrspersim: length in years of the block each value represents.

    :return: tuple ``(w, loc, scale, shp)``: the array of return period
             values, then the location, scale and shape parameters.
    """
    # Work in floats so the outputs have a consistent type.
    block_years = float(yrspersim)
    fill = float(nodata)
    return_periods = np.array(intervals)

    loc = scale = shp = fill
    w = np.full(len(return_periods), fill)

    if data.max() > 0.:
        nonzero_idx = np.flatnonzero(data)
        nonzero_vals = data[nonzero_idx]
        # Fit only when the non-zero values vary and there are enough of them.
        if (nonzero_vals.min() != nonzero_vals.max()
                and len(nonzero_idx) >= minrecords):
            l1, l2, l3 = lmom.samlmu(data, 3)
            t3 = l3 / l2  # L-skewness (Hosking 1990)
            if not ((l2 <= 0.) or (np.abs(t3) >= 1.)):
                est_loc, est_scale, est_shp = np.array(
                    lmom.pelgev([l1, l2, t3]))
                # Accept the estimate only if the location is finite.
                if np.isfinite(est_loc):
                    loc, scale, shp = est_loc, est_scale, est_shp
            else:
                log.debug("Invalid l-moments")

    for idx, period in enumerate(return_periods):
        if shp <= 0.:
            # No usable fit: this entry keeps the fill value.
            w[idx] = fill
            continue
        reduced = -1. * np.log(1. - (block_years / period))
        w[idx] = np.transpose(
            loc + (scale / shp) * (1. - np.power(reduced, shp)))
        # Non-finite results are replaced by the fill value.
        if not np.isfinite(w[idx]):
            w[idx] = fill

    return w, loc, scale, shp
def _gpa(self, dat, l):
    """Fit a Generalised Pareto distribution to ``dat`` from its L-moments.

    :param dat: sample to fit.
    :param l: second argument forwarded to ``lm.samlmu`` (presumably the
        number of L-moments to compute - verify against the library).
    :return: tuple ``(para, cdf, pdf)`` - the fitted parameters and
        ``lm.cdfgpa`` / ``lm.pdfgpa`` with ``para`` already bound.
    """
    sample_moments = lm.samlmu(dat, l)
    fitted = lm.pelgpa(sample_moments)
    return (
        fitted,
        partial(lm.cdfgpa, para=fitted),
        partial(lm.pdfgpa, para=fitted),
    )
################################################## # 1 GEV (General Extreme V) # para1 = \mu # para2 = \sigma # para3 = \theta if FUNC == 'GEV': #for i in progressbar(range(ysize), "Computing: ", 40): for i in range(ysize): if i % 10 == 0: print(i, 'out of ', ysize) for j in range(xsize): if np.nanmean(datm[:, i, j]) > -9990.: # There are many grids with constant values or # there is only one large value but others are constant. # We cannot calculate the parameters with this time series. if np.std(datm[:-5, i, j]) > 1e-5: lmoms = lmom.samlmu(datm[:, i, j], 4) params = lmom.pelgev(lmoms) try: para1[i, j] = params[0] para2[i, j] = params[1] para3[i, j] = params[2] p_AIC[i, j] = lmom.AIC(datm[:, i, j], FUNC) y = lmom.quagev(p, params) c_AIC[i, j] = calc_aic(datm[:, i, j], y) py_AIC[i, j] = aic.aic(datm[:, i, j], y, len(params)) except: para1[i, j] = np.nanmean(datm[:, i, j]) para2[i, j] = -9999.
def estimateEVD(v, years, missingValue=-9999., minRecords=50, yrspersim=1):
    """
    Estimate GEV parameters by L-moments and derive return period values.

    Return period values are not calculated if the shape parameter is
    negative or zero; they then stay at ``missingValue``.

    :param v: array of data values.
    :type v: :class:`numpy.ndarray`
    :param years: return periods (years) for which to calculate values.
    :param float missingValue: fill value for anything not computed.
    :param int minRecords: minimum number of non-zero observations required
                           before a fit is attempted.
    :param int yrspersim: length in years of the block each value represents.

    :return: tuple ``(w, loc, scale, shp)``: the array of return period
             values, then the location, scale and shape parameters.
    """
    # Floats throughout, so the outputs share one type.
    years_per_block = float(yrspersim)
    missing = float(missingValue)
    periods = np.array(years)

    loc = scale = shp = missing
    w = np.full(len(periods), missing)

    if v.max() > 0.:
        nz = np.flatnonzero(v)
        # A fit needs varying non-zero values and enough of them.
        if v[nz].min() != v[nz].max() and len(nz) >= minRecords:
            l1, l2, l3 = lmom.samlmu(v, 3)
            t3 = l3 / l2
            moments_rejected = (l2 <= 0.) or (np.abs(t3) >= 1.)
            if moments_rejected:
                log.debug("Invalid l-moments")
            else:
                candidate = np.array(lmom.pelgev([l1, l2, t3]))
                loc, scale, shp = candidate
                # Discard the estimate when the location is not finite.
                if not np.isfinite(loc):
                    loc = scale = shp = missing

    for pos, period in enumerate(periods):
        if shp <= 0.:
            w[pos] = missing
        else:
            exceedance_term = np.power(
                -1. * np.log(1. - (years_per_block / period)), shp)
            w[pos] = np.transpose(
                loc + (scale / shp) * (1. - exceedance_term))
            # Replace any non-finite numbers with the missing value.
            if not np.isfinite(w[pos]):
                w[pos] = missing

    return w, loc, scale, shp
def gev(x):
    """Return the GEV parameters estimated from the sample L-moments of ``x``.

    :param x: sample of observations, passed to ``lmoments.samlmu``.
    :return: whatever ``lmoments.pelgev`` returns for those L-moments.
    """
    sample_moments = lmoments.samlmu(x)
    return lmoments.pelgev(sample_moments)
def _gev(self, dat, l):
    """Fit a Generalised Extreme Value distribution to ``dat`` by L-moments.

    :param dat: sample to fit.
    :param l: second argument forwarded to ``lm.samlmu`` (presumably the
        number of L-moments to compute - verify against the library).
    :return: tuple ``(para, cdf, pdf)`` - the fitted parameters and
        ``lm.cdfgev`` / ``lm.pdfgev`` with ``para`` already bound.
    """
    sample_moments = lm.samlmu(dat, l)
    fitted = lm.pelgev(sample_moments)
    return (
        fitted,
        partial(lm.cdfgev, para=fitted),
        partial(lm.pdfgev, para=fitted),
    )