Example #1
    def transform_observation_CDF(self, COEFF, OBS, distribution):
        """
        COEFF : [[shape, mu, sigma],[shape, mu, sigma].......,[shape, mu, sigma]]
        """

        cdf_collection = []
        if distribution == 'gev':
            for sample_index in range(len(COEFF)):
                shape = COEFF[sample_index][0]
                mu = COEFF[sample_index][1]
                sigma = COEFF[sample_index][2]
                cdf = gev.cdf(OBS[sample_index], shape, loc=mu, scale=sigma)
                cdf_collection.append(cdf)
        elif distribution in ('TN', 'LN'):
            for sample_index in range(len(COEFF)):
                mu = COEFF[sample_index][0]
                sigma = COEFF[sample_index][1]
                if distribution == 'TN':
                    cdf = TN_CDF(OBS[sample_index], mu, sigma, a=0.0, b=np.inf)
                    cdf_collection.append(cdf)
                elif distribution == 'LN':
                    cdf = norm.cdf(
                        (np.log(OBS[sample_index] + sys.float_info.epsilon) -
                         mu) / sigma)
                    cdf_collection.append(cdf)
        return cdf_collection
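TN_CDF is not defined in this fragment. A minimal sketch consistent with how it is called here, assuming a normal(mu, sigma) truncated to [a, b] (scipy.stats.truncnorm computes the same thing with standardized bounds):

import numpy as np
from scipy.stats import norm

def TN_CDF(x, mu, sigma, a=0.0, b=np.inf):
    # truncated-normal CDF: (Phi(z_x) - Phi(z_a)) / (Phi(z_b) - Phi(z_a)),
    # where Phi is the standard normal CDF and z = (value - mu) / sigma
    z_x = (x - mu) / sigma
    z_a = (a - mu) / sigma
    z_b = (b - mu) / sigma
    return (norm.cdf(z_x) - norm.cdf(z_a)) / (norm.cdf(z_b) - norm.cdf(z_a))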
Example #2
    def CalThresholdCDF(self, COEFF, distribution, threshold):
        CDF_larger_than_threshold = [0.0] * len(COEFF)
        CDF_less_than_threshold = [0.0] * len(COEFF)
        if distribution == 'gev':
            for sample_index in range(len(COEFF)):
                shape = COEFF[sample_index][0]
                mu = COEFF[sample_index][1]
                sigma = COEFF[sample_index][2]
                CDF_larger_than_threshold[sample_index] = 1.0 - gev.cdf(
                    threshold, shape, loc=mu, scale=sigma)
                CDF_less_than_threshold[
                    sample_index] = 1.0 - CDF_larger_than_threshold[
                        sample_index]

        elif distribution == 'LN':
            for sample_index in range(len(COEFF)):
                mu = COEFF[sample_index][0]
                sigma = COEFF[sample_index][1]
                # log-normal: P(X > t) via the standard normal CDF of (ln t - mu) / sigma
                CDF_larger_than_threshold[sample_index] = 1.0 - norm.cdf(
                    (np.log(threshold + sys.float_info.epsilon) - mu) / sigma)
                CDF_less_than_threshold[
                    sample_index] = 1.0 - CDF_larger_than_threshold[
                        sample_index]

        elif distribution == 'TN':
            for sample_index in range(len(COEFF)):
                mu = COEFF[sample_index][0]
                sigma = COEFF[sample_index][1]
                CDF_larger_than_threshold[sample_index] = 1.0 - TN_CDF(
                    threshold, mu, sigma, a=0.0, b=np.inf)
                CDF_less_than_threshold[
                    sample_index] = 1.0 - CDF_larger_than_threshold[
                        sample_index]
        return CDF_larger_than_threshold, CDF_less_than_threshold
Example #3
def gev_CRPS(x, mu, sigma, shape_para):
    score = np.nan
    cdf = gev.cdf(x, c=shape_para, loc=mu, scale=sigma)
    if cdf < 0.0:
        cdf = 0.0 + sys.float_info.epsilon
    elif cdf > 1.0:
        cdf = 1.0 - sys.float_info.epsilon
    if shape_para == 0.0:
        # Gumbel limit (shape -> 0); ei is the exponential integral Ei
        Euler_Mascheroni_constant = 0.577215664901532
        score = mu - x + sigma * (Euler_Mascheroni_constant - np.log(2.0)
                                  ) - 2.0 * sigma * (ei(np.log(cdf)))
    else:
        # clamp x to the boundary if it falls outside the GEV support
        if 1.0 + shape_para * (x - mu) / sigma <= 0.0:
            x = -1.0 * sigma / shape_para + mu

        # keep |shape| < 0.95 for numerical stability; the closed form needs shape < 1
        if math.fabs(shape_para) > 0.95:
            if shape_para < -0.95:
                shape_para = -0.95 + sys.float_info.epsilon
            elif shape_para > 0.95:
                shape_para = 0.95 - sys.float_info.epsilon
        # gamma(1-k) * gammainc(1-k, .) is the unregularized lower incomplete gamma
        sub = np.power(2.0, shape_para) * special.gamma(
            1.0 - shape_para) - 2.0 * special.gamma(
                1.0 - shape_para) * special.gammainc(1.0 - shape_para,
                                                     -1.0 * np.log(cdf))
        score = (mu - x - sigma / shape_para) * (1.0 - 2.0 * cdf) - (
            sigma / shape_para) * sub
    return score
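The fragment assumes several imports. A minimal usage sketch under the assumption that gev is scipy.stats.genextreme and ei is scipy.special.expi:

import math
import sys
import numpy as np
from scipy import special
from scipy.special import expi as ei
from scipy.stats import genextreme as gev

# hypothetical values, purely for illustration
print(gev_CRPS(x=3.2, mu=2.0, sigma=1.0, shape_para=0.1))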
Example #4
File: utilities.py  Project: smkia/DNM
def extreme_value_prob(params, NPM, perc):
    n = NPM.shape[0]
    t = NPM.shape[1]
    n_perc = int(round(t * perc))
    m = np.zeros(n)
    for i in range(n):
        temp = np.abs(NPM[i, :])
        temp = np.sort(temp)
        temp = temp[t - n_perc:]
        temp = temp[0:int(np.floor(0.90*temp.shape[0]))]
        m[i] = np.mean(temp)
    if params[0] <= 0:  # shape <= 0: the upper tail is heavy, extremes are large values
        probs = genextreme.cdf(m, *params)
    else:  # shape > 0: the upper tail is bounded, extremes are small values
        probs = 1 - genextreme.cdf(m, *params)
    return probs
Example #5
File: gevmyh5.py  Project: loicliang/postH5
def gevfit(sr):
    gev_fit = gev.fit(sr)
    c = gev_fit[0]
    mu = gev_fit[1]
    sigma = gev_fit[2]

    print("""
          GEV Fit Parameters:
          shape parameter c: %s
          location parameter mu: %s
          scale parameter sigma: %s
          """ % (c, sigma, mu))

    print("Median", gev.median(c, mu, sigma))
    print("Mean", gev.mean(c, mu, sigma))
    print("Std dev", gev.std(c, mu, sigma))
    print("95% interval: ", gev.interval(0.95, c, mu, sigma))

    if (c > 0):
        lBnd = mu - sigma / c
    else:
        lBnd = mu + sigma / c
    srmax = np.max(sr) * 1.1

    bins = sr.size

    x = np.linspace(np.min(sr) - 5, np.max(sr) + 5, 500)
    #x=np.linspace(lBnd,srmax,500)
    gev_pdf = gev.pdf(x, c, mu, sigma)
    gev_cdf = gev.cdf(x, c, mu, sigma)

    plt.figure(figsize=(12, 6))

    ax1 = plt.subplot(1, 2, 1)
    plt.hist(sr, density=True, alpha=0.2, label='Raw Data', bins='auto')
    plt.plot(x, gev_pdf, 'r--', label='GEV Fit')
    plt.legend(loc='upper left')
    ax1.set_title('%s_Probability Density Function' % (sr.name))
    ax1.set_xlabel('Predicted Fatigue Limit (MPa)')
    ax1.set_ylabel('Density')
    ax1.grid()

    ax2 = plt.subplot(1, 2, 2)
    plt.hist(sr,
             density=True,
             alpha=0.2,
             label='Raw Data',
             cumulative=True,
             bins='auto')
    plt.plot(x, gev_cdf, 'r--', label='GEV Fit')
    plt.legend(loc='upper left')
    ax2.set_title('%s_Cumulative Distribution Function' % (sr.name))
    ax2.set_xlabel('Predicted Fatigue Limit (MPa)')
    ax2.set_ylabel('Cumulative probability')
    ax2.grid()

    plt.show()
Example #6
 def prob(self, x, estimador):
     try:
         return genextreme.cdf(x, c=self.shape, loc=self.loc, scale=self.scale)
     except AttributeError:
         # parameters not fitted yet: run the requested estimator, then retry
         if estimador not in self.estimadores:
             raise ValueError('Estimator does not exist')
         getattr(self, estimador)()
         return self.prob(x, estimador=estimador)
Example #7
    def EstimaProbabilidade(self, Magnitude, Parametros):
        if self.tipoSerie == 'Parcial':
            probabilidade = genpareto.cdf(Magnitude, Parametros[0],
                                        loc = Parametros[1],
                                        scale = Parametros[2])

        elif self.tipoSerie == 'Anual':
            probabilidade = genextreme.cdf(Magnitude, Parametros[0],
                                            loc = Parametros[1],
                                            scale = Parametros[2])
        return probabilidade
Example #8
def test_gev_cdf():
    """
    Make sure that the custom gev_cdf function works just like the scipy
    implementation
    """
    for shape in [-10, -0.001, 0.0, 0.001, 10]:
        for loc in [10, 500]:
            for scale in [0.01, 100]:
                for x in [-1000, 0, 100]:
                    estimate_mine = gev_cdf(x=x, shape=shape, loc=loc, scale=scale)
                    # note scipy uses negative for the shape parameter
                    estimate_scipy = gev.cdf(x=x, c=-shape, loc=loc, scale=scale)
                    assert_almost_equal(estimate_mine, estimate_scipy, decimal=4)
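The custom gev_cdf itself is not shown. A minimal sketch consistent with this test, assuming the xi sign convention (opposite to scipy's c):

import numpy as np

def gev_cdf(x, shape, loc, scale):
    # GEV CDF in the xi convention: F(x) = exp(-(1 + xi*s)**(-1/xi)) with
    # s = (x - loc)/scale, and the Gumbel limit exp(-exp(-s)) when xi == 0
    s = (x - loc) / scale
    if shape == 0.0:
        return np.exp(-np.exp(-s))
    t = 1.0 + shape * s
    if t <= 0.0:
        # outside the support: 0 below the lower bound (xi > 0),
        # 1 above the upper bound (xi < 0)
        return 0.0 if shape > 0.0 else 1.0
    return np.exp(-t ** (-1.0 / shape))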
Example #9
def extreme_value_prob(params, NPM, perc):
    n = NPM.shape[0]
    t = NPM.shape[1]
    n_perc = int(round(t * perc))
    m = np.zeros(n)
    for i in range(n):
        temp = np.abs(NPM[i, :])
        temp = np.sort(temp)
        temp = temp[t - n_perc:]
        temp = temp[0:int(np.floor(0.90 * temp.shape[0]))]
        m[i] = np.mean(temp)
    # evaluate once, after every row of m has been filled
    probs = genextreme.cdf(m, *params)
    return probs
Example #10
def extreme_value_prob(params, NPM, perc):
    n = NPM.shape[0]
    t = NPM.shape[1]
    n_perc = int(round(t * perc))
    m = np.zeros(n)
    for i in range(n):
        temp = np.abs(NPM[i, :])
        temp = np.sort(temp)
        temp = temp[t - n_perc:]
        m[i] = trim_mean(temp, 0.05)
    probs = genextreme.cdf(m,*params)
    return probs
    
Example #11
    def gev_fit(var_fit):
        c = -0.1  # initial guess for the shape parameter passed to genextreme.fit
        vv = np.linspace(0, 10, 200)

        sha_g, loc_g, sca_g = genextreme.fit(var_fit, c)
        pg = genextreme.cdf(vv, sha_g, loc_g, sca_g)

        ix = pg > 0.1
        vv = vv[ix]
        ts = 1 / (1 - pg[ix])

        # TODO gev params 95% confidence intervals

        return ts, vv
Example #12
 def graAcumulado(self, dados, forma, posicao, escala):
     dados.sort()
     dadosExt = []
     '''
     for i in range(1, 1001):
         dadosExt.append(self.ler.serieExtensa(i, 'Fluviometrico'))
     dadosExt.sort()
     yExt = gev.pdf(dadosExt, -0.168462, 6286.926278, 1819.961392)
     '''
     yd = gev.cdf(dados, forma, posicao, escala)
     plt.plot(dados, yd, '-r',
              label='Shape: %s\nLocation: %s\nScale: %s' % (forma, posicao, escala))
     #plt.plot(dadosExt, yExt, '-r')
     plt.ylabel('Non-exceedance probability')
     plt.xlabel('Flow (m³/s)')
     plt.legend(numpoints=1, loc="best")
     plt.show()
Example #13
def test_flood_probability():
    """
    Test that the estimated flood probability is correct for known inputs
    """
    flood_height = 500
    fm = FloodModel(
        loc_base=250,
        loc_trend=2,
        coeff_var=0.1,
        shape=0.2,
        zero_time=2015,
        scale_min=1e-3,
    )
    # again recall scipy uses negative of shape parameter
    desired_prob = 1.0 - gev.cdf(x=flood_height, c=-0.2, loc=420, scale=42.0)
    flood_prob = fm.calc_exceedance_prob(2100, flood_height)
    assert_almost_equal(flood_prob, desired_prob, decimal=4)
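FloodModel is not shown here. A minimal sketch consistent with what this test checks (location growing linearly from zero_time, scale proportional to location, scale_min assumed to act as a floor; all names mirror the constructor arguments):

from scipy.stats import genextreme as gev

class FloodModel:
    def __init__(self, loc_base, loc_trend, coeff_var, shape,
                 zero_time, scale_min):
        self.loc_base = loc_base
        self.loc_trend = loc_trend
        self.coeff_var = coeff_var
        self.shape = shape
        self.zero_time = zero_time
        self.scale_min = scale_min

    def calc_exceedance_prob(self, year, height):
        # location drifts linearly in time; scale tracks location,
        # with scale_min as a floor (assumed from the parameter name)
        loc = self.loc_base + self.loc_trend * (year - self.zero_time)
        scale = max(self.coeff_var * loc, self.scale_min)
        # scipy's c is the negative of the shape convention used here
        return 1.0 - gev.cdf(x=height, c=-self.shape, loc=loc, scale=scale)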
Example #14
def plotCDF(x,
            gevfit,
            e,
            xLabel,
            Title,
            EventFlow=None,
            EventT=None,
            EventLabel=None,
            fname=None):
    '''
    Plots CDF of data in Pandas Series x.
    -------------------------------------------------------------------------------------------
    Input:
        x:            Pandas series
        gevfit:       Tuple with the three fitted GEV parameters
        e:            Numpy array with exceedance probabilities
        xLabel:       Str label to use for x-axis
        Title:        Str chart title
        EventFlow:    (Optional) Flow of event that needs to be highlighted as a separate marker
        EventT:       (Optional) Return period of flow of event that needs to be highlighted as a separate marker
        EventLabel:   (Optional) Legend label of flow of event that needs to be highlighted as a separate marker
        fname:        (Optional) Full path to filename to save the figure in *.png format
    '''

    fig, ax = plt.subplots(1, 1)
    mx = max(x)
    plt.hlines(1, 0, mx + 250, colors='k', linestyles='--')
    q = genextreme.cdf(x, gevfit[0], gevfit[1], gevfit[2])
    ax.plot(x, q, color='k', label='Fit')
    ax.scatter(x, 1 - e, color=colors[0], label='Recorded data', s=15)  # 'colors' is defined elsewhere in the source module
    if EventFlow is not None and EventT is not None and EventLabel is not None:
        ax.scatter(EventFlow, 1 - (1 / EventT), c='g', s=100, label=EventLabel)
    ax.yaxis.grid()
    plt.xlabel(xLabel)
    plt.ylabel('CDF [-]')
    plt.xlim(0, mx + 100)
    plt.ylim(0, 1)
    plt.title(Title)
    plt.grid(True, which='both')
    ax.legend(loc='lower right')
    if fname:
        plt.savefig(fname, dpi=600.)
    else:
        plt.show()
Example #15
    def EstimaFrequencias(self, Parametros):
        if self.tipoSerie == 'Parcial':
            limite = lp.LimiteParcial(self.dadoSerie).AchaLimite(2)
            Parciais = se.Series(self.dadoSerie).serieMaxParcial(limite)
            datasP, PicosParciais = se.Series(Parciais).separaDados()
            PicosParciais.sort(reverse = True)
            print(PicosParciais)
            frequencias = genpareto.cdf(PicosParciais, Parametros[0],
                                        loc = Parametros[1],
                                        scale = Parametros[2])

        elif self.tipoSerie == 'Anual':
            Anuais = se.Series(self.dadoSerie).serieMaxAnual()
            datasA, PicosAnuais = se.Series(Anuais).separaDados()
            PicosAnuais.sort(reverse = True)
            print(PicosAnuais)
            frequencias = genextreme.cdf(PicosAnuais, Parametros[0],
                                            loc = Parametros[1],
                                            scale = Parametros[2])
        return frequencias
Example #16
    print(i, RR_L, RR_SA)
    L_return = 40 / RR_L

    SA_return = 40 / RR_SA

    #L_rain = pd.read_csv('/Users/Jasper/Lesotho-ERA5.csv')
    #SA_rain = pd.read_csv('/Users/Jasper/SA-ERA5.csv')

    return_period = np.linspace(1, len(L_rain), len(L_rain))
    return_period = return_period / (len(return_period) + 1)
    L_rain = L_rain.sort_values(by=['JFM_prec'])
    SA_rain = SA_rain.sort_values(by=['JFM_prec'])

    shape_SA, loc_SA, scale_SA = gev.fit(SA_rain['JFM_prec'])
    xx_SA = np.linspace(100, 1000, 1000)
    # return period based on the non-exceedance probability: T = 1 / F(x)
    yy_SA = 1 / (gev.cdf(xx_SA, shape_SA, loc_SA, scale_SA))

    shape_L, loc_L, scale_L = gev.fit(L_rain['JFM_prec'])
    xx_L = np.linspace(100, 1000, 1000)
    yy_L = 1 / (gev.cdf(xx_L, shape_L, loc_L, scale_L))

    ### find the rainfall value whose return period is closest to the target
    id_SA_return1 = (np.abs(yy_SA - SA_return)).argmin()
    val_SA_return = xx_SA[id_SA_return1]

    id_L_return1 = (np.abs(yy_L - L_return)).argmin()
    val_L_return = xx_L[id_L_return1]

    ### find the rainfall value closest to a 40-year return period
    id_SA_return2 = (np.abs(yy_SA - 40)).argmin()
    val_SA_return_ACT = xx_SA[id_SA_return2]
Example #17
def extremal_distribution_fit(data,
                              var_name,
                              sample,
                              threshold,
                              fit_type,
                              x_min,
                              x_max,
                              n_points,
                              loc=None,
                              scale=None,
                              cumulative=True):
    # Initialization of the output variables
    param = None
    x = None
    y = None
    y_rp = None

    if fit_type == 'gpd':
        # Fit the exceedances over threshold to Generalized Pareto distribution
        param = generalized_pareto_distribution_fit(sample, threshold, loc,
                                                    scale)

        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        if cumulative:
            y = genpareto.cdf(x, param[0], param[1], param[2])

            # Calculate the number of extreme peaks per year
            n_peaks_year = len(sample) / len(
                data[var_name].index.year.unique())
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genpareto.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'coles':
        # Fit the exceedances over threshold to Generalized Pareto distribution
        param = generalized_pareto_distribution_fit(sample, threshold, loc,
                                                    scale)

        x = np.arange(1, 501)
        u = param[1]
        sigma = param[2]
        xi = param[0]

        # Mean number of data points in a year
        n_y = len(data[var_name]) / len(data[var_name].index.year.unique())
        # Proportion of data points over the threshold (POT count / total data count)
        z_u = len(sample) / len(data[var_name])
        # n_y * z_u is then the number of POT per year
        y_rp = u + (sigma / xi) * (((x * n_y * z_u)**xi) - 1)

    elif fit_type == 'gev':
        param = generalized_extreme_value_distribution_fit(sample, loc, scale)

        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        if cumulative:
            y = genextreme.cdf(x, param[0], param[1], param[2])

            # Calculate the number of extreme peaks per year
            n_peaks_year = 1
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genextreme.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'poisson':
        # Calculate the pdf and/or cdf
        x = np.linspace(x_min, x_max, n_points)

        # Fit the exceedances over threshold to Generalized Pareto distribution
        gpd_param = generalized_pareto_distribution_fit(
            sample, threshold, loc, scale)

        # Poisson parameter (number of extreme events per year)
        poisspareto_param = len(sample) / len(
            data[var_name].index.year.unique())
        # Poisson pareto parameters
        poisspareto_param = [
            poisspareto_param, gpd_param[0], gpd_param[2], gpd_param[1]
        ]
        # Equivalent gev parameters
        param = [0, 0, 0]
        param[0] = -poisspareto_param[1]
        param[1] = poisspareto_param[2] * (poisspareto_param[0]**
                                           poisspareto_param[1])
        param[2] = poisspareto_param[3] + (
            (poisspareto_param[2] / poisspareto_param[1]) *
            ((poisspareto_param[0]**poisspareto_param[1]) - 1))

        if cumulative:
            y = genextreme.cdf(x, param[0], param[2], param[1])

            # Calculate the number of extreme peaks per year
            n_peaks_year = 1
            y_rp = return_period_curve(n_peaks_year, y)
        else:
            y = genextreme.pdf(x, param[0], param[2], param[1])

    return param, x, y, y_rp
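return_period_curve is not included in this fragment. A minimal sketch consistent with how it is used above (converting a CDF into return periods given the mean number of peaks per year; the name and signature are taken from the calls):

import numpy as np

def return_period_curve(n_peaks_year, cdf):
    # Return period in years for each CDF value: on average
    # n_peaks_year * (1 - F) exceedances per year, so T = 1 / (n_peaks_year * (1 - F))
    cdf = np.asarray(cdf, dtype=float)
    return 1.0 / (n_peaks_year * (1.0 - cdf))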
Example #18
File: hminputs.py  Project: nguyetlm/Hapi
    def StatisticalProperties(self,
                              PathNodes,
                              PathTS,
                              StartDate,
                              WarmUpPeriod,
                              SavePlots,
                              SavePath,
                              SeparateFiles=False,
                              Filter=False,
                              Distibution="GEV",
                              EstimateParameters=False,
                              Quartile=0,
                              RIMResults=False,
                              SignificanceLevel=0.1):
        """
        =============================================================================
          StatisticalProperties(PathNodes, PathTS, StartDate, WarmUpPeriod, SavePlots, SavePath,
                              SeparateFiles = False, Filter = False, RIMResults = False)
        =============================================================================

        StatisticalProperties method reads the SWIM output file (.dat file) that
        contains the time series of discharge for some computational nodes
        and calculate some statistical properties

        the code assumes that the time series are of a daily temporal resolution, and
        that the hydrological year is 1-Nov/31-Oct (Petrow and Merz, 2009, JoH).

        Parameters
        ----------
            1-PathNodes : [String]
                the name of the file which contains the ID of the computational
                nodes you want to do the statistical analysis for, the ObservedFile
                should contain the discharge time series of these nodes in order.
            2-PathTS : [String]
                the name of the SWIM result file (the .dat file).
            3-StartDate : [string]
                the begining date of the time series.
            4-WarmUpPeriod : [integer]
                the number of days you want to neglect at the begining of the
                Simulation (warm up period).
            5-SavePlots : [Bool]
                DESCRIPTION.
            6-SavePath : [String]
                the path where you want to  save the statistical properties.
            7-SeparateFiles: [Bool]
                if the discharge data are stored in separate files not all in one file
                SeparateFiles should be True, default [False].
            8-Filter: [Bool]
                for observed or RIMresult data it has gaps of times where the
                model did not run or gaps in the observed data if these gap days
                are filled with a specific value and you want to ignore it here
                give Filter = Value you want
            9-RIMResults: [Bool]
                If the files are results form RIM or observed, as the format
                differes between the two. default [False]

        Returns
        -------
            1-Statistical Properties.csv:
                file containing some statistical properties like mean, std, min, 5%, 25%,
                median, 75%, 95%, max, t_beg, t_end, nyr, q1.5, q2, q5, q10, q25, q50,
                q100, q200, q500.
        """

        ComputationalNodes = np.loadtxt(PathNodes, dtype=np.uint16)
        # hydrographs
        if SeparateFiles:
            TS = pd.DataFrame()
            if RIMResults:
                for i in range(len(ComputationalNodes)):
                    TS.loc[:, int(ComputationalNodes[i])] = self.ReadRIMResult(
                        PathTS + "/" + str(int(ComputationalNodes[i])) +
                        '.txt')
            else:
                for i in range(len(ComputationalNodes)):
                    TS.loc[:, int(ComputationalNodes[i])] = np.loadtxt(
                        PathTS + "/" + str(int(ComputationalNodes[i])) +
                        '.txt')  #,skiprows = 0

            StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
            EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
            ind = pd.date_range(StartDate, EndDate)
            TS.index = ind
        else:
            TS = pd.read_csv(PathTS, delimiter=r'\s+', header=None)
            StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
            EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
            TS.index = pd.date_range(StartDate, EndDate, freq="D")
            # delete the first two columns
            del TS[0], TS[1]
            TS.columns = ComputationalNodes

        # neglect the warm-up period at the beginning of the time series
        TS = TS.loc[StartDate + dt.timedelta(days=WarmUpPeriod):EndDate, :]

        # List of the table output, including some general data and the return periods.
        col_csv = [
            'mean', 'std', 'min', '5%', '25%', 'median', '75%', '95%', 'max',
            't_beg', 't_end', 'nyr'
        ]
        rp_name = [
            'q1.5', 'q2', 'q5', 'q10', 'q25', 'q50', 'q100', 'q200', 'q500',
            'q1000'
        ]
        col_csv = col_csv + rp_name

        # In a table where duplicates are removed (np.unique), find the number of
        # gauges contained in the .csv file.
        # no_gauge = len(ComputationalNodes)
        # Declare a dataframe for the output file, with the gauge numbers as index
        # and all the output names as columns.
        StatisticalPr = pd.DataFrame(np.nan,
                                     index=ComputationalNodes,
                                     columns=col_csv)
        StatisticalPr.index.name = 'ID'
        DistributionPr = pd.DataFrame(np.nan,
                                      index=ComputationalNodes,
                                      columns=['c', 'loc', 'scale'])
        DistributionPr.index.name = 'ID'
        # required return periods
        T = np.array([1.5, 2, 5, 10, 25, 50, 100, 200, 500, 1000])
        # the corresponding non-exceedance probabilities: F = 1 - (1/T)
        F = 1 - (1 / T)
        # Iteration over all the gauge numbers.
        for i in ComputationalNodes:
            QTS = TS.loc[:, i]
            # The time series is resampled to the annual maxima, and turned into a
            # numpy array.
            # The hydrological year is 1-Nov/31-Oct (from Petrow and Merz, 2009, JoH).
            amax = QTS.resample('A-OCT').max().values

            if not isinstance(Filter, bool):
                amax = amax[amax != Filter]
            if EstimateParameters:
                # estimate the parameters through an optimization
                # alpha = (np.sqrt(6) / np.pi) * amax.std()
                # beta = amax.mean() - 0.5772 * alpha
                # param_dist = [beta, alpha]
                threshold = np.quantile(amax, Quartile)
                if Distibution == "GEV":
                    print("Still to be finished later")
                else:
                    param = Gumbel.EstimateParameter(amax, Gumbel.ObjectiveFn,
                                                     threshold)
                    param_dist = [param[1], param[2]]

            else:
                # estimate the parameters through the maximum likelihood method
                if Distibution == "GEV":
                    param_dist = genextreme.fit(amax)
                else:
                    # a Gumbel distribution is fitted to the annual maxima
                    param_dist = gumbel_r.fit(amax)

            if Distibution == "GEV":
                DistributionPr.loc[i, 'c'] = param_dist[0]
                DistributionPr.loc[i, 'loc'] = param_dist[1]
                DistributionPr.loc[i, 'scale'] = param_dist[2]
            else:
                DistributionPr.loc[i, 'loc'] = param_dist[0]
                DistributionPr.loc[i, 'scale'] = param_dist[1]

            # Return periods from the fitted distribution are stored.
            # get the discharge corresponding to the return periods
            if Distibution == "GEV":
                Qrp = genextreme.ppf(F,
                                     param_dist[0],
                                     loc=param_dist[1],
                                     scale=param_dist[2])
            else:
                Qrp = gumbel_r.ppf(F, loc=param_dist[0], scale=param_dist[1])
            # to get the non-exceedance probability for a specific value,
            # sort the amax
            amax.sort()
            # empirical F (non-exceedance probability, Weibull plotting position)
            cdf_Weibul = ST.Weibul(amax)
            # the ProbapilityPlot method calculates the theoretical quantiles from the
            # fitted parameters and the empirical cdf, plus the confidence interval
            if Distibution == "GEV":
                Qth, Qupper, Qlower = GEV.ProbapilityPlot(
                    param_dist, cdf_Weibul, amax, SignificanceLevel)
                # to calculate the F theoretical
                Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
                pdf_fitted = genextreme.pdf(Qx,
                                            param_dist[0],
                                            loc=param_dist[1],
                                            scale=param_dist[2])
                cdf_fitted = genextreme.cdf(Qx,
                                            param_dist[0],
                                            loc=param_dist[1],
                                            scale=param_dist[2])
            else:
                Qth, Qupper, Qlower = Gumbel.ProbapilityPlot(
                    param_dist, cdf_Weibul, amax, SignificanceLevel)
                # gumbel_r.interval(SignificanceLevel)
                # to calculate the F theoretical
                Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
                pdf_fitted = gumbel_r.pdf(Qx,
                                          loc=param_dist[0],
                                          scale=param_dist[1])
                cdf_fitted = gumbel_r.cdf(Qx,
                                          loc=param_dist[0],
                                          scale=param_dist[1])
            # then calculate the T (return period): T = 1/(1-F)
            if SavePlots:
                fig = plt.figure(60, figsize=(20, 10))
                gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)
                # Plot the histogram and the fitted distribution, save it for each gauge.
                ax1 = fig.add_subplot(gs[0, 0])
                ax1.plot(Qx, pdf_fitted, 'r-')
                ax1.hist(amax, density=True)
                ax1.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
                ax1.set_ylabel('pdf', fontsize=15)

                ax2 = fig.add_subplot(gs[0, 1])
                ax2.plot(Qx, cdf_fitted, 'r-')
                ax2.plot(amax, cdf_Weibul, '.-')
                ax2.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
                ax2.set_ylabel('cdf', fontsize=15)

                plt.savefig(SavePath + "/" + "Figures/" + str(i) + '.png',
                            format='png')
                plt.close()

                fig = plt.figure(70, figsize=(10, 8))
                plt.plot(Qth,
                         amax,
                         'd',
                         color='#606060',
                         markersize=12,
                         label='%s distribution' % Distibution)
                plt.plot(Qth,
                         Qth,
                         '^-.',
                         color="#3D59AB",
                         label="Weibul plotting position")
                if Distibution != "GEV":
                    plt.plot(Qth,
                             Qlower,
                             '*--',
                             color="#DC143C",
                             markersize=12,
                             label='Lower limit (' +
                             str(int(
                                 (1 - SignificanceLevel) * 100)) + " % CI)")
                    plt.plot(Qth,
                             Qupper,
                             '*--',
                             color="#DC143C",
                             markersize=12,
                             label='Upper limit (' +
                             str(int(
                                 (1 - SignificanceLevel) * 100)) + " % CI)")

                plt.legend(fontsize=15, framealpha=1)
                plt.xlabel('Theoretical Annual Discharge(m3/s)', fontsize=15)
                plt.ylabel('Annual Discharge(m3/s)', fontsize=15)
                plt.savefig(SavePath + "/" + "Figures/F-" + str(i) + '.png',
                            format='png')
                plt.close()

            StatisticalPr.loc[i, 'mean'] = QTS.mean()
            StatisticalPr.loc[i, 'std'] = QTS.std()
            StatisticalPr.loc[i, 'min'] = QTS.min()
            StatisticalPr.loc[i, '5%'] = QTS.quantile(0.05)
            StatisticalPr.loc[i, '25%'] = QTS.quantile(0.25)
            StatisticalPr.loc[i, 'median'] = QTS.quantile(0.50)
            StatisticalPr.loc[i, '75%'] = QTS.quantile(0.75)
            StatisticalPr.loc[i, '95%'] = QTS.quantile(0.95)
            StatisticalPr.loc[i, 'max'] = QTS.max()
            StatisticalPr.loc[i, 't_beg'] = QTS.index.min()
            StatisticalPr.loc[i, 't_end'] = QTS.index.max()
            StatisticalPr.loc[
                i, 'nyr'] = (StatisticalPr.loc[i, 't_end'] -
                             StatisticalPr.loc[i, 't_beg']).days / 365.25
            for irp, irp_name in zip(Qrp, rp_name):
                StatisticalPr.loc[i, irp_name] = irp

            # Print for prompt and check progress.
            print("Gauge", i, "done.")
        # Output file
        StatisticalPr.to_csv(SavePath + "/" + "Statistical Properties.csv")
        self.StatisticalPr = StatisticalPr
        DistributionPr.to_csv(SavePath + "/" + "DistributionProperties.csv")
        self.DistributionPr = DistributionPr
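ST.Weibul is not shown in this fragment. A minimal sketch of a Weibull plotting-position helper consistent with how it is called on the sorted annual maxima (the module and function names are taken from the call above; i/(n+1) is the standard Weibull plotting position):

import numpy as np

def Weibul(sorted_values):
    # empirical non-exceedance probability F_i = i / (n + 1)
    # for the i-th smallest of n ascending-sorted values
    n = len(sorted_values)
    return np.arange(1, n + 1) / (n + 1)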
Example #19
# This fragment follows the scipy ``genextreme`` docstring example and assumes
# the usual setup, e.g.:
#
#   import numpy as np
#   import matplotlib.pyplot as plt
#   from scipy.stats import genextreme
#   fig, ax = plt.subplots(1, 1)
#   c = -0.1
#
# Display the probability density function (``pdf``):

x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))
# True

# Generate random numbers:

r = genextreme.rvs(c, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()