def gumbelAdcSignals(self):
    """Simulate one ADC waveform: low-amplitude random baseline noise with a
    50% chance of a Gumbel-shaped hit pulse spliced in at ``self.randTimeIndex``.

    Reads:  self.numSamples, self.randTimeIndex
    Writes: self.baseLine, self.hitJudge, self.adcSamples
            (and on a hit: self.gumbelMean, self.gumbelBeta,
             self.gumbelPpf, self.gumbelPdf)
    """
    # Baseline noise, uniform in [0, 1e-2).
    self.baseLine = np.random.rand(int(self.numSamples)) * 1.0e-2
    # Coin flip deciding hit / no-hit. np.random.random() is in [0, 1),
    # so "< 0.5" and its complement are exhaustive — the original's
    # redundant `elif hitJudge >= 0.5` is simplified to `else`.
    self.hitJudge = np.random.random()
    if self.hitJudge < 0.5:
        # No hit: the waveform is just the baseline.
        self.adcSamples = self.baseLine
    else:
        # Hit: draw random Gumbel parameters and evaluate the pdf on a
        # 100-point grid spanning the central 99.8% of the distribution.
        self.gumbelMean = np.random.random()
        self.gumbelBeta = np.random.random()
        self.gumbelPpf = np.linspace(
            gumbel_r.ppf(0.001, loc=self.gumbelMean, scale=self.gumbelBeta),
            gumbel_r.ppf(0.999, loc=self.gumbelMean, scale=self.gumbelBeta),
            100)
        self.gumbelPdf = gumbel_r.pdf(self.gumbelPpf,
                                      loc=self.gumbelMean,
                                      scale=self.gumbelBeta)
        # Splice the pulse into the baseline at randTimeIndex, then truncate
        # back to numSamples so the output length is fixed.
        self.adcSamples = np.insert(self.baseLine, self.randTimeIndex,
                                    self.gumbelPdf)[:int(self.numSamples)]
def _saturated_score(self, predictions, response, case_weights=None):
    """Score `predictions` against `response` under a cloglog likelihood,
    dispatching on ``self.score_method``.

    response may be 2-D (columns: successes, trials) or 1-D (successes only,
    trials unknown). Returns a float score where larger is better, or np.nan
    for an unrecognised score_method.
    """
    if response.ndim == 2:
        # Binomial-style input: first column successes, second column trials.
        successes = response[:, 0]
        trials = response[:, 1]
    else:
        successes = response
        trials = None
    # Negative log-likelihood of `yhat`, delegated to the project helper
    # cloglog_loglike (defined elsewhere — contract assumed from usage here).
    loss = lambda yhat: cloglog_loglike(
        successes.shape,
        successes,
        trials=trials,
        case_weights=case_weights).smooth_objective(yhat, 'func')
    # factor of 2 to form proper deviance (default is negative log-likelihood,
    # while deviance is 2 * negative log-likelihood)
    # negative sign is to align with sklearn's maximizing a score with grid search
    if self.score_method == 'deviance':
        return -2 * loss(predictions)
    elif self.score_method == 'mean_deviance':
        # NOTE(review): identical to the 'deviance' branch — a *mean* deviance
        # would normally divide by the number of observations (or weight sum).
        # Confirm whether cloglog_loglike already averages internally.
        return -2 * loss(predictions)
    elif self.score_method == 'R2':
        # Pseudo-R^2: 1 - SSE/SST on the deviance scale, with the null model
        # predicting a constant linear predictor for the base rate pi_0.
        SSE = 2 * loss(predictions)
        pi_0 = response.mean()
        # NOTE(review): gumbel_r.ppf(p) = -log(-log(p)), whereas the cloglog
        # link is log(-log(1 - p)); the original author also flagged this —
        # verify which inverse link the null baseline should use.
        cloglog_0 = gumbel_r.ppf(pi_0)
        SST = 2 * loss(
            cloglog_0 * np.ones_like(response))  # X: correct for cloglog?
        return 1 - SSE / SST
    elif self.score_method == 'accuracy':
        # Threshold the linear predictor at 0 and compare to the response.
        labels = predictions > 0
        return np.mean(labels == response)
    else:
        # Unknown score_method: signal "no score" rather than raising.
        return np.nan
def StatisticalProperties(self, PathNodes, PathTS, StartDate, WarmUpPeriod,
                          SavePlots, SavePath, SeparateFiles=False,
                          Filter=False, Distibution="GEV",
                          EstimateParameters=False, Quartile=0,
                          RIMResults=False, SignificanceLevel=0.1):
    """
    =========================================================================
    StatisticalProperties(PathNodes, PathTS, StartDate, WarmUpPeriod,
                          SavePlots, SavePath, SeparateFiles=False,
                          Filter=False, RIMResults=False)
    =========================================================================
    Read the SWIM output (.dat) discharge time series for a set of
    computational nodes and compute statistical properties plus flood
    quantiles for standard return periods.

    The time series are assumed daily, and the hydrological year is
    1-Nov/31-Oct (Petrow and Merz, 2009, JoH).

    Parameters
    ----------
    1-PathNodes : [String]
        file containing the IDs of the computational nodes to analyse.
    2-PathTS : [String]
        the SWIM result file (the .dat file), or - with SeparateFiles - the
        folder containing one <node>.txt file per node.
    3-StartDate : [string]
        begining date of the time series ("YYYY-MM-DD").
    4-WarmUpPeriod : [integer]
        number of days to neglect at the begining of the simulation.
    5-SavePlots : [Bool]
        whether to save pdf/cdf and probability plots per gauge.
    6-SavePath : [String]
        path where the statistical properties are saved.
    7-SeparateFiles : [Bool]
        True if the discharge data are stored in separate files. default [False].
    8-Filter : [Bool/number]
        sentinel value marking gap days to ignore; keep False to disable.
    9-Distibution : [String]
        "GEV" or Gumbel (any other value). default ["GEV"].
    10-EstimateParameters : [Bool]
        estimate parameters via optimization above a threshold instead of
        maximum likelihood. default [False].
    11-Quartile : [float]
        quantile defining the optimization threshold. default [0].
    12-RIMResults : [Bool]
        True if the separate files are RIM results (different format).
    13-SignificanceLevel : [float]
        significance level of the confidence interval. default [0.1].

    Returns
    -------
    1-Statistical Properties.csv:
        mean, std, min, 5%, 25%, median, 75%, 95%, max, t_beg, t_end, nyr,
        q1.5, q2, q5, q10, q25, q50, q100, q200, q500, q1000.
    """
    ComputationalNodes = np.loadtxt(PathNodes, dtype=np.uint16)
    # hydrographs
    if SeparateFiles:
        TS = pd.DataFrame()
        if RIMResults:
            for i in range(len(ComputationalNodes)):
                TS.loc[:, int(ComputationalNodes[i])] = self.ReadRIMResult(
                    PathTS + "/" + str(int(ComputationalNodes[i])) + '.txt')
        else:
            for i in range(len(ComputationalNodes)):
                TS.loc[:, int(ComputationalNodes[i])] = np.loadtxt(
                    PathTS + "/" + str(int(ComputationalNodes[i])) + '.txt')
        StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
        EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
        TS.index = pd.date_range(StartDate, EndDate)
    else:
        TS = pd.read_csv(PathTS, delimiter=r'\s+', header=None)
        StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
        EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
        TS.index = pd.date_range(StartDate, EndDate, freq="D")
        # delete the first two columns (time-step bookkeeping in the .dat file)
        del TS[0], TS[1]
        TS.columns = ComputationalNodes

    # neglect the warm-up period at the beginning of the time series
    TS = TS.loc[StartDate + dt.timedelta(days=WarmUpPeriod):EndDate, :]

    # Output table columns: general statistics plus the return periods.
    col_csv = ['mean', 'std', 'min', '5%', '25%', 'median', '75%', '95%',
               'max', 't_beg', 't_end', 'nyr']
    rp_name = ['q1.5', 'q2', 'q5', 'q10', 'q25', 'q50', 'q100', 'q200',
               'q500', 'q1000']
    col_csv = col_csv + rp_name

    # Output dataframes, indexed by gauge/node ID.
    StatisticalPr = pd.DataFrame(np.nan, index=ComputationalNodes,
                                 columns=col_csv)
    StatisticalPr.index.name = 'ID'
    DistributionPr = pd.DataFrame(np.nan, index=ComputationalNodes,
                                  columns=['loc', 'scale'])
    DistributionPr.index.name = 'ID'

    # Required return periods.
    # BUG FIX: the original list contained 50 twice (11 values for the 10
    # names in rp_name), so zip(Qrp, rp_name) silently assigned the T=50
    # quantile to 'q100', T=100 to 'q200', etc. The duplicate is removed.
    T = np.array([1.5, 2, 5, 10, 25, 50, 100, 200, 500, 1000])
    # Non-exceedance probabilities F of the chosen return periods
    # (computed once; the original computed it twice).
    F = 1 - (1 / T)

    # Iteration over all the gauge numbers.
    for i in ComputationalNodes:
        QTS = TS.loc[:, i]
        # Resample to annual maxima; the hydrological year is 1-Nov/31-Oct
        # (from Petrow and Merz, 2009, JoH).
        amax = QTS.resample('A-OCT').max().values
        if type(Filter) != bool:
            # Drop gap days marked with the sentinel value.
            amax = amax[amax != Filter]

        if EstimateParameters:
            # Estimate the parameters through an optimization above a
            # quantile threshold.
            threshold = np.quantile(amax, Quartile)
            if Distibution == "GEV":
                print("Still to be finished later")
            else:
                param = Gumbel.EstimateParameter(amax, Gumbel.ObjectiveFn,
                                                 threshold)
                param_dist = [param[1], param[2]]
        else:
            # Estimate the parameters through maximum likelihood.
            if Distibution == "GEV":
                param_dist = genextreme.fit(amax)
            else:
                # A Gumbel distribution is fitted to the annual maxima.
                param_dist = gumbel_r.fit(amax)

        if Distibution == "GEV":
            DistributionPr.loc[i, 'c'] = param_dist[0]
            DistributionPr.loc[i, 'loc'] = param_dist[1]
            DistributionPr.loc[i, 'scale'] = param_dist[2]
        else:
            DistributionPr.loc[i, 'loc'] = param_dist[0]
            DistributionPr.loc[i, 'scale'] = param_dist[1]

        # Discharge corresponding to the required return periods.
        if Distibution == "GEV":
            Qrp = genextreme.ppf(F, param_dist[0], loc=param_dist[1],
                                 scale=param_dist[2])
        else:
            Qrp = gumbel_r.ppf(F, loc=param_dist[0], scale=param_dist[1])

        # Sort the annual maxima and compute the empirical (Weibull)
        # plotting-position cdf.
        amax.sort()
        cdf_Weibul = ST.Weibul(amax)
        # ProbapilityPlot computes theoretical quantiles and the confidence
        # interval bounds at the given significance level.
        if Distibution == "GEV":
            Qth, Qupper, Qlower = GEV.ProbapilityPlot(
                param_dist, cdf_Weibul, amax, SignificanceLevel)
            Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
            # BUG FIX: the original passed loc=param_dist[2] (the scale) to
            # genextreme.pdf; the location parameter is param_dist[1].
            pdf_fitted = genextreme.pdf(Qx, param_dist[0], loc=param_dist[1],
                                        scale=param_dist[2])
            cdf_fitted = genextreme.cdf(Qx, param_dist[0], loc=param_dist[1],
                                        scale=param_dist[2])
        else:
            Qth, Qupper, Qlower = Gumbel.ProbapilityPlot(
                param_dist, cdf_Weibul, amax, SignificanceLevel)
            Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
            pdf_fitted = gumbel_r.pdf(Qx, loc=param_dist[0],
                                      scale=param_dist[1])
            cdf_fitted = gumbel_r.cdf(Qx, loc=param_dist[0],
                                      scale=param_dist[1])

        if SavePlots:
            # Histogram + fitted pdf and empirical vs fitted cdf per gauge.
            fig = plt.figure(60, figsize=(20, 10))
            gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)
            ax1 = fig.add_subplot(gs[0, 0])
            ax1.plot(Qx, pdf_fitted, 'r-')
            ax1.hist(amax, density=True)
            ax1.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
            ax1.set_ylabel('pdf', fontsize=15)
            ax2 = fig.add_subplot(gs[0, 1])
            ax2.plot(Qx, cdf_fitted, 'r-')
            ax2.plot(amax, cdf_Weibul, '.-')
            ax2.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
            ax2.set_ylabel('cdf', fontsize=15)
            plt.savefig(SavePath + "/" + "Figures/" + str(i) + '.png',
                        format='png')
            plt.close()

            # Probability plot (theoretical vs observed) with CI bounds.
            fig = plt.figure(70, figsize=(10, 8))
            plt.plot(Qth, amax, 'd', color='#606060', markersize=12,
                     label='Gumbel Distribution')
            plt.plot(Qth, Qth, '^-.', color="#3D59AB",
                     label="Weibul plotting position")
            if Distibution != "GEV":
                plt.plot(Qth, Qlower, '*--', color="#DC143C", markersize=12,
                         label='Lower limit (' + str(int(
                             (1 - SignificanceLevel) * 100)) + " % CI)")
                plt.plot(Qth, Qupper, '*--', color="#DC143C", markersize=12,
                         label='Upper limit (' + str(int(
                             (1 - SignificanceLevel) * 100)) + " % CI)")
            plt.legend(fontsize=15, framealpha=1)
            plt.xlabel('Theoretical Annual Discharge(m3/s)', fontsize=15)
            plt.ylabel('Annual Discharge(m3/s)', fontsize=15)
            plt.savefig(SavePath + "/" + "Figures/F-" + str(i) + '.png',
                        format='png')
            plt.close()

        StatisticalPr.loc[i, 'mean'] = QTS.mean()
        StatisticalPr.loc[i, 'std'] = QTS.std()
        StatisticalPr.loc[i, 'min'] = QTS.min()
        StatisticalPr.loc[i, '5%'] = QTS.quantile(0.05)
        StatisticalPr.loc[i, '25%'] = QTS.quantile(0.25)
        StatisticalPr.loc[i, 'median'] = QTS.quantile(0.50)
        StatisticalPr.loc[i, '75%'] = QTS.quantile(0.75)
        StatisticalPr.loc[i, '95%'] = QTS.quantile(0.95)
        StatisticalPr.loc[i, 'max'] = QTS.max()
        StatisticalPr.loc[i, 't_beg'] = QTS.index.min()
        StatisticalPr.loc[i, 't_end'] = QTS.index.max()
        StatisticalPr.loc[
            i, 'nyr'] = (StatisticalPr.loc[i, 't_end'] -
                         StatisticalPr.loc[i, 't_beg']).days / 365.25
        for irp, irp_name in zip(Qrp, rp_name):
            StatisticalPr.loc[i, irp_name] = irp

        # Print for prompt and check progress.
        print("Gauge", i, "done.")

    # Output files.
    StatisticalPr.to_csv(SavePath + "/" + "Statistical Properties.csv")
    self.StatisticalPr = StatisticalPr
    DistributionPr.to_csv(SavePath + "/" + "DistributionProperties.csv")
    self.DistributionPr = DistributionPr
def Finv_Gumbel(r, m, s):
    """Gumbel quantile function for non-exceedance probability `r`,
    with (scale, loc) derived from mean `m` and std `s` via p_Gumbel."""
    params = p_Gumbel(m, s)
    return gumbel_r.ppf(r, loc=params[1], scale=params[0])
from scipy.stats import gumbel_r
import matplotlib.pyplot as plt
import numpy as np  # BUG FIX: np was used below but never imported

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
mean, var, skew, kurt = gumbel_r.stats(moments='mvsk')

# Display the probability density function (``pdf``):
x = np.linspace(gumbel_r.ppf(0.01), gumbel_r.ppf(0.99), 100)
ax.plot(x, gumbel_r.pdf(x), 'r-', lw=5, alpha=0.6, label='gumbel_r pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = gumbel_r()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = gumbel_r.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], gumbel_r.cdf(vals))
# True

# Generate random numbers:
r = gumbel_r.rvs(size=1000)
ax.set_title('Tp') #%% Extremes ericeira_wts.plot_timeseries() hmax = ericeira_wts.maxima() hmax.plot() #%% from scipy.stats import gumbel_r from scipy.stats import probplot import statsmodels.distributions loc, scale = gumbel_r.fit(hmax) fig, ax = plt.subplots() x = np.linspace(gumbel_r.ppf(0.01, loc=loc, scale=scale), gumbel_r.ppf(0.99, loc=loc, scale=scale), 100) ax.plot(x, gumbel_r.pdf(x, loc=loc, scale=scale), 'r-', lw=5, alpha=0.6, label='gumbel_r pdf') ax.hist(hmax, density=True) fig, ax = plt.subplots() ax.plot(x, gumbel_r.cdf(x, loc=loc, scale=scale), 'r-', lw=5, alpha=0.6,
def StatisticalProperties(self, PathNodes, PathTS, StartDate, WarmUpPeriod,
                          SavePlots, SavePath, SeparateFiles=False,
                          Filter=False):
    """
    =========================================================================
    StatisticalProperties(PathNodes, PathTS, StartDate, WarmUpPeriod,
                          SavePlots, SavePath)
    =========================================================================
    Read the SWIM output (.dat) discharge time series for a set of
    computational nodes and compute statistical properties plus Gumbel flood
    quantiles for standard return periods.

    The time series are assumed daily, and the hydrological year is
    1-Nov/31-Oct (Petrow and Merz, 2009, JoH).

    Parameters
    ----------
    1-PathNodes : [String]
        file containing the IDs of the computational nodes to analyse; the
        observed file should contain the discharge series of these nodes
        in order.
    2-PathTS : [String]
        the SWIM result file (the .dat file), or - with SeparateFiles - the
        folder containing one <node>.txt file per node.
    3-StartDate : [string]
        begining date of the time series ("YYYY-MM-DD").
    4-WarmUpPeriod : [integer]
        number of days to neglect at the begining of the simulation.
    5-SavePlots : [Bool]
        whether to save the pdf/histogram figure per gauge.
    6-SavePath : [String]
        path where the statistical properties are saved.
    7-SeparateFiles : [Bool]
        True if the discharge data are stored in separate files. default [False].
    8-Filter : [Bool/number]
        sentinel value marking gap days to ignore; keep False to disable.

    Returns
    -------
    1-Statistical Properties.csv:
        mean, std, min, 5%, 25%, median, 75%, 95%, max, t_beg, t_end, nyr,
        q1.5, q2, q5, q10, q25, q50, q100, q200, q500.
    """
    ComputationalNodes = np.loadtxt(PathNodes, dtype=np.uint16)
    # hydrographs
    if SeparateFiles:
        ObservedTS = pd.DataFrame()
        for i in range(len(ComputationalNodes)):
            ObservedTS.loc[:, int(ComputationalNodes[i])] = np.loadtxt(
                PathTS + "/" + str(int(ComputationalNodes[i])) + '.txt')
        StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
        EndDate = StartDate + dt.timedelta(days=ObservedTS.shape[0] - 1)
        ObservedTS.index = pd.date_range(StartDate, EndDate)
    else:
        ObservedTS = pd.read_csv(PathTS, delimiter=r'\s+', header=None)
        StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
        EndDate = StartDate + dt.timedelta(days=ObservedTS.shape[0] - 1)
        ObservedTS.index = pd.date_range(StartDate, EndDate, freq="D")
        # delete the first two columns (time-step bookkeeping in the .dat file)
        del ObservedTS[0], ObservedTS[1]
        ObservedTS.columns = ComputationalNodes

    # neglect the warm-up period at the beginning of the time series
    ObservedTS = ObservedTS.loc[StartDate +
                                dt.timedelta(days=WarmUpPeriod):EndDate, :]

    # Output table columns: general statistics plus the return periods.
    col_csv = ['mean', 'std', 'min', '5%', '25%', 'median', '75%', '95%',
               'max', 't_beg', 't_end', 'nyr']
    rp_name = ['q1.5', 'q2', 'q5', 'q10', 'q25', 'q50', 'q100', 'q200',
               'q500']
    col_csv = col_csv + rp_name

    # Output dataframes, indexed by gauge/node ID.
    StatisticalPr = pd.DataFrame(np.nan, index=ComputationalNodes,
                                 columns=col_csv)
    StatisticalPr.index.name = 'ID'
    DistributionPr = pd.DataFrame(np.nan, index=ComputationalNodes,
                                  columns=['loc', 'scale'])
    DistributionPr.index.name = 'ID'

    # Required return periods.
    # BUG FIX: the original list contained 50 twice (10 values for the 9
    # names in rp_name), so zip(Qrp, rp_name) silently assigned the T=50
    # quantile to 'q100', T=100 to 'q200', etc. The duplicate is removed.
    T = np.array([1.5, 2, 5, 10, 25, 50, 100, 200, 500])
    # Non-exceedance probabilities F of the chosen return periods
    # (computed once; the original computed it twice).
    F = 1 - (1 / T)

    # Iteration over all the gauge numbers.
    for i in ComputationalNodes:
        QTS = ObservedTS.loc[:, i]
        # Resample to annual maxima; the hydrological year is 1-Nov/31-Oct
        # (from Petrow and Merz, 2009, JoH).
        amax = QTS.resample('A-OCT').max().values
        if type(Filter) != bool:
            # Drop gap days marked with the sentinel value.
            amax = amax[amax != Filter]

        # A gumbel distribution is fitted to the annual maxima.
        param_dist = gumbel_r.fit(amax)
        DistributionPr.loc[i, 'loc'] = param_dist[0]
        DistributionPr.loc[i, 'scale'] = param_dist[1]

        # Discharge corresponding to the required return periods.
        Qrp = gumbel_r.ppf(F, loc=param_dist[0], scale=param_dist[1])

        # Plot the histogram and the fitted distribution for each gauge.
        Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
        pdf_fitted = gumbel_r.pdf(Qx, loc=param_dist[0], scale=param_dist[1])
        if SavePlots:
            plt.plot(Qx, pdf_fitted, 'r-')
            # BUG FIX: `normed=True` was removed in matplotlib >= 3.1;
            # `density=True` is the equivalent, supported replacement.
            plt.hist(amax, density=True)
            plt.savefig(SavePath + "/" + "Figures/" + str(i) + '.png',
                        format='png')
            plt.close()

        StatisticalPr.loc[i, 'mean'] = QTS.mean()
        StatisticalPr.loc[i, 'std'] = QTS.std()
        StatisticalPr.loc[i, 'min'] = QTS.min()
        StatisticalPr.loc[i, '5%'] = QTS.quantile(0.05)
        StatisticalPr.loc[i, '25%'] = QTS.quantile(0.25)
        StatisticalPr.loc[i, 'median'] = QTS.quantile(0.50)
        StatisticalPr.loc[i, '75%'] = QTS.quantile(0.75)
        StatisticalPr.loc[i, '95%'] = QTS.quantile(0.95)
        StatisticalPr.loc[i, 'max'] = QTS.max()
        StatisticalPr.loc[i, 't_beg'] = QTS.index.min()
        StatisticalPr.loc[i, 't_end'] = QTS.index.max()
        StatisticalPr.loc[
            i, 'nyr'] = (StatisticalPr.loc[i, 't_end'] -
                         StatisticalPr.loc[i, 't_beg']).days / 365.25
        for irp, irp_name in zip(Qrp, rp_name):
            StatisticalPr.loc[i, irp_name] = irp

        # Print for prompt and check progress.
        print("Gauge", i, "done.")

    # Output files.
    StatisticalPr.to_csv(SavePath + "/" + "Statistical Properties.csv")
    self.StatisticalPr = StatisticalPr
    DistributionPr.to_csv(SavePath + "/" + "DistributionProperties.csv")
    self.DistributionPr = DistributionPr
# calculate the number of samples smaller than the reference score
# NOTE(review): the counter initialisation was not visible in the source;
# it is (re)initialised here so the count starts from zero.
number_value_smaller = 0
for score in permutation_scores:
    if score < reference_score:
        number_value_smaller += 1

# estimate the number of total samples + 1
number_samples = len(permutation_scores) + 1

# calculate the p-value: fraction of permutations at least as extreme as
# the reference score (with the +1 "add-one" correction in the denominator)
p_value = 1 - (float(number_value_smaller) / float(number_samples))
print(p_value)

# Task 5: Compute the associate p-value using an estimated gumble distribution
# estimate the parameter loc and scale using the fit function
loc, scale = gumbel_r.fit(permutation_scores)
# calculate the p-value as the upper-tail probability of the fitted Gumbel
p_value = 1 - gumbel_r.cdf(reference_score, loc=loc, scale=scale)
print(p_value)

# Task 6: Plot the histogram and the fitted probability density function
# with the reference score as vertical line
fig, ax = plt.subplots(1, 1)
x = np.linspace(gumbel_r.ppf(0.01, loc=loc, scale=scale),
                gumbel_r.ppf(0.99, loc=loc, scale=scale), 1000)
ax.plot(x, gumbel_r.pdf(x, loc=loc, scale=scale), 'k-', lw=2,
        label='frozen pdf')
# BUG FIX: `normed=True` was removed in matplotlib >= 3.1; use density=True.
ax.hist(permutation_scores, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
ax.axvline(reference_score)
plt.show()