Code Example #1
def sim_data(n: int, j: int, theta: np.ndarray) -> dict:
    """Takes input values n and j to specify the shape of the output data. The
    k dimension is inferred from the length of theta. Creates a y vector with
    the choice that maximises utility for each household, and an x matrix of
    covariates drawn from a standard normal distribution.

    Args:
        n (int): Number of households.
        j (int): Number of choices.
        theta (np.ndarray): The true values of the coefficients.

    Returns:
        dict: Returns a dict with keys "y" and "x".
    """
    k = theta.size

    x = rng.normal(size=(n, j, k))
    v = x @ theta
    e = genextreme.ppf(rng.uniform(size=(n, j)), c=0)
    u = v + e

    # Find the choice that maximises utility.
    u_index = u.argmax(axis=1)

    label = ['y', 'x']
    return dict(zip(label, [u_index, x]))
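
With shape c=0, genextreme reduces to the Gumbel (Type-I extreme value) distribution, so the `genextreme.ppf(rng.uniform(...), c=0)` pattern above is inverse-CDF sampling of the logit error term. A minimal sketch checking that equivalence, with the imports and module-level `rng` the function above assumes:

import numpy as np
from scipy.stats import genextreme, gumbel_r

rng = np.random.default_rng(0)

u = rng.uniform(size=100_000)
e = genextreme.ppf(u, c=0)   # inverse-CDF sampling of the error term

# Identical quantile functions: GEV with c=0 is the Gumbel distribution.
print(np.allclose(e, gumbel_r.ppf(u)))   # True
print(e.mean())                          # ~0.5772 (Euler-Mascheroni constant)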
Code Example #2
def sim_data(n: int, j: int, theta: np.ndarray) -> dict:
    """Takes input values n and j to specify the shape of the output data. The
    k dimension is inferred from the size of theta and the number of choices.
    Creates a y vector with the choice that maximises utility for each
    household, and an x matrix of covariates (a constant plus standard normal
    draws).

    Args:
        n (int): Number of households.
        j (int): Number of choices.
        theta (np.ndarray): The true values of the coefficients, shape (k, j - 1).

    Returns:
        dict: Returns a dict with keys "y" and "x".
    """
    k = int(theta.size / (j - 1))
    const = np.ones((n, 1))
    x0 = rng.normal(size=(n, k - 1))
    x = np.hstack((const, x0))

    # The first choice is the reference category, so its systematic
    # utility is left at zero.
    v = np.zeros((n, j))
    for i in range(1, j):
        v[:, i] = x @ theta[:, i - 1]
    e = genextreme.ppf(rng.uniform(size=(n, j)), c=0)
    u = v + e

    # Find the choice that maximises utility.
    u_index = u.argmax(axis=1)

    label = ['y', 'x']
    return dict(zip(label, [u_index, x]))
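
Here theta is taken to be a (k, j - 1) matrix, one coefficient column per non-reference alternative. A hypothetical call, assuming the definition above is in scope (the concrete numbers are illustrative):

import numpy as np
from scipy.stats import genextreme

rng = np.random.default_rng(42)   # the module-level generator sim_data relies on

theta = np.array([[0.5, 1.0],
                  [-0.2, 0.3]])   # k = 2 covariates, j - 1 = 2 non-reference choices
d = sim_data(n=1000, j=3, theta=theta)
print(d["y"].shape, d["x"].shape)  # (1000,) (1000, 2)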
Code Example #3
    def value(self, p, estimador=None):
        try:
            return genextreme.ppf(p, c=self.shape, loc=self.loc, scale=self.scale)
        except AttributeError:
            # Parameters not fitted yet: run the requested estimator, then retry.
            if estimador not in self.estimadores:
                raise ValueError('Estimator does not exist')
            getattr(self, estimador)()
            return self.value(p, estimador=estimador)
Code Example #4
    def get_coverage_interval(self, COEFF, distribution, coverage):
        interval_list = [None] * len(COEFF)
        # `coverage` is read as the total tail probability (e.g. 0.05 for a
        # 95% interval), split evenly between the two tails.
        lower_q = float(coverage) / 2.0
        upper_q = 1.0 - float(coverage) / 2.0

        if distribution == 'gev':
            for sample_index in range(len(COEFF)):
                shape = COEFF[sample_index][0]
                mu = COEFF[sample_index][1]
                sigma = COEFF[sample_index][2]
                # scipy's signature is ppf(q, c, loc=..., scale=...):
                # the shape parameter comes first.
                lower = gev.ppf(lower_q, shape, loc=mu, scale=sigma)
                upper = gev.ppf(upper_q, shape, loc=mu, scale=sigma)
                interval_list[sample_index] = [lower, upper]
        return interval_list
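
Note the argument order: for scipy.stats.genextreme (imported here as gev), the quantile function is `ppf(q, c, loc=..., scale=...)` with the shape parameter first, and the two quantiles must sit in opposite tails. A quick self-check sketch with synthetic coefficients:

import numpy as np
from scipy.stats import genextreme as gev

sample = gev.rvs(-0.1, loc=10.0, scale=2.0, size=5000, random_state=1)
shape, mu, sigma = gev.fit(sample)

# 90% coverage interval from the fitted distribution (coverage = 0.1)
lower = gev.ppf(0.05, shape, loc=mu, scale=sigma)
upper = gev.ppf(0.95, shape, loc=mu, scale=sigma)
print(lower, upper)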
Code Example #5
def plot_histograma_e_gev(str_fam_sinal,
                          df_sinais,
                          c,
                          loc,
                          scale,
                          num_inicio,
                          num_final,
                          num_total,
                          nome_coluna='valor'):
    arr_valores_atuais = df_sinais[nome_coluna].to_numpy()
    histogram, bins_edge = np.histogram(arr_valores_atuais, bins=20)

    width = 0.7 * (bins_edge[1] - bins_edge[0])
    center = (bins_edge[:-1] + bins_edge[1:]) / 2

    # plot histogram
    # fig, ax = plt.subplots(1, 1)
    fig, ax1 = plt.subplots()
    color = 'tab:blue'
    plt.bar(center, histogram, align='center', width=width)
    plt.title('Histograma da Série {}'.format(str_fam_sinal))
    plt.xlabel("bin")
    plt.ylabel("Quantidade")
    ax1.tick_params(axis='y', labelcolor=color)

    # plot PDF
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    color = 'tab:red'
    # the ppf-based range is overridden by the explicit range below
    # x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
    x = np.linspace(num_inicio, num_final, num_total)
    ax2.get_yaxis().set_ticks([])
    ax2.plot(x,
             genextreme.pdf(x, c, loc, scale),
             'r-',
             lw=5,
             alpha=0.6,
             label='genextreme pdf')

    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.savefig("./histograma_familia_{}.png".format(str_fam_sinal))
    plt.show()
    plt.close()
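
Because the bar plot shows raw counts while genextreme.pdf integrates to one, the function needs the twin axis with hidden ticks to overlay them. A sketch of the alternative, putting both on a single density-scaled axis (parameters and data are illustrative):

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import genextreme

c, loc, scale = -0.1, 0.0, 1.0
valores = genextreme.rvs(c, loc=loc, scale=scale, size=1000, random_state=2)

fig, ax = plt.subplots()
ax.hist(valores, bins=20, density=True, alpha=0.5)   # density=True matches the pdf scale
x = np.linspace(valores.min(), valores.max(), 100)
ax.plot(x, genextreme.pdf(x, c, loc, scale), 'r-', lw=2, label='genextreme pdf')
ax.legend()
plt.show()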
Code Example #6
def calculate_required_effort(data_before_capture, n_bootstrapping,
                              success_probability):
    effort_per_sighting = calculate_effort_per_sighting(data_before_capture)
    n_effort_per_sighting = len(effort_per_sighting)
    required_effort: np.ndarray = np.zeros(n_bootstrapping)
    for i in range(n_bootstrapping):
        resampled_effort_per_sighting = np.random.choice(
            effort_per_sighting, n_effort_per_sighting)
        fit = genextreme.fit(resampled_effort_per_sighting)
        required_effort[i] = genextreme.ppf(success_probability, fit[0],
                                            fit[1], fit[2])
    return required_effort
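
Each bootstrap replicate re-fits the GEV and evaluates its quantile at the desired success probability, so the returned array is itself a sampling distribution. A usage sketch on synthetic data (the exponential draws are a stand-in for `calculate_effort_per_sighting`):

import numpy as np
from scipy.stats import genextreme

rng = np.random.default_rng(7)
effort_per_sighting = rng.exponential(scale=3.0, size=200)   # synthetic stand-in

required_effort = np.zeros(200)
for i in range(200):
    resampled = rng.choice(effort_per_sighting, effort_per_sighting.size)
    fit = genextreme.fit(resampled)
    required_effort[i] = genextreme.ppf(0.95, fit[0], fit[1], fit[2])

# bootstrap point estimate and 95% interval for the required effort
print(np.percentile(required_effort, [2.5, 50.0, 97.5]))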
Code Example #7
    def plot(self, FAPname, Nlevels, cheat=True):
        '''Plots the periodogram with significance levels provided by
        the bootstrap. The number of displayed significance levels is
        adjusted with Nlevels. cheat shows the FAP calculated by astropy,
        as well as a marker at a tabulated value for the frequency of the
        planet's oscillation. Unfinished code for calculating the FAP based
        on the z-levels is still present (commented out).'''
        P = self.search()
        Ptop = np.amax(P)
        ftop = self.flist[np.where(P == Ptop)[0][0]]
        Levels = np.array([50, 90, 95, 99, 99.9])[:Nlevels]
        # Neff = 0
        # for i in range(self.Neval-2):
        #     if (P[i]<P[i+1]) & (P[i+1]>P[i+2]):
        #         Neff = Neff + 1
        # # print(Neff)
        # Neff = self.fmax * 1/(self.flist[1]-self.flist[0])
        # # print(Neff)

        FAPfile = np.loadtxt(FAPname + 'FAPNormTest.txt')
        #PLevels = scoreperc(FAPfile,Levels)

        fit = gev.fit(FAPfile)
        PLevels = gev.ppf(Levels / 100, *fit)

        plt.figure(figsize=(20, 14))
        plt.hlines(PLevels, self.fmin, self.fmax, 'g')
        plt.plot(self.flist, P)
        plt.text(self.fmax - (self.fmax - self.fmin) / 2.25,
                 plt.ylim()[1], 'False alarm probability')
        plt.ylim(0, Ptop + 0.1)
        for i in range(Nlevels):
            plt.text(self.fmax - (self.fmax - self.fmin) / 3,
                     PLevels[i] + 0.003, str(np.round(1 - Levels[i] / 100, 3)))
        plt.plot(ftop,
                 Ptop,
                 'r',
                 marker='o',
                 linestyle='none',
                 markerfacecolor='none',
                 markersize=35)
        if cheat:
            plt.vlines(1 / self.cheat, 0, Ptop, 'g')

            CheatLevels = self.APFAP(1 - Levels / 100)
            plt.hlines(CheatLevels, self.fmin, self.fmax, 'r')
        plt.xlabel('Frequency [1/day]')
        plt.ylabel('Lomb-Scargle Power')
        plt.title(
            'Lomb - Scargle Periodogram for {planet}'.format(planet=self.name))
        print('Highest probability of period = {p} days'.format(
            p=round(1 / ftop, 3)))
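
Stripped of the plotting, the significance levels come from fitting a GEV to the bootstrapped peak powers and inverting it at 1 - FAP. A compact sketch (the Gumbel draws stand in for the contents of the FAPNormTest.txt file):

import numpy as np
from scipy.stats import genextreme as gev

rng = np.random.default_rng(3)
peak_powers = rng.gumbel(loc=0.05, scale=0.01, size=2000)   # stand-in bootstrap maxima

faps = np.array([0.5, 0.1, 0.05, 0.01, 0.001])   # desired false-alarm probabilities
fit = gev.fit(peak_powers)
power_levels = gev.ppf(1 - faps, *fit)           # power threshold at each FAP
print(dict(zip(faps, np.round(power_levels, 4))))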
Code Example #8
def sim_data(N, J, theta) -> tuple:
    k = theta.size

    x = random.normal(size=(N, J, k)) + np.linspace(3, 5, J).reshape(1, J, 1)
    v = utiliy(theta, x)  # helper defined elsewhere in the source file
    e = genextreme.ppf(random.uniform(size=(N, J)), c=0)
    u = v + e  # utility

    # Find the choice that maximizes utility.
    y = u.argmax(axis=1)

    label = ['y', 'x']
    d = dict(zip(label, [y, x]))
    return d
Code Example #9
def doGev(dis, retPerYr):
  # dis: (ntime, npt) array of discharges; retPerYr: array of return periods.
  prob = 1 - 1/retPerYr
  npt = dis.shape[1]
  nretper = len(retPerYr)
  retLev = np.ones([npt, nretper])*np.nan
  for ipt in range(npt):
    disii = dis[:, ipt]
    disii = disii[~np.isnan(disii)]
    if len(disii) > 15:
      # fit the GEV to the negated series (block minima -> block maxima),
      # then negate the quantiles back
      shape, loc, scale = gev.fit(-disii)
      retLevII = -gev.ppf(prob, shape, loc=loc, scale=scale)
      if sum(retLevII < 0) == 0:
        retLev[ipt, :] = retLevII
  return retLev
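
Negating the series turns block minima into the block-maxima form that gev.fit expects; negating the quantiles maps them back. A self-contained sketch of the same trick on synthetic low flows:

import numpy as np
from scipy.stats import genextreme as gev

minima = -gev.rvs(-0.1, loc=50.0, scale=8.0, size=300, random_state=5)   # synthetic low flows

retPerYr = np.array([2, 10, 50, 100])
prob = 1 - 1 / retPerYr

shape, loc, scale = gev.fit(-minima)                   # fit to the negated (maxima) series
retLev = -gev.ppf(prob, shape, loc=loc, scale=scale)   # negate the quantiles back
print(retLev)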
Code Example #10
    def ProbapilityPlot(param, cdf, data, SignificanceLevel):
        """
        Still not finished: the equations below are those of the Gumbel
        distribution and have to be changed to the GEV equations.
        ===================================================================
            ProbapilityPlot(param, cdf, data, SignificanceLevel)
        ===================================================================
        This method calculates the theoretical values based on the
        distribution parameters and the theoretical cdf (or Weibull), and
        calculates the confidence interval.

        Parameters
        ----------
        param : [list]
            list of the distribution parameters [shape, loc, scale].
        cdf : [list]
            theoretical cdf calculated using Weibull plotting positions or
            the distribution cdf function.
        data : [list/array]
            list of the values.
        SignificanceLevel : [float]
            value between 0 and 1.

        Returns
        -------
        Qth : [list]
            theoretical values generated from the theoretical cdf (Weibull
            plotting positions or the distribution parameters).
        Qupper : [list]
            upper bound corresponding to the confidence interval.
        Qlower : [list]
            lower bound corresponding to the confidence interval.
        """

        # Qth = [param[0] - param[1]*(np.log(-np.log(j))) for j in cdf]
        Qth = genextreme.ppf(cdf, c=param[0], loc=param[1], scale=param[2])
        Y = [-np.log(-np.log(j)) for j in cdf]
        StdError = [(param[1] / np.sqrt(len(data))) *
                    np.sqrt(1.1087 + 0.5140 * j + 0.6079 * j**2) for j in Y]
        v = norm.ppf(1 - SignificanceLevel / 2)
        Qupper = [Qth[j] + v * StdError[j] for j in range(len(data))]
        Qlower = [Qth[j] - v * StdError[j] for j in range(len(data))]

        return Qth, Qupper, Qlower
Code Example #11
File: estatistica.py Project: CarolinaArdana/Bla
    def EstimaMagnitudes(self, Parametros):
        Quantis = []
        TRs = [1.000111, 2, 5, 10, 20, 50]  # 1.000111 keeps F = 1 - 1/TR just above zero
        for TR in TRs:
            if self.tipoSerie == 'Parcial':
                Quantil = genpareto.ppf(1-(1/TR), Parametros[0],
                                        loc = Parametros[1],
                                        scale = Parametros[2])
                Quantis.append(Quantil)
                print('Tempo de Retorno: %i  '%TR)
                print('PARETO=> Magnitude: %.2f'%(Quantil))
            elif self.tipoSerie == 'Anual':
                Quantil = genextreme.ppf(1-(1/TR), Parametros[0],
                                         loc = Parametros[1],
                                         scale = Parametros[2])
                Quantis.append(Quantil)
                print('Tempo de Retorno: %i  '%TR)
                print('GEV=> Magnitude: %.2f' % (Quantil))

        return Quantis
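
Both branches rely on the same return-period relation: the non-exceedance probability of the T-year event is F = 1 - 1/T, and the magnitude is the corresponding quantile. A compact sketch with illustrative GEV parameters:

import numpy as np
from scipy.stats import genextreme

TRs = np.array([2, 5, 10, 20, 50])   # return periods in years
F = 1 - 1 / TRs                      # non-exceedance probabilities

quantis = genextreme.ppf(F, -0.1, loc=100.0, scale=25.0)
for TR, q in zip(TRs, quantis):
    print('Return period: %3d -> magnitude: %.2f' % (TR, q))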
Code Example #12
def fitGEV(x, Tmax):
    '''
    Fit a GEV distribution to the data in x. Inverse CDF values are calculated for return periods up to Tmax.
    ---------------------------------------------------------------------------------------------------------------
    Input:
        x:        Pandas series of maxima
        Tmax:     Maximum return period to fit the GEV distribution for
    ---------------------------------------------------------------------------------------------------------------
    Returns:
        gev_fit:    Tuple of GEV fit parameters (shape, loc, scale)
        gev_inv:    Inverse of the CDF for each T
    '''
    T = np.linspace(1, Tmax, 100000)

    probs = 1 / T
    #-initial guess of shape parameter
    c = 0
    #-fit GEV and calculate inverse
    gev_fit = genextreme.fit(x, c)
    gev_inv = genextreme.ppf(1 - probs, gev_fit[0], gev_fit[1], gev_fit[2])
    return gev_fit, gev_inv
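
A hypothetical call, assuming fitGEV above is in scope together with `import numpy as np`, `import pandas as pd`, and `from scipy.stats import genextreme`; the series contents are synthetic annual maxima:

import numpy as np
import pandas as pd
from scipy.stats import genextreme

maxima = pd.Series(genextreme.rvs(-0.1, loc=30.0, scale=5.0, size=60, random_state=11))

gev_fit, gev_inv = fitGEV(maxima, Tmax=100)
print(gev_fit)       # (shape, loc, scale)
print(gev_inv[-1])   # quantile at the largest return period considered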
Code Example #13
def sim_data(n: int, j: int, theta: np.ndarray) -> dict:
    """Takes input values n and j to specify the shape of the output data. The
    k dimension is inferred from the size of theta and the number of choices.
    Creates a y vector with the choice that maximises utility for each
    household, and an x matrix of covariates (a constant plus standard normal
    draws).

    Args:
        n (int): Number of households.
        j (int): Number of choices.
        theta (np.ndarray): The true values of the coefficients, shape (k, j - 1).

    Returns:
        dict: Returns a dict with keys "y" and "x".
    """
    k = int(theta.size / (j - 1))
    const = np.ones((n, 1))
    x0 = rng.normal(size=(n, k - 1))
    x = np.hstack((const, x0))

    # Initialize a v matrix filled with zeros, with shape (n, j). The first
    # column is the reference category and stays at zero; the remaining
    # columns are filled using x and the matching column of the theta matrix
    # (this mirrors the completed version in Code Example #2).
    v = np.zeros((n, j))
    for i in range(1, j):
        v[:, i] = x @ theta[:, i - 1]

    e = genextreme.ppf(rng.uniform(size=(n, j)), c=0)
    u = v + e

    # Find the choice that maximises utility.
    u_index = u.argmax(axis=1)

    label = ['y', 'x']
    return dict(zip(label, [u_index, x]))
Code Example #14
File: hminputs.py Project: nguyetlm/Hapi
    def StatisticalProperties(self,
                              PathNodes,
                              PathTS,
                              StartDate,
                              WarmUpPeriod,
                              SavePlots,
                              SavePath,
                              SeparateFiles=False,
                              Filter=False,
                              Distibution="GEV",
                              EstimateParameters=False,
                              Quartile=0,
                              RIMResults=False,
                              SignificanceLevel=0.1):
        """
        =============================================================================
          StatisticalProperties(PathNodes, PathTS, StartDate, WarmUpPeriod, SavePlots, SavePath,
                              SeparateFiles = False, Filter = False, RIMResults = False)
        =============================================================================

        StatisticalProperties method reads the SWIM output file (.dat file) that
        contains the time series of discharge for some computational nodes
        and calculates some statistical properties.

        The code assumes that the time series are of a daily temporal resolution,
        and that the hydrological year is 1-Nov/31-Oct (Petrow and Merz, 2009, JoH).

        Parameters
        ----------
            1-PathNodes : [String]
                the name of the file which contains the IDs of the computational
                nodes you want to do the statistical analysis for; the ObservedFile
                should contain the discharge time series of these nodes in order.
            2-PathTS : [String]
                the name of the SWIM result file (the .dat file).
            3-StartDate : [string]
                the beginning date of the time series.
            4-WarmUpPeriod : [integer]
                the number of days you want to neglect at the beginning of the
                simulation (warm up period).
            5-SavePlots : [Bool]
                whether to generate and save the figures.
            6-SavePath : [String]
                the path where you want to save the statistical properties.
            7-SeparateFiles: [Bool]
                if the discharge data are stored in separate files, not all in
                one file, SeparateFiles should be True, default [False].
            8-Filter: [Bool/float]
                observed or RIMresult data may have gaps where the model did not
                run or the observations are missing; if these gap days are filled
                with a specific value and you want to ignore it, set Filter to
                that value.
            9-RIMResults: [Bool]
                whether the files are results from RIM or observed, as the format
                differs between the two. default [False]

        Returns
        -------
            1-Statistical Properties.csv:
                file containing some statistical properties like mean, std, min, 5%, 25%,
                median, 75%, 95%, max, t_beg, t_end, nyr, q1.5, q2, q5, q10, q25, q50,
                q100, q200, q500.
        """

        ComputationalNodes = np.loadtxt(PathNodes, dtype=np.uint16)
        # hydrographs
        if SeparateFiles:
            TS = pd.DataFrame()
            if RIMResults:
                for i in range(len(ComputationalNodes)):
                    TS.loc[:, int(ComputationalNodes[i])] = self.ReadRIMResult(
                        PathTS + "/" + str(int(ComputationalNodes[i])) +
                        '.txt')
            else:
                for i in range(len(ComputationalNodes)):
                    TS.loc[:, int(ComputationalNodes[i])] = np.loadtxt(
                        PathTS + "/" + str(int(ComputationalNodes[i])) +
                        '.txt')  #,skiprows = 0

            StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
            EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
            ind = pd.date_range(StartDate, EndDate)
            TS.index = ind
        else:
            TS = pd.read_csv(PathTS, delimiter=r'\s+', header=None)
            StartDate = dt.datetime.strptime(StartDate, "%Y-%m-%d")
            EndDate = StartDate + dt.timedelta(days=TS.shape[0] - 1)
            TS.index = pd.date_range(StartDate, EndDate, freq="D")
            # delete the first two columns
            del TS[0], TS[1]
            TS.columns = ComputationalNodes

        # neglect the warm-up period at the beginning of the time series
        TS = TS.loc[StartDate + dt.timedelta(days=WarmUpPeriod):EndDate, :]

        # List of the table output, including some general data and the return periods.
        col_csv = [
            'mean', 'std', 'min', '5%', '25%', 'median', '75%', '95%', 'max',
            't_beg', 't_end', 'nyr'
        ]
        rp_name = [
            'q1.5', 'q2', 'q5', 'q10', 'q25', 'q50', 'q100', 'q200', 'q500',
            'q1000'
        ]
        col_csv = col_csv + rp_name

        # In a table where duplicates are removed (np.unique), find the number of
        # gauges contained in the .csv file.
        # no_gauge = len(ComputationalNodes)
        # Declare a dataframe for the output file, with the gauge numbers as
        # index and all the output names as columns.
        StatisticalPr = pd.DataFrame(np.nan,
                                     index=ComputationalNodes,
                                     columns=col_csv)
        StatisticalPr.index.name = 'ID'
        DistributionPr = pd.DataFrame(np.nan,
                                      index=ComputationalNodes,
                                      columns=['loc', 'scale'])
        DistributionPr.index.name = 'ID'
        # required return periods (one entry per name in rp_name)
        T = np.array([1.5, 2, 5, 10, 25, 50, 100, 200, 500, 1000])
        # these values are the non-exceedance probabilities (F) of the chosen
        # return periods, F = 1 - (1/T)
        # F = [1/3, 0.5, 0.8, 0.9, 0.96, 0.98, 0.99, 0.995, 0.998]
        F = 1 - (1 / T)
        # Iteration over all the gauge numbers.
        for i in ComputationalNodes:
            QTS = TS.loc[:, i]
            # The time series is resampled to the annual maxima, and turned into a
            # numpy array.
            # The hydrological year is 1-Nov/31-Oct (from Petrow and Merz, 2009, JoH).
            amax = QTS.resample('A-OCT').max().values

            if type(Filter) != bool:
                amax = amax[amax != Filter]
            if EstimateParameters:
                # estimate the parameters through an optimization
                # alpha = (np.sqrt(6) / np.pi) * amax.std()
                # beta = amax.mean() - 0.5772 * alpha
                # param_dist = [beta, alpha]
                threshold = np.quantile(amax, Quartile)
                if Distibution == "GEV":
                    print("Still to be finished later")
                else:
                    param = Gumbel.EstimateParameter(amax, Gumbel.ObjectiveFn,
                                                     threshold)
                    param_dist = [param[1], param[2]]

            else:
                # estimate the parameters through the maximum likelihood method
                if Distibution == "GEV":
                    param_dist = genextreme.fit(amax)
                else:
                    # A gumbel distribution is fitted to the annual maxima
                    param_dist = gumbel_r.fit(amax)

            if Distibution == "GEV":
                DistributionPr.loc[i, 'c'] = param_dist[0]
                DistributionPr.loc[i, 'loc'] = param_dist[1]
                DistributionPr.loc[i, 'scale'] = param_dist[2]
            else:
                DistributionPr.loc[i, 'loc'] = param_dist[0]
                DistributionPr.loc[i, 'scale'] = param_dist[1]

            # Return periods from the fitted distribution are stored.
            # get the Discharge coresponding to the return periods
            if Distibution == "GEV":
                Qrp = genextreme.ppf(F,
                                     param_dist[0],
                                     loc=param_dist[1],
                                     scale=param_dist[2])
            else:
                Qrp = gumbel_r.ppf(F, loc=param_dist[0], scale=param_dist[1])
            # to get the Non Exceedance probability for a specific Value
            # sort the amax
            amax.sort()
            # calculate F (exceedance probability based on the Weibull plotting position)
            cdf_Weibul = ST.Weibul(amax)
            # the ProbapilityPlot method calculates the theoretical values based
            # on the distribution parameters and the theoretical cdf (or Weibull),
            # and calculates the confidence interval
            if Distibution == "GEV":
                Qth, Qupper, Qlower = GEV.ProbapilityPlot(
                    param_dist, cdf_Weibul, amax, SignificanceLevel)
                # to calculate the F theoretical
                Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
                pdf_fitted = genextreme.pdf(Qx,
                                            param_dist[0],
                                            loc=param_dist[1],
                                            scale=param_dist[2])
                cdf_fitted = genextreme.cdf(Qx,
                                            param_dist[0],
                                            loc=param_dist[1],
                                            scale=param_dist[2])
            else:
                Qth, Qupper, Qlower = Gumbel.ProbapilityPlot(
                    param_dist, cdf_Weibul, amax, SignificanceLevel)
                # gumbel_r.interval(SignificanceLevel)
                # to calculate the F theoretical
                Qx = np.linspace(0, 1.5 * float(amax.max()), 10000)
                pdf_fitted = gumbel_r.pdf(Qx,
                                          loc=param_dist[0],
                                          scale=param_dist[1])
                cdf_fitted = gumbel_r.cdf(Qx,
                                          loc=param_dist[0],
                                          scale=param_dist[1])
            # then calculate T (the return period): T = 1/(1-F)
            if SavePlots:
                fig = plt.figure(60, figsize=(20, 10))
                gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)
                # Plot the histogram and the fitted distribution, save it for each gauge.
                ax1 = fig.add_subplot(gs[0, 0])
                ax1.plot(Qx, pdf_fitted, 'r-')
                ax1.hist(amax, density=True)
                ax1.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
                ax1.set_ylabel('pdf', fontsize=15)

                ax2 = fig.add_subplot(gs[0, 1])
                ax2.plot(Qx, cdf_fitted, 'r-')
                ax2.plot(amax, cdf_Weibul, '.-')
                ax2.set_xlabel('Annual Discharge(m3/s)', fontsize=15)
                ax2.set_ylabel('cdf', fontsize=15)

                plt.savefig(SavePath + "/" + "Figures/" + str(i) + '.png',
                            format='png')
                plt.close()

                fig = plt.figure(70, figsize=(10, 8))
                plt.plot(Qth,
                         amax,
                         'd',
                         color='#606060',
                         markersize=12,
                         label='Gumbel Distribution')
                plt.plot(Qth,
                         Qth,
                         '^-.',
                         color="#3D59AB",
                         label="Weibul plotting position")
                if Distibution != "GEV":
                    plt.plot(Qth,
                             Qlower,
                             '*--',
                             color="#DC143C",
                             markersize=12,
                             label='Lower limit (' +
                             str(int(
                                 (1 - SignificanceLevel) * 100)) + " % CI)")
                    plt.plot(Qth,
                             Qupper,
                             '*--',
                             color="#DC143C",
                             markersize=12,
                             label='Upper limit (' +
                             str(int(
                                 (1 - SignificanceLevel) * 100)) + " % CI)")

                plt.legend(fontsize=15, framealpha=1)
                plt.xlabel('Theoretical Annual Discharge(m3/s)', fontsize=15)
                plt.ylabel('Annual Discharge(m3/s)', fontsize=15)
                plt.savefig(SavePath + "/" + "Figures/F-" + str(i) + '.png',
                            format='png')
                plt.close()

            StatisticalPr.loc[i, 'mean'] = QTS.mean()
            StatisticalPr.loc[i, 'std'] = QTS.std()
            StatisticalPr.loc[i, 'min'] = QTS.min()
            StatisticalPr.loc[i, '5%'] = QTS.quantile(0.05)
            StatisticalPr.loc[i, '25%'] = QTS.quantile(0.25)
            StatisticalPr.loc[i, 'median'] = QTS.quantile(0.50)
            StatisticalPr.loc[i, '75%'] = QTS.quantile(0.75)
            StatisticalPr.loc[i, '95%'] = QTS.quantile(0.95)
            StatisticalPr.loc[i, 'max'] = QTS.max()
            StatisticalPr.loc[i, 't_beg'] = QTS.index.min()
            StatisticalPr.loc[i, 't_end'] = QTS.index.max()
            StatisticalPr.loc[
                i, 'nyr'] = (StatisticalPr.loc[i, 't_end'] -
                             StatisticalPr.loc[i, 't_beg']).days / 365.25
            for irp, irp_name in zip(Qrp, rp_name):
                StatisticalPr.loc[i, irp_name] = irp

            # Print for prompt and check progress.
            print("Gauge", i, "done.")
        #
        # Output file
        StatisticalPr.to_csv(SavePath + "/" + "Statistical Properties.csv")
        self.StatisticalPr = StatisticalPr
        DistributionPr.to_csv(SavePath + "/" + "DistributionProperties.csv")
        self.DistributionPr = DistributionPr
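
Stripped of the file handling and plotting, the statistical core of StatisticalProperties is short: resample daily flow to hydrological-year annual maxima, fit the distribution, and evaluate its ppf at F = 1 - 1/T. A minimal sketch of that pipeline on synthetic data:

import numpy as np
import pandas as pd
from scipy.stats import genextreme

rng = np.random.default_rng(13)
idx = pd.date_range('1980-01-01', '2019-12-31', freq='D')
QTS = pd.Series(rng.gamma(shape=2.0, scale=50.0, size=len(idx)), index=idx)

# annual maxima over a 1-Nov/31-Oct hydrological year
amax = QTS.resample('A-OCT').max().values

T = np.array([1.5, 2, 5, 10, 25, 50, 100, 200, 500, 1000])
F = 1 - 1 / T

c, loc, scale = genextreme.fit(amax)
Qrp = genextreme.ppf(F, c, loc=loc, scale=scale)
print(dict(zip(['q%g' % t for t in T], np.round(Qrp, 1))))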
Code Example #15
File: extremes.py Project: teslakit/teslakit
def Plot_Fit_QQ(data_fit,
                vn,
                xds_GEV_Par,
                kma_fit,
                color='black',
                gs_1=1,
                gs_2=1,
                n_clusters=1,
                show=True):
    'Plots QQ (empirical-gev) for variable vn and each kma cluster'

    # plot figure
    fig = plt.figure(figsize=(_fsize * gs_2 / 2, _fsize * gs_1 / 2.3))

    # grid spec
    gs = gridspec.GridSpec(gs_1, gs_2)  #, wspace=0.0, hspace=0.0)

    # clusters
    for c in range(n_clusters):

        # select wt data
        wt = c + 1
        ph_wt = np.where(kma_fit.bmus == wt)[0]
        dh = data_fit[vn].values[:][ph_wt]
        dh = dh[~np.isnan(dh)]

        # prepare data
        Q_emp = np.sort(dh)
        bs = np.linspace(1, len(dh), len(dh))
        pp = bs / (len(dh) + 1)

        # TODO: problem if Gumbel?
        # select wt GEV parameters
        pars_GEV = xds_GEV_Par[vn]
        sha = pars_GEV.sel(parameter='shape').sel(n_cluster=wt).values
        sca = pars_GEV.sel(parameter='scale').sel(n_cluster=wt).values
        loc = pars_GEV.sel(parameter='location').sel(n_cluster=wt).values

        # calc GEV quantiles (ppf); the sign flip reflects scipy's shape
        # convention (c is the negative of the usual GEV shape parameter)
        Q_gev = genextreme.ppf(pp, -1 * sha, loc, sca)

        # scatter plot
        ax = fig.add_subplot(gs[c])
        ax.plot(Q_emp,
                Q_gev,
                'ok',
                color=color,
                label='N = {0}'.format(len(dh)))
        ax.plot([0, 1], [0, 1], '--b', transform=ax.transAxes)

        # customize axis
        ax.set_title('WT: {0}'.format(wt))
        ax.axis('equal')
        #ax.set_xlabel('Empirical')
        ax.set_ylabel('GEV')
        ax.legend(prop={'size': 8})

    # fig suptitle
    #fig.suptitle('{0}'.format(vn), fontsize=14, fontweight = 'bold')

    # show and return figure
    if show: plt.show()
    return fig
Code Example #16
File: extremes.py Project: teslakit/teslakit
def Plot_FitSim_GevFit(data_fit,
                       data_sim,
                       vn,
                       xds_GEV_Par,
                       kma_fit,
                       n_bins=30,
                       color_1='white',
                       color_2='skyblue',
                       alpha_1=0.7,
                       alpha_2=0.4,
                       label_1='Historical',
                       label_2='Simulation',
                       gs_1=1,
                       gs_2=1,
                       n_clusters=1,
                       vlim=1,
                       show=True):
    'Plots fit vs sim histograms and gev fit by clusters for variable "vn"'

    # plot figure
    fig = plt.figure(figsize=(_fsize * gs_2 / 2, _fsize * gs_1 / 2.3))

    # grid spec
    gs = gridspec.GridSpec(gs_1, gs_2)  #, wspace=0.0, hspace=0.0)

    # clusters
    for c in range(n_clusters):

        # select wt data
        wt = c + 1

        ph_wt = np.where(kma_fit.bmus == wt)[0]
        ps_wt = np.where(data_sim.DWT == wt)[0]

        dh = data_fit[vn].values[:][ph_wt]  #; dh = dh[~np.isnan(dh)]
        ds = data_sim[vn].values[:][ps_wt]  #; ds = ds[~np.isnan(ds)]

        # TODO: problem if Gumbel?
        # select wt GEV parameters
        pars_GEV = xds_GEV_Par[vn]
        sha = pars_GEV.sel(parameter='shape').sel(n_cluster=wt).values
        sca = pars_GEV.sel(parameter='scale').sel(n_cluster=wt).values
        loc = pars_GEV.sel(parameter='location').sel(n_cluster=wt).values

        # compare histograms
        ax = fig.add_subplot(gs[c])
        axplot_compare_histograms(
            ax,
            dh,
            ds,
            ttl='WT: {0}'.format(wt),
            density=True,
            n_bins=n_bins,
            color_1=color_1,
            color_2=color_2,
            alpha_1=alpha_1,
            alpha_2=alpha_2,
            label_1=label_1,
            label_2=label_2,
        )

        # add gev fit
        x = np.linspace(genextreme.ppf(0.001, -1 * sha, loc, sca), vlim, 100)
        ax.plot(x, genextreme.pdf(x, -1 * sha, loc, sca), label='GEV fit')

        # customize axis
        ax.legend(prop={'size': 8})

    # fig suptitle
    #fig.suptitle('{0}'.format(vn), fontsize=14, fontweight = 'bold')

    # show and return figure
    if show: plt.show()
    return fig
Code Example #17
File: funcoesBasicas.py Project: clebsonpy/HidroComp


#========================================
c = controle('Banco_Hidro2')
prob = [0.001, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.999]
metodo = ['MML','MOM','MVS']
tamanhoAmostra = 100

aux = []
for m in metodo:
    aux.append(c.prepGra(metodo=m, tamanhoAmostra=tamanhoAmostra, probabilidade=prob))

dadosExt = []
for i in prob:
    dadosExt.append(gev.ppf(i, -0.168462, 6286.926278, 1819.961392))
e = est.estatistica()
accu = []
for me in range(3):
    ac = []
    for d in range(9):
        ac.append(dadosExt[d] - e.calculoAccu(dadosAmostra=aux[me][d], dadoSintetico=dadosExt[d]))
    accu.append(ac)
print(accu)

ax1 = plt.subplot(221)
ax2 = plt.subplot(223)
ax3 = plt.subplot(122)

c.plotGraf2(axes=ax1, accu=accu[1], dadosExt=dadosExt, yd=prob, quan=aux[1], metodo='MOM', tamanhoAmostra=tamanhoAmostra, prob=prob)
c.plotGraf2(axes=ax2, accu=accu[2], dadosExt=dadosExt, yd=prob, quan=aux[2], metodo='MVS', tamanhoAmostra=tamanhoAmostra, prob=prob)
Code Example #18
File: ocean_kit.py Project: rdkit/OCEAN
def calc_ocean_parameter(FP_MANAGER, fp, datasource, recalc=False):
    """
    http://www.jamesphoughton.com/2013/08/making-gif-animations-with-matplotlib.html
    """
    print "calcOceanStatistics function start"
    db_ocean = DB_connector("default")  # chembl
    cursor = db_ocean.cursor

    ds = DataSources.objects.get(name=datasource.name)
    if recalc:
        print "delete rnd set items for fp",fp
        Rnd_set_comparison.objects.all().filter(fp=fp).filter(datasource=ds).delete()
        print "done"
    print "delete parameter entries for fp",fp
    FP_Parameter.objects.all().filter(fp_id=fp).filter(datasource=ds).delete()
    print "done"

    if not recalc and Rnd_set_comparison.objects.all().filter(fp=fp).filter(datasource=ds).count()==0:
        return "no entries for fp %d, try ?recalc=True" % fp

    repeats = settings.CALC_OCEAN_PARAMETER_REPEATS

    start = settings.CALC_OCEAN_PARAMETER_START
    end = settings.CALC_OCEAN_PARAMETER_END
    steps = settings.CALC_OCEAN_PARAMETER_STEPS

    thresh_start = settings.CALC_OCEAN_PARAMETER_THRESH_START
    thresh_end   = settings.CALC_OCEAN_PARAMETER_THRESH_END
    thresh_steps = settings.CALC_OCEAN_PARAMETER_THRESH_STEPS

    animatedGif = True

    try:
        from PIL import Image
        from images2gif import writeGif
    except:
        print >> sys.stderr, "Couldn't import Image from PIL or writeGif from images2gif, so plotting is deactivated now"
        animatedGif = False

    plotting = True
    try:
        import matplotlib.pyplot as plt
    except:
        plotting = False
        animatedGif = False

    processes = settings.PARALLEL_PROCESSES
    if recalc: walker = Pool(processes=processes)

    thresh_list = np.arange(thresh_start,thresh_end,thresh_steps)
    molecule_ids = np.asarray(FP_MANAGER[datasource][fp].keys())

    ds = DataSources.objects.get(name=datasource.name)
    for runde in range(repeats):
        if not recalc: continue

        print "runde %d" % runde
        result = {}
        rand_lists1 = createRandLists(start,end,steps,molecule_ids)
        rand_lists2 = createRandLists(start,end,steps,molecule_ids)

        tasks = [([FP_MANAGER[datasource][fp].get(x1) for x1 in rand_lists1[i]],[FP_MANAGER[datasource][fp].get(x2) for x2 in rand_lists2[i]]) for i in range(len(rand_lists2))]

        if processes>1:
            np.random.shuffle(tasks)
            result2 = {}
            for data_entry in walker.imap_unordered(get_tc_list_para,tasks,20):
                result2[data_entry[0]] = data_entry[1]
                print "addet %d of %d" % (len(result2),len(tasks))
        else:
            result2 = {}
            while (len(tasks)>0):
                task = tasks.pop()
                score = get_tc_list_para(task)
                result2[score[0]] = score[1]
                print "addet %d of %d" % (len(result2),len(tasks))

        print "create %d Result-Objects for DB-Table rnd_set_comparison" % (len(thresh_list) * len(result2))
        with transaction.atomic():
            buffer = []
            for threshold in thresh_list:
                for key,value in result2.iteritems():
                    raw_score = np.sum(value[value>=threshold])
                    item = (key**2,fp,threshold,raw_score)
                    buffer.append(item)
            print "created %d buffered items" % len(buffer)

            for w,x,y,z in buffer:
                obj = Rnd_set_comparison(setsize=w,fp=x,threshold=y,rawscore=z,datasource=ds)
                obj.save()

    figures = []

    data_cache = {}

    min_mean = None
    max_mean = None
    min_stddev = None
    max_stddev = None

    for threshold in thresh_list:
        if db_ocean.db_type=='postgre':
            query = "select setsize,threshold, round(stddev_pop(rawscore)::numeric,2) as stddev_pop,round(avg(rawscore)::numeric,2) as mean from ocean_rnd_set_comparison where fp=%d and threshold=%f and datasource_id=%d group by setsize,threshold order by setsize" % (fp,threshold,ds.id)
        else:
            query = "select setsize,threshold,round(stddev(rawscore),2) as stddev,round(avg(rawscore),2) as mean from ocean_rnd_set_comparison where fp=%d and threshold=%f and datasource_id=%d group by setsize,threshold order by setsize" % (
            fp, threshold, ds.id)
        cursor.execute(query)

        x_data = []
        stddev_data = []
        mean_data = []
        for result in cursor.fetchall():
            x_data.append(float(result[0]))
            mean_data.append(float(result[3]))
            stddev_data.append(float(result[2]))

        if min_mean is None:
            if len(mean_data) > 0:
                min_mean,max_mean = min(mean_data),max(mean_data)
            if len(stddev_data) > 0:
                min_stddev,max_stddev = min(stddev_data),max(stddev_data)
        else:
            if len(mean_data) > 0:
                min_mean, max_mean = min([min_mean,min(mean_data)]), max([max_mean,max(mean_data)])
            if len(stddev_data) > 0:
                min_stddev, max_stddev = min([min_stddev, min(stddev_data)]), max([max_stddev, max(stddev_data)])

        data_cache[threshold] = (x_data,mean_data,stddev_data)

    skip_3_to_6 = True

    for threshold in thresh_list:
        x_data,mean_data,stddev_data = data_cache[threshold]

        if len(x_data) == 0 or len(mean_data)==0 or len(stddev_data)==0:
            continue
        if plotting:
            plt.clf()

        if plotting:
            if skip_3_to_6:
                fig,(r0,r1,r2,r6) = plt.subplots(nrows=4,figsize=(12,14))
            else:
                fig,(r0,r1,r2,r3,r4,r5,r6) = plt.subplots(nrows=7,figsize=(6,14))

        raw_mean_func = Calculator.getRawScoreExpFunction(x_data,mean_data)
        print "\nmean function for threshold: %f is [%s]" % (threshold,raw_mean_func.func_name)

        exp_mean_data = [raw_mean_func(en) for en in x_data]
        if plotting:
            r0.plot(np.array(x_data),np.array(mean_data),linewidth=1.0)
            r0.plot(x_data,exp_mean_data,alpha=0.5,linewidth=2.5)
            r0.set_title("Mean, Threshold: %.2f" % threshold)

            r0.set_ylim((min_mean,max_mean))
            r1.set_ylim((min_stddev,max_stddev))
            r2.set_xlim((-1,1.5))
            r2.set_ylim((0,2.5))
        new_std_function = Calculator.getRawScoreStdDevExpFunction(x_data,stddev_data)

        print "stddev function for threshold: %f is [%s]" % (threshold,new_std_function.func_name)

        newdata2 = new_std_function(x_data)

        if plotting:
            r1.plot(x_data,stddev_data)
            r1.plot(x_data, newdata2, alpha=0.8, linewidth=2.0)
            r1.set_title("StdDev")

        z_Scores = Calculator.getZScores(x_data,mean_data,raw_mean_func,new_std_function)

        histo_bins = 50
        counts,bin_edges = np.histogram(z_Scores,histo_bins,normed=True)
        bin_centres = (bin_edges[:-1] + bin_edges[1:])/2.


        if plotting:
            n,bins,patches = r2.hist(z_Scores,bins=histo_bins,normed=True,alpha=0.5)
            r2.set_title("z-Scores")

        e_val_function = Calculator.getZScoreDistExpFunction(z_Scores)
        e_val_data_x = np.linspace(min(z_Scores),max(z_Scores),num=500)
        e_val_data = [e_val_function(entry) for entry in e_val_data_x]
        if plotting:
            if not skip_3_to_6: r3.plot(e_val_data_x,e_val_data,alpha=0.5)

        c=-0.1
        for c in [-0.05]:
            x_ls = np.linspace(ge.ppf(0.01,c),ge.ppf(0.99,c),100)
            if plotting:
                if not skip_3_to_6: r4.plot(x_ls,ge.pdf(x_ls,c),linewidth=1.6-c*4)

        (shape_evd,loc_evd,scale_evd) = ge.fit(z_Scores)

        loc_norm,scale_norm = norm.fit(z_Scores)
        x = ge.pdf(bin_centres,shape_evd,loc=loc_evd,scale=scale_evd)

        if plotting:
            evd_plot, = r2.plot(bin_centres,x,'b',color='black',label='Extreme Value Distribution')

        ndist = norm.pdf(bin_centres,loc=loc_norm,scale=scale_norm)
        if plotting:
            norm_plot, = r2.plot(bin_centres,ndist,'b',color="red",label='Normal Distribution')
            r2.legend([evd_plot,norm_plot],['Extreme Value Distribution','Normal Distribution'],loc=1)

        def getDecNpArray(value):
            return np.asarray(value).astype(float)

        expected_evd = getDecNpArray(x)
        expected_norm = getDecNpArray(ndist)
        observed = getDecNpArray(counts)

        def normalizedChisquare(observed,expected):
            if len(observed) != len(expected): raise Exception("len of observed and expected has to be the same")

            zipped = zip(observed,expected)
            fun = lambda input: ((input[0]-input[1])**2 / (input[0]+input[1]))
            result = sum(map(fun,zipped))

            return result

        chisq_mean = normalizedChisquare(observed,expected_norm)
        chisq_evd = normalizedChisquare(observed,expected_evd)

        print "chisquare_norm",chisq_mean
        print "chisquare_evd",chisq_evd

        #django doesn't like inf or -inf in float-fields of oracle database, so we change it..
        if isinf(chisq_mean) or isnan(chisq_mean):
            print "chisquare_norm seems to be inf or nan (%s), change to -1.0" % str(chisq_mean)
            chisq_mean = -1.0
        if isinf(chisq_evd) or isnan(chisq_evd):
            print "chisquare_evd seems to be inf or nan (%s), change to -1.0" % str(chisq_evd)
            chisq_evd = -1.0

        if plotting:
            if not skip_3_to_6: n,bins,patches = r5.hist(z_Scores,bins=histo_bins,normed=True,alpha=0.75)#,bins=20)

            if not skip_3_to_6:
                import matplotlib.mlab as mlab
                y = mlab.normpdf(bins,loc_evd,scale_evd)

        fp_parameter = FP_Parameter(fp_id=fp,
                                    threshold=threshold,
                                    formula_raw_mean=raw_mean_func.func_name,
                                    formula_raw_stddev=new_std_function.func_name,
                                    chisquare_mean=chisq_mean,
                                    chisquare_evd=chisq_evd,
                                    datasource=ds)
        fp_parameter.save()
        if plotting:
            if not skip_3_to_6: r5.plot(bins,y)

        if threshold==thresh_list[-1]:      #this is last round
            print "last round"

            query = "select threshold,chisquare_mean,chisquare_evd from ocean_fp_parameter where fp_id=%d and datasource_id=%d order by threshold" % (fp,ds.id)
            cursor.execute(query)
            data_chi2_mean = []
            data_chi2_evd = []
            x_chidata = []
            for val in cursor.fetchall():
                x_chidata.append(float(val[0]))
                data_chi2_mean.append(float(val[1]))
                data_chi2_evd.append(float(val[2]))

            print x_chidata,data_chi2_mean,data_chi2_evd

            if plotting:
                if not skip_3_to_6: r6.plot(x_chidata,data_chi2_mean,'o')
                if not skip_3_to_6: r6.plot(x_chidata,data_chi2_evd,'.')
                chi2_mean, = r6.plot(x_chidata,data_chi2_mean,'o')
                chi2_evd, = r6.plot(x_chidata,data_chi2_evd,'.')
                r6.legend([chi2_mean,chi2_evd],['ChiSquare Normal Distribution','ChiSquare Extreme Value Distribution'],loc=1)

        def fitfunc(p,x):
            if p[0]==0:
                return np.exp(-np.exp(-x))*np.exp(-x)
            else:
                print p[0],type(x)
                return np.exp(-(1-p[0]*x)**(1/p[0]))*(1-p[0]*x)**(1/p[0]-1)
        errfunc = lambda p,x,y: (y-fitfunc(p,x))

        init = [0.2]

        bins = bins[:-1]
        bins = np.array(bins)
        n = np.array(n)

        if plotting:
            plt.tight_layout()
            filename = "%f.png" % threshold
            plt.savefig(filename)
            figures.append(filename)

    if animatedGif:
        file_names = figures
        print "d",file_names
        images = [Image.open(fn) for fn in file_names]
        writeGif("animation_mean_stddev.gif",images,duration=0.5)
        for image in images:
            image.close()
Code Example #19
import numpy as np
from scipy.stats import genextreme
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

c = -0.1
mean, var, skew, kurt = genextreme.stats(c, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))
# True

# Generate random numbers:
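
# (continuation of the standard scipy docstring example)
r = genextreme.rvs(c, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()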
Code Example #20
            ]
            print(' '.join(cdo_cmd))
            ret = subprocess.call(cdo_cmd)
            if ret != 0:
                raise Exception('Error with cdo command')
    with Dataset(maxfile, 'r') as f:
        fulldata[i, :] = f.variables['IRFroutedRunoff'][0, :]

# Calculate GEV fit and return periods for each river segment
retperiod_q = np.zeros([len(percentiles), nreaches])
minshape = -0.3
for reach in range(nreaches):
    qvals = fulldata[:, reach]
    try:
        c, loc, scale = genextreme.fit(qvals, -0.01)
        tmp = genextreme.ppf(percentiles / 100., c, loc, scale)
#		if min(tmp)<0.:
#			print('Warning, trying negative shape')
#			c,loc,scale = genextreme.fit(qvals,-0.01)
#			tmp = genextreme.ppf(percentiles/100.,c,loc,scale)
    except Exception as e:
        print('error fitting', reach, qvals)
        # try with different shape parameter guess
        c, loc, scale = genextreme.fit(qvals, 0.0)
        tmp = genextreme.ppf(percentiles / 100., c, loc, scale)
    retperiod_q[:, reach] = tmp
    if tmp.min() < 0 or tmp.max() > 5 * qvals.max():
        #print('debug: reach,fit',reach,c,loc,scale)
        #print('qvals',qvals.min(),np.median(qvals),qvals.max())
        #print('fitted vals',tmp)
        c, loc, scale = genextreme.fit(qvals, f0=minshape)