Example #1
import numpy as np
import torch
from scipy import stats
from typing import List


def add_spikes_asymmetric(ts: torch.Tensor,
                          xi: List[float] = [1 / 50.0, 1 / 25.0]):
    """
    Adds spikes to 15% of the time series in the form of heavy-tailed (Generalized Pareto) realizations

    Arguments:
        ts: time series
        xi: [float, float], GenPareto heaviness parameter for [lower, upper] noise respectively
    """
    num_spikes = int(0.15 * ts.shape[0])
    half_num_spikes = [int(num_spikes / 2)]
    half_num_spikes.append(num_spikes - half_num_spikes[0])
    spike_direction = [-1, 1]

    indices_for_gp_spikes = np.random.choice(np.arange(len(ts)),
                                             replace=False,
                                             size=num_spikes)

    # lower-tail spikes go to the first half of the chosen indices,
    # upper-tail spikes to the second half
    index_halves = [indices_for_gp_spikes[:half_num_spikes[0]],
                    indices_for_gp_spikes[half_num_spikes[0]:]]
    for idx in range(2):
        spikes = (stats.genpareto(xi[idx]).rvs(half_num_spikes[idx]) *
                  spike_direction[idx])
        ts[index_halves[idx]] += spikes

    return ts
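A short illustrative sketch, assuming the imports above: spike a flat synthetic series and count the affected points.

series = torch.zeros(100, dtype=torch.float64)
spiked = add_spikes_asymmetric(series.clone())
print(int((spiked != 0).sum()))  # 15 of the 100 points receive a spike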
Example #2
 def _kstest(self, loc, scale, conc, samples):
     # Uses the Kolmogorov-Smirnov test for goodness of fit.
     ks, _ = sp_stats.kstest(
         samples,
         sp_stats.genpareto(conc, loc=loc, scale=scale).cdf)
     # Return True when the test passes.
     return ks < 0.02
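A self-contained sketch of the same Kolmogorov-Smirnov check with plain scipy; the parameters and sample size here are illustrative:

from scipy import stats as sp_stats

conc, loc, scale = 0.2, 0.0, 1.0
samples = sp_stats.genpareto(conc, loc=loc, scale=scale).rvs(size=5000,
                                                             random_state=0)
ks, _ = sp_stats.kstest(samples,
                        sp_stats.genpareto(conc, loc=loc, scale=scale).cdf)
print(ks < 0.02)  # True when the empirical and model CDFs agree closely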
Example #3
    def find_optimal_tail(self):
        """
        The function fits all tails and saves the generated fit information. After all tails have been fitted
        the tail with the minimal AU2 test statistic and the index of the tail are saved.
        
        Returns:
            None
        """
        # make sure all lists are cleaned up
        self.cdf_list = []
        self.rv_list = []
        # fit the tails
        for index, tail in enumerate(self.generate_tails(self.data)):
            print("\t" + str(index) + "/" + str(self.data.size), end='\r', flush=True)
            cdf, fit_out = self.fit_tail(tail)
            self.cdf_list.append(cdf)
            # save rv's
            rv = genpareto(c=fit_out[0],
                           loc=fit_out[1],
                           scale=fit_out[2])
            self.rv_list.append(rv)

        # calculate the test statistics
        self.au_2_data = np.array([au2(tail) for tail in self.cdf_list])
        self.cramer_data = np.array([cramer_von_mises(tail) for tail in self.cdf_list])
        self.anderson_data = np.array([anderson_darling(tail) for tail in self.cdf_list])

        self.optimal_tail_index = self.au_2_data.argmin()
        self.optimal_tail = self.cdf_list[self.optimal_tail_index]
Example #4
  def testCDF(self, dist):
    xs = self.evaluate(dist.sample())
    cdf = dist.cdf(xs)
    self.assertEqual(dist.batch_shape, cdf.shape)

    loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
    expected_cdf = sp_stats.genpareto(conc, loc=loc, scale=scale).cdf(xs)
    self.assertAllClose(expected_cdf, self.evaluate(cdf), rtol=5e-5)
Example #5
 def testMean(self, dist):
   loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
   self.assertEqual(dist.batch_shape, dist.mean().shape)
   if np.abs(conc) < 1e-5 and conc != 0:
     return  # scipy does badly at small nonzero concentrations.
   expected = sp_stats.genpareto(conc, loc=loc, scale=scale).mean()
   actual = self.evaluate(dist.mean())
   self.assertAllClose(expected, actual, rtol=5e-4)
Example #6
def extremeDistribution_peaksOverThreshold(x, x_e, t_x, t_st, u):
    '''Approximates the short-term extreme distribution using the peaks over
    threshold method.

    Parameters
    ----------
        x : np.array
            Independent random variable (global peaks)
        x_e : np.array
            Array of x values at which to evaluate the short-term extreme CDF
        t_x : float
            Time length of the x array
        t_st : float
            Short-term period
        u : float
            Threshold below which peaks (x) are ignored

    Returns
    -------
        stextreme_dist : ecmDist object
            Probability distribution of the short-term extreme.
        peaks_dist : ecmDist object
            Probability distribution of the peaks.
        peaksOverThreshold_dist : scipy.stats rv_frozen
            Probability distribution of the peaks over threshold.
        pot_params : np.array, length 3
            Parameters of the peaks-over-threshold distribution (Generalized
            Pareto) [shape_c, loc, scale].
    '''
    # peaks over threshold
    pot = np.sort(x)
    pot = pot[(pot > u)] - u
    N = len(x)
    Npot = len(pot)
    # Fit a generalized Pareto
    pot_params = stats.genpareto.fit(pot, floc=0.)
    peaksOverThreshold_dist = stats.genpareto(c=pot_params[0],
                                              loc=pot_params[1],
                                              scale=pot_params[2])
    # peaks
    x_e_pot = x_e[x_e >= u]
    genpareto_cdf = peaksOverThreshold_dist.cdf(x_e_pot - u)
    A = 1. - genpareto_cdf
    k = 1. * Npot / (1. * N)
    peaks_cdf = 1. - (k * A)
    peaks_dist = ecmDist(x_e_pot, cdf=peaks_cdf)
    # short-term extreme
    ratio = t_st / t_x
    N_st = 1. * N * ratio
    ste_cdf = peaks_cdf**N_st
    stextreme_dist = ecmDist(x_e_pot, cdf=ste_cdf)
    # return
    return stextreme_dist, peaks_dist, peaksOverThreshold_dist, pot_params
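A minimal, self-contained sketch of the peaks-over-threshold core above (the ecmDist wrapper is external, so only the scipy part is exercised; the data are synthetic):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
x = rng.exponential(scale=1.0, size=5000)   # synthetic "global peaks"
u = np.quantile(x, 0.95)                    # threshold
pot = x[x > u] - u                          # exceedances over the threshold
pot_params = stats.genpareto.fit(pot, floc=0.)
gpd = stats.genpareto(c=pot_params[0], loc=pot_params[1], scale=pot_params[2])
# tail probability of a large value, reweighted by the exceedance rate
print(len(pot) / len(x) * (1. - gpd.cdf(8.0 - u)))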
Example #8
 def testMean(self, dist):
   loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
   hp.note('Location: {}, scale: {}, concentration: {}'.format(
       loc, scale, conc))
   self.assertEqual(dist.batch_shape, dist.mean().shape)
   # scipy doesn't seem to be very accurate for small concentrations, so use
   # higher precision.
   expected = sp_stats.genpareto(np.float64(conc), loc=np.float64(loc),
                                 scale=np.float64(scale)).mean()
   actual = self.evaluate(dist.mean())
   self.assertAllClose(expected, actual, rtol=5e-4)
Example #9
 def testVariance(self, dist):
   loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
   self.assertEqual(dist.batch_shape, dist.variance().shape)
   expected = sp_stats.genpareto(conc, loc=loc, scale=scale).var()
   if np.abs(conc) < 1e-4 and conc != 0:
     return  # scipy does badly at small nonzero concentrations.
   if expected <= 0:
     return  # scipy sometimes returns nonsense zero or negative variances.
   actual = self.evaluate(dist.variance())
   print('var', loc, scale, conc, expected, actual, file=sys.stderr)
   self.assertAllClose(expected, actual, rtol=.01)
Example #10
  def testCDF(self, dist):
    xs = self.evaluate(dist.sample())
    cdf = dist.cdf(xs)
    self.assertEqual(dist.batch_shape, cdf.shape)

    loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
    expected_cdf = sp_stats.genpareto(conc, loc=loc, scale=scale).cdf(xs)
    actual_cdf = self.evaluate(cdf)
    msg = ('Location: {}, scale: {}, concentration: {}, xs: {} '
           'scipy cdf: {}, tfp cdf: {}')
    hp.note(msg.format(loc, scale, conc, xs, expected_cdf, actual_cdf))
    self.assertAllClose(expected_cdf, actual_cdf, rtol=5e-5)
Example #11
  def testLogPDF(self, dist):
    xs = self.evaluate(dist.sample())

    logp = dist.log_prob(xs)
    self.assertEqual(dist.batch_shape, logp.shape)
    p = dist.prob(xs)
    self.assertEqual(dist.batch_shape, p.shape)

    loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
    expected_logp = sp_stats.genpareto(conc, loc=loc, scale=scale).logpdf(xs)
    actual_logp = self.evaluate(logp)
    self.assertAllClose(expected_logp, actual_logp, rtol=1e-5)
    self.assertAllClose(np.exp(expected_logp), self.evaluate(p), rtol=1e-5)
Example #12
    def _p(test_i, null_i, M_i, d_i):
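        # n, N, m, ecdf_pseudocount, decrease_n_by and ad_test are free
        # variables supplied by the enclosing scope.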
        gpd_fit = None
        gpd_fit_p_value = None

        n_i = n
        
        # TODO: no need to sort as much as N numbers, do partial sort:
        #  but this requires some tests (both performance and unit)
        # null_i_partitioned = np.partition(null_i, n_i+1)
        # null_i_first_n_sorted = sorted(null_i_partitioned[:-n_i+1])
        null_i = np.sort(null_i)  # keep an array: the slices below use vectorized arithmetic
        t = None
        
        if all(np.isnan(null_i)):
            return np.nan, False, np.nan, np.nan
        
        # compute ecdf based, biased estimate of p-value
        raw_ecdf_estimate = (ecdf_pseudocount + d_i.sum()) / (N + 1)
        
        if M_i < m:
            # fit a GPD, reducing $n$ until convergence
            while n_i > 0:

                # -1 because Python has 0-based indexing
                t = (null_i[-n_i-1] + null_i[-n_i-2]) / 2

                y_until_n = null_i[-n_i:]
                exceedances = y_until_n - t

                assert all(y_until_n >= t)
                assert len(exceedances) == n_i

                fit = genpareto.fit(exceedances)
                fitted = genpareto(*fit)
                gpd_fit = fitted

                gpd_fit_p_value = ad_test(exceedances, fitted).pvalue

                if gpd_fit_p_value <= 0.05:
                    break
                else:
                    n_i -= decrease_n_by

        if gpd_fit and gpd_fit_p_value < 0.05:
            return n_i / N * (1 - gpd_fit.cdf(test_i - t)), True, gpd_fit_p_value, raw_ecdf_estimate
        else:
            if gpd_fit:
                # TODO: get index and highlight which observation could not be fitted!
                warn('A good GPD fit could not be reached, using the ECDF estimate instead')
            
            return raw_ecdf_estimate, False, np.nan, raw_ecdf_estimate
Example #13
 def testVariance(self, dist):
   loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
   self.assertEqual(dist.batch_shape, dist.variance().shape)
   # scipy doesn't seem to be very accurate for small concentrations, so use
   # higher precision.
   expected = sp_stats.genpareto(np.float64(conc), loc=np.float64(loc),
                                 scale=np.float64(scale)).var()
   if expected <= 0:
     return  # scipy sometimes returns nonsense zero or negative variances.
   actual = self.evaluate(dist.variance())
   msg = ('Location: {}, scale: {}, concentration: {}, '
          'scipy variance: {}, tfp variance: {}')
   hp.note(msg.format(loc, scale, conc, expected, actual))
   self.assertAllClose(expected, actual)
Example #14
def square_error_genpareto(shape):
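    # mean, lejp, uejp, the percentile bounds and the *_error_weight factors
    # are free variables expected from the enclosing scope.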

    from scipy.stats import genpareto

    distribution = genpareto(c=shape)

    square_errors = [
        np.power(mean - distribution.mean(), 2.0) * mean_error_weight,
        np.power(lejp - distribution.ppf(percentile_lower), 2.0) *
        lejp_error_weight,
        np.power(uejp - distribution.ppf(percentile_upper), 2.0) *
        uejp_error_weight
    ]

    return square_errors
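A hypothetical usage sketch: supply the free variables the function expects, then minimize the summed squared error over the shape parameter (the target values below are made up):

import numpy as np
from scipy import optimize

mean, lejp, uejp = 1.2, 0.1, 4.0                 # assumed target statistics
percentile_lower, percentile_upper = 0.05, 0.95  # assumed percentiles
mean_error_weight = lejp_error_weight = uejp_error_weight = 1.0

result = optimize.minimize_scalar(
    lambda shape: np.sum(square_error_genpareto(shape)),
    bounds=(-0.5, 0.9), method='bounded')
print(result.x)  # shape whose genpareto best matches the targets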
Example #15
    def test_ppf(self):
        xi = 0.1
        mu = 0.05
        sig = 0.5

        model = GenParetoDist(xi, mu, sig)
        model_sp = genpareto(c=xi, loc=mu, scale=sig)

        us = np.array(np.linspace(0.01, 0.99), dtype=float)

        vec = model.ppf(us)
        vec_sp = model_sp.ppf(us)

        for idx in range(len(us)):
            self.assertAlmostEqual(vec[idx], vec_sp[idx], delta=1e-10)
Example #16
 def testVariance(self, dist):
   loc, scale, conc = self.evaluate([dist.loc, dist.scale, dist.concentration])
   # scipy doesn't seem to be very accurate for small concentrations, so use
   # higher precision.
   expected = sp_stats.genpareto(np.float64(conc), loc=np.float64(loc),
                                 scale=np.float64(scale)).var()
   # scipy sometimes returns nonsense zero or negative variances.
   hp.assume(expected > 0)
   # scipy gets bad answers for very small concentrations even in 64-bit.
   # https://github.com/scipy/scipy/issues/11168
   hp.assume(conc > 1e-5)
   self.assertEqual(dist.batch_shape, dist.variance().shape)
   actual = self.evaluate(dist.variance())
   msg = ('Location: {}, scale: {}, concentration: {}, '
          'scipy variance: {}, tfp variance: {}')
   hp.note(msg.format(loc, scale, conc, expected, actual))
   self.assertAllClose(expected, actual)
Example #17
import numpy as np
import torch
from scipy import stats


def add_spikes(ts: torch.Tensor, only_upper_spikes: bool = False):
    """
    Adds spikes to 15% of the time series in the form of heavy-tailed (Generalized Pareto) realizations

    Arguments:
        ts: time series
        only_upper_spikes: boolean to indicate upper-tailed or two-tailed spikes
    """
    num_spikes = int(0.15 * ts.shape[0])
    indices_for_gp_spikes = np.random.choice(np.arange(len(ts)),
                                             replace=False,
                                             size=num_spikes)
    spike_direction = np.random.choice([-1, 1], replace=True, size=num_spikes)
    if only_upper_spikes:
        spike_direction = np.ones_like(spike_direction)
    spikes = stats.genpareto(1 / 50).rvs(num_spikes) * spike_direction
    ts[indices_for_gp_spikes] += spikes
    return ts
Example #18
    def fit_tail(tail):
        """
        Fits the tail using scipy's genpareto and calculates the CDF of the tail for the fitted distribution.
        Args:
            tail (numpy.ndarray): tail to fit

        Returns:
            numpy.ndarray, tuple: Cdf of the data for the fitted tail, fit parameters (c, loc, scale).
        """
        # floc is set to zero because the data is expected to be transformed, so the location of the pareto distribution
        #  is 0. Check generate_tails for further information.
        fit_out = genpareto.fit(tail, floc=0)
        # generate distribution with the fitted parameters
        estimated_distribution = genpareto(c=fit_out[0], loc=fit_out[1], scale=fit_out[2])
        # calculate the cdf of the estimated distribution in ascending order
        cdf_of_tail = estimated_distribution.cdf(tail)
        cdf_of_tail.sort()
        return cdf_of_tail, fit_out
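A short usage sketch, treating fit_tail as a standalone function and fitting a synthetic, zero-located tail:

from scipy.stats import genpareto

tail = genpareto(c=0.3, loc=0, scale=2.0).rvs(size=500, random_state=1)
cdf_of_tail, fit_out = fit_tail(tail)
print(fit_out)  # (c, loc=0, scale) recovered from the sample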
Example #19
    def montecarlo_simulation(self, mc_steps=None):
        """
        Runs Monte Carlo simulation for the optimal position.
        
        Args:
            mc_steps: number of Monte Carlo steps to run.

        Returns:
            int: number of simulated AU2 statistics exceeding the observed value
            int: number of simulated Anderson-Darling statistics exceeding the observed value
            int: number of simulated Cramér-von Mises statistics exceeding the observed value
            int: number of Monte Carlo steps

        Raises:
            RuntimeError: if the function is called before the optimal-tail fit has been run.
        """
        if (self.optimal_tail_index is None or
                self.rv_list is None or
                self.cdf_list is None):
            raise RuntimeError("Fits have to run before the Monte Carlo simulation")
        if mc_steps is None:
            mc_steps = self.mc_steps
        # generate mc points
        mc_counter_au2 = 0
        mc_counter_a2 = 0
        mc_counter_w2 = 0

        # make sure every thread has a different seed: RandomState() with no
        # argument seeds itself from fresh OS entropy
        random_state = np.random.RandomState()

        random_variates = self.rv_list[self.optimal_tail_index].rvs(
            size=(mc_steps, self.optimal_tail.size), random_state=random_state)
        for index, random_variate in enumerate(random_variates):
            print("\t" + str(index) + "/" + str(mc_steps), end='\r', flush=True)
            fit_out = genpareto.fit(np.sort(random_variate)[::-1], floc=0)
            my_pareto = genpareto(c=fit_out[0], loc=fit_out[1], scale=fit_out[2])
            cdf_of_tail = np.sort(my_pareto.cdf(random_variate))
            if au2(cdf_of_tail) > self.au_2_data[self.optimal_tail_index]:
                mc_counter_au2 += 1
            if anderson_darling(cdf_of_tail) > self.anderson_data[self.optimal_tail_index]:
                mc_counter_a2 += 1
            if cramer_von_mises(cdf_of_tail) > self.cramer_data[self.optimal_tail_index]:
                mc_counter_w2 += 1

        return mc_counter_au2, mc_counter_a2, mc_counter_w2, mc_steps
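An illustrative follow-up (tail_fitter here is a hypothetical instance of the class above): the returned counters convert to Monte Carlo p-value estimates by dividing by the number of steps.

n_au2, n_a2, n_w2, steps = tail_fitter.montecarlo_simulation(mc_steps=1000)
p_au2 = n_au2 / steps
p_a2 = n_a2 / steps
p_w2 = n_w2 / steps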
Example #20
def inter_arrival_dist(num_samples):
    # mixture: discrete values 0..13 with the probabilities below, plus a
    # Generalized Pareto draw for the remaining probability mass
    dist = genpareto(0.154971, loc=15, scale=16.0292)
    probs = [0.00536, 0.00047, 0.17820, 0.09239, 0.00018, 0.02740, 0.00065,
             0.00606, 0.00023, 0.00837, 0.08989, 0.00092, 0.00326, 0.01980]
    cum_probs = np.cumsum(probs)
    output = []
    for _ in range(num_samples):
        # draw a fresh uniform for every sample
        number = random.random()
        bucket = int(np.searchsorted(cum_probs, number, side='right'))
        if bucket < len(probs):
            output.append(bucket)
        else:
            output.append(dist.rvs())
    return output

Example #21
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2,
                                             dpi=1300)

pp_plot(logeados, stats.gennorm(beta=parametros_normal[0],
                                loc=parametros_normal[1],
                                scale=parametros_normal[2]),
        line=True, ax=ax1)

ax1.set_title('Generalized normal', fontsize=11)

pp_plot(logeados, stats.genpareto(c=parametros_pareto[0],
                                  loc=parametros_pareto[1],
                                  scale=parametros_pareto[2]),
        line=True, ax=ax2)
ax2.set_title('Generalized Pareto', fontsize=11)

pp_plot(logeados, stats.dweibull(c=parametros_weibull[0],
                                 loc=parametros_weibull[1],
                                 scale=parametros_weibull[2]),
        line=True, ax=ax3)
ax3.set_title('Double Weibull', fontsize=11)

pp_plot(logeados, stats.gamma(a=parametros_gamma[0],
                              loc=parametros_gamma[1],
                              scale=parametros_gamma[2]),
        line=True, ax=ax4)
ax4.set_title('Gamma', fontsize=11)
Example #22
def _run_gpd_p(x, x0=0, side="upper", nx=260, fit_alpha=0.05, plot=False):
    """Fit tail with generalized pareto distribution to get p-value of x0.
    Based on Knijnenburg et al, 2009 (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2687965/)
    Here we use a komogorov-smirnof test for equality of distributions 
    instead of Anderson-Darling as used in their paper.
    Parameters
    ==========
    x: array
        Array of values containing the bootstrap or permutation distribution
    x0: float
        The value the distribution is being tested against
    side: string
        Specify the tail of the distribution to be tested must be one of ["upper", "lower"]
    nx: int
        Starting value for the number of excedences to begin counting down from
        while attempting to fit the GPD
    fit_alpha: float
        Alpha used to reject the null hypothesis that the tail of the data
        comes from the fitted GPD.
    Returns
    =======
    p: float
        fitted p-value
    """
    x = np.sort(x)
    fit_p = 0
    n = len(x)
    if nx > len(x):
        nx = len(x)
    if side == "upper":
        epc = np.count_nonzero(x >= x0)
    elif side == "lower":
        epc = np.count_nonzero(x <= x0)
    else:
        raise ValueError(f'side must be one of ["upper", "lower"], you provided {side}')
    if epc >= 10:
        # TODO: binomial estimate of this
        return (epc + 1) / (n + 1)
    while (fit_p < fit_alpha) and (nx > 10):
        nx -= 10
        if side == "upper":
            t = np.mean([x[-1 * nx], x[-1 * nx - 1]])
            tail = x[-1 * nx :] - t
        else:
            t = np.mean([x[nx], x[nx + 1]])
            tail = np.sort((x[:nx]) - t)
        fit_params = stats.genpareto.fit(tail)
        fitted_gpd = stats.genpareto(*fit_params)
        # shape parameter in the paper's parameterization (k = -c in scipy's
        # convention); the fitted GPD has a finite upper endpoint when k > 0
        k = -fit_params[0]
        fit_stat, fit_p = stats.kstest(tail, fitted_gpd.cdf)
    if fit_p < fit_alpha:
        print(
            "Could not fit GPD to tail of distribution, returning empirical cdf based p.",
            flush=True,
        )
        return (epc + 1) / (n + 1)
        # raise Exception("Could not fit GPD to tail of distribution")

    if plot:
        plot_tail(tail, fitted_gpd.cdf)

    if side == "upper":
        p = nx / n * (1 - fitted_gpd.cdf(x0 - t))
        # If p == 0 and K > 0 then we're in a domain where
        # GPD is finite and unsuitable for extrapolation
        # In these cases, return the pvalue for the extreme of x,
        # which will be conservative
        if (p == 0) and (k > 0):
            p = nx / n * (1 - fitted_gpd.cdf(x.max() - t))
            if p == 0:
                return (epc + 1) / (n + 1)
                # raise Exception("p = 0")
        elif (p == 0) and (k <= 0):
            raise Exception("p=0 and k is not > 0")
    else:
        p = nx / n * (fitted_gpd.cdf(x0 - t))
        if (p == 0) and (k > 0):
            p = nx / n * (fitted_gpd.cdf(x.min() - t))
            if p == 0:
                return (epc + 1) / (n + 1)
                # raise Exception("p = 0")
        elif (p == 0) and (k <= 0):
            raise Exception("p=0 and k is not > 0")

    # return nx, t, fitted_gpd, p
    return p
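A usage sketch (illustrative): p-value of an extreme observation against a simulated null distribution.

from scipy import stats

null = stats.norm.rvs(size=10000, random_state=0)
p = _run_gpd_p(null, x0=4.0, side="upper")
print(p)  # small tail p-value, extrapolated through the fitted GPD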
Example #23
    # the results for the following look strange, maybe a refactoring error
    he, h = hess_ndt(logmps, parsgpd, argsgpd, options)
    print(np.linalg.eigh(he)[0])
    f = lambda params: logmps(params, *argsgpd)
    print(f(parsgpd))
    #add binned
    fp2, bp2 = np.histogram(p2rvs, bins=50)
    'fitbinned t-distribution'
    gpdparest_mlebinel = fitbinned(stats.genpareto, fp2, bp2, x0p)
    gpdparest_gmmbinelidentity = fitbinnedgmm(stats.genpareto, fp2, bp2, x0p)
    print('gpdparest_mlebinel', gpdparest_mlebinel)
    print('gpdparest_gmmbinelidentity', gpdparest_gmmbinelidentity)
    gpdparest_gmmquantile2 = fitquantilesgmm(stats.genpareto,
                                             p2rvs,
                                             start=x0p,
                                             pquant=None,
                                             frozen=None)
    print('gpdparest_gmmquantile2', gpdparest_gmmquantile2)

    print(
        fitquantilesgmm(stats.genpareto,
                        p2rvs,
                        start=x0p,
                        pquant=np.linspace(0.01, 0.99, 10),
                        frozen=None))
    fp2, bp2 = np.histogram(p2rvs,
                            bins=stats.genpareto(2).ppf(
                                np.linspace(0, 0.99, 10)))
    print('fitbinnedgmm equal weight bins')
    print(fitbinnedgmm(stats.genpareto, fp2, bp2, x0p))
Example #24
        self.block5 = nn.ConvTranspose2d(64, out_channels, 4, 2, 1)

    def forward(self, latent, continuous_code):
        inp = torch.cat((latent, continuous_code), 1)
        out = self.block1(inp)
        out = self.block2(out)
        out = self.block3(out)
        out = self.block4(out)
        return torch.tanh(self.block5(out))


latentdim = 20
G = Generator(in_channels=latentdim, out_channels=1).cuda()
genpareto_params = (1.33, 0, 0.0075761900937239765)
threshold = -0.946046018600464
rv = genpareto(*genpareto_params)

G.load_state_dict(torch.load('ExGAN/G999.pt'))
G.eval()

c = 0.75
k = 10
for tau in [0.05, 0.01]:
    tau_prime = tau / (c**k)
    val = rv.ppf(1 - tau_prime) + threshold
    t = time.time()
    code = Variable(torch.ones(100, 1, 1, 1) * val).cuda(2)
    latent = Variable(FloatTensor(torch.randn((100, latentdim, 1, 1)))).cuda(2)
    images = G(latent, code)
    print(time.time() - t)
    torch.save(0.5 * (images + 1), 'ExGAN' + str(tau) + '.pt')
Example #25
		def loglikelihood(par):
			logscale = par[0]
			shape = par[1]
			dist = genpareto(loc=0,scale=np.exp(logscale),c=shape)
			return -np.mean(dist.logpdf(x))
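A standalone sketch of fitting by minimizing this objective (Nelder-Mead copes with the infinite penalty outside the support; the sample here is synthetic):

import numpy as np
from scipy import optimize
from scipy.stats import genpareto

x = genpareto(c=0.2, loc=0, scale=1.5).rvs(size=1000, random_state=0)

def loglikelihood(par):
    logscale, shape = par
    dist = genpareto(loc=0, scale=np.exp(logscale), c=shape)
    return -np.mean(dist.logpdf(x))

res = optimize.minimize(loglikelihood, x0=np.array([0.0, 0.1]),
                        method='Nelder-Mead')
print(np.exp(res.x[0]), res.x[1])  # estimated scale and shape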
Example #26
	def fit_marginal_models(self,n=100,c=0,policy="veto",qq_plots=False,seed=1):

		"""Fit Generalised Pareto models to the negative portion of post-interconnection power margin distributions and returns fitted parameters
  
	  **Parameters**:

	  `n` (`int`): Number of simulated samples with which to fit the models

	  `c` (`int`): Assumed interconnection capacity

	  `policy` (`string`): Either 'veto' or 'share'

	  `qq_plots` (`bool`): if `True`, outputs QQ-plots of fitted models

	  `seed` (`int`): random seed


	  """
		a1_shortfalls = self.hindcast.simulate_region(n=n,m=(-1,np.Inf),c=c,policy=policy)
		a2_shortfalls = self.hindcast.simulate_region(n=n,m=(np.Inf,-1),c=c,policy=policy)

		a1_shortfalls = -a1_shortfalls[:,0]
		a2_shortfalls = -a2_shortfalls[:,1]

		pars0 = self._cdf_univar_gp_fitter(a1_shortfalls,upper_endpoint_lb = -self.hindcast_limits[0])
		pars1 = self._cdf_univar_gp_fitter(a2_shortfalls,upper_endpoint_lb = -self.hindcast_limits[1])

		a1_scale_est = np.exp(pars0.x[0])
		a1_shape_est = pars0.x[1]

		a2_scale_est = np.exp(pars1.x[0])
		a2_shape_est = pars1.x[1]

		#a1_scale_std = np.exp(pars0.hess_inv[0,0]/np.sqrt(n))
		#a1_shape_std = pars0.hess_inv[1,1]/np.sqrt(n)

		#a2_scale_std = np.exp(pars1.hess_inv[0,0]/np.sqrt(n))
		#a2_shape_std = pars1.hess_inv[1,1]/np.sqrt(n)

		if qq_plots:
			q_grid = np.linspace(0.01,0.99,99)
			eq1 = np.quantile(a1_shortfalls,q = q_grid)
			eq2 = np.quantile(a2_shortfalls,q = q_grid)

			fq1 = genpareto(loc=0,scale=a1_scale_est,c=a1_shape_est).ppf(q=q_grid)
			fq2 = genpareto(loc=0,scale=a2_scale_est,c=a2_shape_est).ppf(q=q_grid)

			fig = plt.figure(figsize=(5,5))
			ax = fig.add_subplot(211)
			ax.scatter(eq1, fq1, color = '#004C99')
			ax.plot([0, max(eq1)], [0, max(eq1)], '--', color = '#FF8000')
			#ax.xlim(lineStart, lineEnd)
			#ax.ylim(lineStart, lineEnd)
			ax.set_xlabel('Empirical quantiles')
			ax.set_ylabel('Fitted quantiles')
			ax.set_title('Area 1')
			##plt.axis('scaled')

			ax = fig.add_subplot(212)
			ax.scatter(eq2, fq2, color = '#004C99')
			ax.plot([0, max(eq2)], [0, max(eq2)], '--', color = '#FF8000')
			#ax.xlim(lineStart, lineEnd)
			#ax.ylim(lineStart, lineEnd)
			ax.set_xlabel('Empirical quantiles')
			ax.set_ylabel('Fitted quantiles')
			ax.set_title('Area 2')
			##plt.axis('scaled')

			plt.tight_layout()
			plt.show()


		# res = {
		#   "area1":{
		#     "pars":{
		#       "scale":a1_scale_est,
		#       "c":a1_shape_est
		#     },
		#     "std":{
		#       "scale":a1_scale_std,
		#       "c":a1_shape_std
		#     }
		#   },
		#   "area2":{
		#     "pars":{
		#       "scale":a2_scale_est,
		#       "c":a2_shape_est
		#     },
		#     "std":{
		#       "scale":a2_scale_std,
		#       "c":a2_shape_std
		#     }
		#   }
		# }

		res = {
		  "a1_scale":a1_scale_est,
		  "a1_shape":a1_shape_est,
		  "a2_scale":a2_scale_est,
		  "a2_shape":a2_shape_est}

		return res
Example #28
def all_dists():
    # dist params were taken from the scipy.stats official
    # documentation examples
    # Total - 89
    return {
        "alpha":
        stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit":
        stats.anglit(loc=0.0, scale=1.0),
        "arcsine":
        stats.arcsine(loc=0.0, scale=1.0),
        "beta":
        stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime":
        stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford":
        stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr":
        stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy":
        stats.cauchy(loc=0.0, scale=1.0),
        "chi":
        stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2":
        stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine":
        stats.cosine(loc=0.0, scale=1.0),
        "dgamma":
        stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull":
        stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang":
        stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon":
        stats.expon(loc=0.0, scale=1.0),
        "exponnorm":
        stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib":
        stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow":
        stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f":
        stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife":
        stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk":
        stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy":
        stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm":
        stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic":
        stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto":
        stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm":
        stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon":
        stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme":
        stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma":
        stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma":
        stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic":
        stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat":
        stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz":
        stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r":
        stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l":
        stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy":
        stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic":
        stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm":
        stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm":
        stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant":
        stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma":
        stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss":
        stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull":
        stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb":
        stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu":
        stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone":
        stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign":
        stats.kstwobign(loc=0.0, scale=1.0),
        "laplace":
        stats.laplace(loc=0.0, scale=1.0),
        "levy":
        stats.levy(loc=0.0, scale=1.0),
        "levy_l":
        stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic":
        stats.logistic(loc=0.0, scale=1.0),
        "loggamma":
        stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace":
        stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm":
        stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax":
        stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell":
        stats.maxwell(loc=0.0, scale=1.0),
        "mielke":
        stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami":
        stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2":
        stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf":
        stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct":
        stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm":
        stats.norm(loc=0.0, scale=1.0),
        "pareto":
        stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3":
        stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw":
        stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm":
        stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm":
        stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist":
        stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal":
        stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh":
        stats.rayleigh(loc=0.0, scale=1.0),
        "rice":
        stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss":
        stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular":
        stats.semicircular(loc=0.0, scale=1.0),
        "t":
        stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang":
        stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon":
        stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm":
        stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda":
        stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform":
        stats.uniform(loc=0.0, scale=1.0),
        "vonmises":
        stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line":
        stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald":
        stats.wald(loc=0.0, scale=1.0),
        "weibull_min":
        stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max":
        stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy":
        stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
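A brief usage sketch: iterate over the frozen distributions and draw a few samples from each (some of the more exotic distributions sample slowly):

for name, dist in all_dists().items():
    print(name, dist.rvs(size=3, random_state=0))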
Example #29
def value_distribution(num_samples):
    dist = genpareto(0.348238, loc=0, scale=214.476)
    return dist.rvs(num_samples)
Example #30
from scipy.stats import lognorm

dist_lognorm = lognorm(s=lognorm_params[1], scale=np.exp(lognorm_params[0]))

from scipy.stats import pareto

dist_pareto = pareto(b=pareto_params)

from scipy.stats import chi2

dist_chi2 = chi2(df=chi2_params)

from scipy.stats import genpareto

dist_genpareto = genpareto(c=genpareto_params)

from scipy.stats import expon

dist_expon = expon(scale=1 / exp_params)

x = np.linspace(0, 1, num=500)

import matplotlib.pyplot as plt

# Comment out the non-relevant lines with # and rerun this portion only to get a better view of the fit

#plt.plot(x, dist_gamma.ppf(x), color ='red', label='gamma')
#plt.plot(x, dist_pareto.ppf(x), color ='brown', label='pareto')
#plt.plot(x, dist_chi2.ppf(x), color ='purple', label='chi2')
#plt.plot(x, dist_genpareto.ppf(x), color ='purple', label='genpareto')
Example #32
    gpdparest_mlebinel = fitbinned(stats.genpareto, fp2, bp2, x0p)
    gpdparest_gmmbinelidentity = fitbinnedgmm(stats.genpareto, fp2, bp2, x0p)
    print('gpdparest_mlebinel', gpdparest_mlebinel)
    print('gpdparest_gmmbinelidentity', gpdparest_gmmbinelidentity)
    gpdparest_gmmquantile2 = fitquantilesgmm(stats.genpareto, p2rvs, start=x0p, pquant=None, frozen=None)
    print('gpdparest_gmmquantile2', gpdparest_gmmquantile2)
    # something wrong: something hard-coded?
    '''
    >>> fitquantilesgmm(stats.genpareto, p2rvs, start=x0p, pquant=np.linspace(0.5,0.95,10), frozen=None)
    Traceback (most recent call last):
      File "<pyshell#6>", line 1, in <module>
        fitquantilesgmm(stats.genpareto, p2rvs, start=x0p, pquant=np.linspace(0.5,0.95,10), frozen=None)
      File "C:\...\scikits\statsmodels\sandbox\stats\distribution_estimators.py", line 224, in fitquantilesgmm
        parest = optimize.fmin(lambda params:np.sum(momentcondquant(distfn, params, mom2s,(pq,xqs), shape=None)**2), start)
      File "c:\...\scipy-trunk_after\trunk\dist\scipy-0.8.0.dev6156.win32\programs\python25\lib\site-packages\scipy\optimize\optimize.py", line 183, in fmin
        fsim[0] = func(x0)
      File "c:\...\scipy-trunk_after\trunk\dist\scipy-0.8.0.dev6156.win32\programs\python25\lib\site-packages\scipy\optimize\optimize.py", line 103, in function_wrapper
        return function(x, *args)
      File "C:\...\scikits\statsmodels\sandbox\stats\distribution_estimators.py", line 224, in <lambda>
        parest = optimize.fmin(lambda params:np.sum(momentcondquant(distfn, params, mom2s,(pq,xqs), shape=None)**2), start)
      File "C:\...\scikits\statsmodels\sandbox\stats\distribution_estimators.py", line 210, in momentcondquant
        cdfdiff = distfn.cdf(xq, *params) - pq
    ValueError: shape mismatch: objects cannot be broadcast to a single shape
    '''
    print(fitquantilesgmm(stats.genpareto, p2rvs, start=x0p,
                          pquant=np.linspace(0.01,0.99,10), frozen=None))
    fp2, bp2 = np.histogram(p2rvs,
                    bins=stats.genpareto(2).ppf(np.linspace(0,0.99,10)))
    print('fitbinnedgmm equal weight bins')
    print(fitbinnedgmm(stats.genpareto, fp2, bp2, x0p))

Example #34
import numpy as np
from scipy.stats import genpareto
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

c = 0.1
mean, var, skew, kurt = genpareto.stats(c, moments='mvsk')
x = np.linspace(genpareto.ppf(0.01, c), genpareto.ppf(0.99, c), 100)
ax.plot(x, genpareto.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genpareto pdf')
rv = genpareto(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
vals = genpareto.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genpareto.cdf(vals, c))
r = genpareto.rvs(c, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()