Exemple #1
0
  def testJohnsonSULogPDF(self):
    """Checks JohnsonSU `log_prob`/`prob` shapes and values against SciPy."""
    batch_size = 6
    skewness = tf.constant([1.0] * batch_size)
    tailweight = tf.constant([2.0] * batch_size)
    mu = tf.constant([3.0] * batch_size)
    sigma = tf.constant([math.sqrt(10.0)] * batch_size)
    x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
    johnson_su = tfd.JohnsonSU(
        skewness=skewness,
        tailweight=tailweight,
        loc=mu,
        scale=sigma,
        validate_args=True)

    def check_batch_shape(values):
      # Compare the dynamic and the static batch shape of the distribution
      # with both the static and the evaluated shape of `values`.
      self.assertAllEqual(
          self.evaluate(johnson_su.batch_shape_tensor()), values.shape)
      self.assertAllEqual(
          self.evaluate(johnson_su.batch_shape_tensor()),
          self.evaluate(values).shape)
      self.assertAllEqual(johnson_su.batch_shape, values.shape)
      self.assertAllEqual(johnson_su.batch_shape, self.evaluate(values).shape)

    log_pdf = johnson_su.log_prob(x)
    check_batch_shape(log_pdf)

    pdf = johnson_su.prob(x)
    check_batch_shape(pdf)

    # Reference values come from SciPy's frozen johnsonsu built from the same
    # four parameters, passed positionally in the same order.
    scipy_reference = sp_stats.johnsonsu(
        self.evaluate(skewness),
        self.evaluate(tailweight),
        self.evaluate(mu),
        self.evaluate(sigma))
    expected_log_pdf = scipy_reference.logpdf(x)
    self.assertAllClose(expected_log_pdf, self.evaluate(log_pdf))
    self.assertAllClose(np.exp(expected_log_pdf), self.evaluate(pdf))
Exemple #2
0
def plot_johnson_su_fit(data,
                        fit_results,
                        title=None,
                        x_label=None,
                        x_range=None,
                        y_range=None,
                        fig_size=(6, 5),
                        bin_width=1,
                        filename=None):
    """
    Build a frozen Johnson SU distribution from ``fit_results`` and hand it,
    together with the data and the plot options, to ``plot_fit_continuous``.

    :param data: (numpy.array) observations
    :param fit_results: dictionary read for the keys "a", "b", "loc" and "scale"
        (other keys, such as "AIC", are ignored here)
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range
    :param fig_size: figure size, forwarded unchanged (defaults to the tuple (6, 5))
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """

    # Look the four parameters up in the same order as the keyword call needs.
    johnson_params = {key: fit_results[key]
                      for key in ('a', 'b', 'loc', 'scale')}
    fitted_dist = stat.johnsonsu(**johnson_params)

    plot_fit_continuous(
        data=data,
        dist=fitted_dist,
        label='Johnson Su',
        bin_width=bin_width,
        title=title,
        x_label=x_label,
        x_range=x_range,
        y_range=y_range,
        fig_size=fig_size,
        filename=filename,
    )
Exemple #3
0
def adjust_noise(optimizer, epoch, args, noise_dict):
    """Install the scheduled noise distribution on ``optimizer``.

    The schedule entry used is the one stored under the smallest key of
    ``noise_dict`` that is strictly greater than ``epoch``; when no key is
    greater, the largest key is used. Its value is unpacked as the positional
    arguments of the ``scipy.stats`` distribution named by ``args.noisemodel``
    and the resulting frozen distribution is assigned to
    ``optimizer.noise_generator``.

    :param optimizer: object that receives the ``noise_generator`` attribute
    :param epoch: value compared against the keys of ``noise_dict``
    :param args: object with a ``noisemodel`` attribute, one of 'johnson',
        'gaussian', 'laplace', 'cauchy', 'gennorm' or 'studentt'
    :param noise_dict: mapping from schedule key to a sequence of
        distribution parameters
    :raises ValueError: if ``args.noisemodel`` is not a supported name
    :raises IndexError: if ``noise_dict`` is empty
    """
    noise_factories = {
        'johnson': st.johnsonsu,
        'gaussian': st.norm,
        'laplace': st.laplace,
        'cauchy': st.cauchy,
        'gennorm': st.gennorm,
        'studentt': st.t,
    }
    # Fail with a clear message instead of an UnboundLocalError further down.
    if args.noisemodel not in noise_factories:
        raise ValueError(
            'unknown noise model %r; expected one of %s'
            % (args.noisemodel, sorted(noise_factories)))

    keys = sorted(noise_dict)
    # First key beyond `epoch`, falling back to the last key of the schedule.
    select = next((k for k in keys if k > epoch), keys[-1])

    if args.noisemodel == 'gaussian':
        print("GAUSSIAN NOISE set")
    noise_model = noise_factories[args.noisemodel](*noise_dict[select])

    print('noise model is', args.noisemodel, noise_dict[select])
    optimizer.noise_generator = noise_model
Exemple #4
0
	def curvefit(self, series):
		"""Fit a Johnson SU distribution to `series` and return a summary value.

		The fitted parameters are checked with a Kolmogorov-Smirnov test against
		the same data. If the p value is below 0.1, or the mean of the fitted
		distribution is below 0.5, the fit is rejected and None is returned.
		Otherwise the fitted mean minus half the fitted standard deviation is
		returned.
		"""
		fitted_params = johnsonsu.fit(series)
		frozen = johnsonsu(*fitted_params)
		_, p_value = kstest(series, 'johnsonsu', N=len(series), args=fitted_params)
		fitted_mean = frozen.mean()
		if p_value < 0.1 or fitted_mean < 0.5:
			return None
		half_spread = frozen.std() / 2
		return fitted_mean - half_spread
Exemple #5
0
    def fit(self):
        """Set ``self.dist`` from the stored mean, std, skew and kurtosis.

        When both ``|skew|`` and ``|kurt|`` are below ``NORMAL_CUTOFF`` a normal
        distribution is used and the stored skew/kurtosis are reset to 0.0;
        otherwise a Johnson SU distribution is built from the parameters
        returned by ``self._johnsonsu_param``.
        """
        near_normal = (abs(self.skew) < NORMAL_CUTOFF
                       and abs(self.kurt) < NORMAL_CUTOFF)

        if not near_normal:
            a, b, loc, scale = self._johnsonsu_param(
                self.m, self.s, self.skew, self.kurt)
            self.dist = johnsonsu(a, b, loc=loc, scale=scale)
            return

        # It is hard to solve the johnson su curve when it is very close
        # to normality, so just use a normal curve instead.
        self.dist = norm(loc=self.m, scale=self.s)
        self.skew = 0.0
        self.kurt = 0.0
Exemple #6
0
 def test_dist(self):
     """Johnson SU parameters from ``_johnsonsu_param`` reproduce the moments."""
     mean, std_dev, skew, kurt = 2.0, 1.5, 0.3, 0.5
     tolerance = 0.00001

     params = self.c._johnsonsu_param(mean, std_dev, skew, kurt)
     a, b, loc, scale = params
     frozen = johnsonsu(a, b, loc=loc, scale=scale)
     actual_moments = frozen.stats(moments='mvsk')

     # 'mvsk' yields the variance, so compare against std_dev squared.
     expected_moments = (mean, std_dev**2.0, skew, kurt)
     for expected, actual in zip(expected_moments, actual_moments):
         assert abs(expected - actual) < tolerance
Exemple #7
0
 def test_dist(self):
     """Round-trip check: requested moments -> Johnson SU -> computed moments."""
     requested = {'mean': 2.0, 'std_dev': 1.5, 'skew': 0.3, 'kurt': 0.5}
     a, b, loc, scale = self.c._johnsonsu_param(requested['mean'],
                                                requested['std_dev'],
                                                requested['skew'],
                                                requested['kurt'])
     m1, v1, s1, k1 = johnsonsu(a, b, loc=loc, scale=scale).stats(moments='mvsk')

     # Each pair is (requested value, value reported by the fitted distribution);
     # the second moment reported is the variance, hence std_dev squared.
     checks = [
         (requested['mean'], m1),
         (requested['std_dev']**2.0, v1),
         (requested['skew'], s1),
         (requested['kurt'], k1),
     ]
     for wanted, got in checks:
         assert abs(wanted - got) < 0.00001
def fit_johnsonSu(data, x_label, fixed_location=0, figure_size=5):
    """
    Fit a Johnson SU distribution to the data with a fixed location, show the
    histogram of the data overlaid with the fitted density, and report the fit.

    :param data: (numpy.array) observations
    :param x_label: label to show on the x-axis of the histogram
    :param fixed_location: value the location parameter is held at during the fit
    :param figure_size: int, specify the figure size
    :returns: dictionary with keys "a", "b", "loc", "scale", and "AIC"
    """

    # plot histogram (density=True replaces the `normed` argument that
    # Matplotlib removed)
    fig, ax = plt.subplots(1, 1, figsize=(figure_size + 1, figure_size))
    ax.hist(data,
            density=True,
            bins='auto',
            edgecolor='black',
            alpha=0.5,
            label='Frequency')

    # estimate the parameters
    a, b, loc, scale = scs.johnsonsu.fit(data, floc=fixed_location)

    # plot the estimated JohnsonSu distribution between its 1% and 99% quantiles
    x_values = np.linspace(scs.johnsonsu.ppf(0.01, a, b, loc, scale),
                           scs.johnsonsu.ppf(0.99, a, b, loc, scale), 100)
    rv = scs.johnsonsu(a, b, loc, scale)
    # Axes has no `plot_all` method; `plot` draws the fitted density curve.
    ax.plot(x_values,
            rv.pdf(x_values),
            color=COLOR_CONTINUOUS_FIT,
            lw=2,
            label='JohnsonSu')

    ax.set_xlabel(x_label)
    ax.set_ylabel("Frequency")
    ax.legend()
    plt.show()

    # calculate AIC (k=3: a, b and scale are estimated, the location is fixed)
    aic = AIC(k=3,
              log_likelihood=np.sum(
                  scs.johnsonsu.logpdf(data, a, b, loc, scale)))

    # report results in the form of a dictionary
    return {"a": a, "b": b, "loc": loc, "scale": scale, "AIC": aic}
Exemple #9
0
    def fit(self, mean, std=None, skew=None, kurt=None):
        """Store the four moments and build the matching distribution.

        The moments can be given either as four separate arguments or, when
        ``std`` is omitted, packed into ``mean`` as an indexable of
        (mean, std, skew, kurt).

        When both ``|skew|`` and ``|kurt|`` are below ``NORMAL_CUTOFF`` a normal
        distribution is stored in ``self.dist`` and the stored skew/kurtosis are
        reset to 0.0; otherwise a Johnson SU distribution is built from the
        parameters returned by ``self._johnsonsu_param``.
        """
        # Identity test: `std == None` would compare element-wise for array input.
        if std is None:
            # Array or tuple format.
            self.m = mean[0]
            self.s = mean[1]
            self.skew = mean[2]
            self.kurt = mean[3]
        else:
            self.m = mean
            self.s = std
            self.skew = skew
            self.kurt = kurt

        if abs(self.skew) < NORMAL_CUTOFF and abs(self.kurt) < NORMAL_CUTOFF:
            # It is hard to solve the johnson su curve when it is very close
            # to normality, so just use a normal curve instead.
            self.dist = norm(loc=self.m, scale=self.s)
            self.skew = 0.0
            self.kurt = 0.0
        else:
            a, b, loc, scale = self._johnsonsu_param(
                self.m, self.s, self.skew, self.kurt)
            self.dist = johnsonsu(a, b, loc=loc, scale=scale)
Exemple #10
0
    def fit(self, mean, std=None, skew=None, kurt=None):
        """Record mean/std/skew/kurtosis and set ``self.dist`` accordingly.

        Call either as ``fit(mean, std, skew, kurt)`` or as ``fit(moments)``
        where ``moments`` is indexable and holds the four values in that order
        (this form is selected when ``std`` is left as None).

        If both ``|skew|`` and ``|kurt|`` are below ``NORMAL_CUTOFF``,
        ``self.dist`` becomes a normal distribution and the stored skew and
        kurtosis are set to 0.0. Otherwise ``self.dist`` is a Johnson SU
        distribution using the parameters from ``self._johnsonsu_param``.
        """
        # `is None` rather than `== None`, which is ambiguous for array-like std.
        if std is None:
            # Array or tuple format.
            self.m = mean[0]
            self.s = mean[1]
            self.skew = mean[2]
            self.kurt = mean[3]
        else:
            self.m = mean
            self.s = std
            self.skew = skew
            self.kurt = kurt

        if abs(self.skew) < NORMAL_CUTOFF and abs(self.kurt) < NORMAL_CUTOFF:
            # It is hard to solve the johnson su curve when it is very close
            # to normality, so just use a normal curve instead.
            self.dist = norm(loc=self.m, scale=self.s)
            self.skew = 0.0
            self.kurt = 0.0
        else:
            a, b, loc, scale = self._johnsonsu_param(self.m, self.s, self.skew,
                                                     self.kurt)
            self.dist = johnsonsu(a, b, loc=loc, scale=scale)
#!/usr/bin/env python

from __future__ import print_function
import openturns as ot
import scipy as sp
import scipy.stats as st

# Run the same sequence of OpenTURNS queries on two frozen SciPy
# distributions and print each result.
for scipy_dist in [st.uniform(), st.johnsonsu(2.55, 2.25)]:
    # Reseed at the start of every iteration (presumably so the sampled values
    # printed below are reproducible -- TODO confirm which generator the
    # OpenTURNS sampling actually draws from).
    sp.random.seed(42)

    # create an openturns distribution
    py_dist = ot.SciPyDistribution(scipy_dist)
    distribution = ot.Distribution(py_dist)

    print('distribution=', distribution)
    print('realization=', distribution.getRealization())
    sample = distribution.getSample(10000)
    # Only the first five sampled points are printed.
    print('sample=', sample[0:5])
    point = [0.6]
    print('pdf= %.6g' % distribution.computePDF(point))
    cdf = distribution.computeCDF(point)
    print('cdf= %.6g' % cdf)
    # The CDF value just computed is fed back in as the quantile level; the
    # calls passing True as second argument are the ones labelled "(tail)".
    print('quantile=', distribution.computeQuantile(cdf))
    print('quantile (tail)=', distribution.computeQuantile(cdf, True))
    print('scalar quantile=%.6g' % distribution.computeScalarQuantile(cdf))
    print('scalar quantile (tail)=%.6g' %
          distribution.computeScalarQuantile(cdf, True))
    # Moments reported by the distribution next to the ones computed from
    # the 10000-point sample.
    print('mean=', distribution.getMean())
    print('mean(sampling)=', sample.computeMean())
    print('std=', distribution.getStandardDeviation())
    print('std(sampling)=', sample.computeStandardDeviation())
Exemple #12
0
import collections as ct
import scipy.stats as st

# Frozen SciPy distributions for the income model, keyed by distribution name.
# The numbers are passed positionally, exactly as given, to each constructor.
income_model_dict = ct.OrderedDict([
    ('johnsonsu', st.johnsonsu(
        -5.3839367311065747,
        0.84376726932941271,
        -224.21280806585787,
        79.661998696081355,
    )),
    ('powerlaw', st.powerlaw(
        0.16342470577523971,
        -3.1423954341714262e-15,
        55664716.096562646,
    )),
    ('exponpow', st.exponpow(
        0.25441022752240294,
        -1.8475789041433829e-22,
        36120900.670255348,
    )),
    ('nakagami', st.nakagami(
        0.10038339454419823,
        -3.0390927147076284e-22,
        33062195.426077582,
    )),
    ('exponweib', st.exponweib(
        -3.5157658448986489,
        0.44492833350419714,
        -15427.454196748848,
        2440.0278856175246,
    )),
])

# Frozen SciPy distributions for the driving-distance model, keyed by
# distribution name; further entries are added to this mapping afterwards.
drivingdistance_model_dict = ct.OrderedDict([
    ('nakagami', st.nakagami(
        0.11928581143831021,
        14.999999999999996,
        41.404620910360876,
    )),
    ('ncx2', st.ncx2(
        0.30254190304723211,
        1.1286538320791935,
        14.999999999999998,
        8.7361471573932192,
    )),
])
drivingdistance_model_dict['chi'] = st.chi(0.47882729877571095,
Exemple #13
0
# Moments selected by 'mvsk' for the shape parameters ``a`` and ``b``:

mean, var, skew, kurt = johnsonsu.stats(a, b, moments='mvsk')

# Display the probability density function (``pdf``) between the 1% and 99%
# quantiles:

x = np.linspace(johnsonsu.ppf(0.01, a, b),
                johnsonsu.ppf(0.99, a, b), 100)
ax.plot(x, johnsonsu.pdf(x, a, b),
        'r-', lw=5, alpha=0.6, label='johnsonsu pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = johnsonsu(a, b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = johnsonsu.ppf([0.001, 0.5, 0.999], a, b)
np.allclose([0.001, 0.5, 0.999], johnsonsu.cdf(vals, a, b))
# True

# Generate random numbers:

r = johnsonsu.rvs(a, b, size=1000)

# And compare the histogram (``density=True`` replaces the ``normed``
# argument that Matplotlib removed):

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
Exemple #14
0
def all_dists():
    """Return a dict mapping distribution names to frozen ``scipy.stats`` objects.

    Every value is a frozen continuous distribution created with ``loc=0.0``
    and ``scale=1.0`` plus the shape parameters listed below. The dict has 87
    entries (89 counting the two disabled Frechet entries kept as comments).

    The ``"gilbrat"`` key is kept for existing callers, but the distribution is
    resolved as ``stats.gibrat`` when available, because newer SciPy releases
    renamed it and dropped the old misspelled alias.

    :returns: dict of ``{name: frozen distribution}`` in the order listed
    """
    # dists param were taken from scipy.stats official
    # documentation examples

    # Prefer the current name; fall back to the old alias on older SciPy.
    gibrat = getattr(stats, "gibrat", None)
    if gibrat is None:
        gibrat = stats.gilbrat

    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat": gibrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
Exemple #15
0
 def _define_distribution(df_sample_byuser, list_stage_id):
     """Fit one frozen Johnson SU distribution per stage id.

     For each id in ``list_stage_id`` the entry ``df_sample_byuser[id]`` has
     its missing values dropped via ``dropna()``, is fitted with
     ``johnsonsu.fit`` and the fitted parameters are frozen into a
     distribution. Returns a dict ``{stage id: frozen distribution}``.
     """
     fitted_by_stage = {}
     for stage_id in list_stage_id:
         observations = df_sample_byuser[stage_id].dropna()
         fitted_params = johnsonsu.fit(observations)
         fitted_by_stage[stage_id] = johnsonsu(*fitted_params)
     return fitted_by_stage
Exemple #16
0


# Number of `train_batch`-sized batches that make up one `args.batch` step.
batch_per_step = int(args.batch / train_batch)
# int() truncates, so for a positive ratio this only holds when `args.batch`
# is a whole multiple of `train_batch`.
assert args.batch / train_batch <= batch_per_step

num_processes, num_gpus = args.nprocs, args.ngpus

# Build the frozen scipy.stats noise distribution named on the command line,
# passing `args.noiseparams` as its positional parameters. An unrecognised
# name leaves `noise_model` untouched.
if args.noisemodel == 'johnson':
    noise_model = st.johnsonsu(*args.noiseparams)
elif args.noisemodel == 'gaussian':
    noise_model = st.norm(*args.noiseparams)
elif args.noisemodel == 'laplace':
    noise_model = st.laplace(*args.noiseparams)
elif args.noisemodel == 'cauchy':
    noise_model = st.cauchy(*args.noiseparams)
elif args.noisemodel == 'gennorm':
    noise_model = st.gennorm(*args.noiseparams)
elif args.noisemodel == 'studentt':
    noise_model = st.t(*args.noiseparams)