def testJohnsonSULogPDF(self):
    """Check JohnsonSU ``log_prob``/``prob`` shapes and values against SciPy."""
    batch_size = 6
    skewness = tf.constant(batch_size * [1.0])
    tailweight = tf.constant(batch_size * [2.0])
    mu = tf.constant(batch_size * [3.0])
    sigma = tf.constant(batch_size * [math.sqrt(10.0)])
    points = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)

    dist = tfd.JohnsonSU(
        skewness=skewness,
        tailweight=tailweight,
        loc=mu,
        scale=sigma,
        validate_args=True)

    def check_batch_shape(values):
        # Compare both the dynamic and the static batch shape with the
        # static shape of the tensor and the shape of its evaluated value.
        self.assertAllEqual(
            self.evaluate(dist.batch_shape_tensor()), values.shape)
        self.assertAllEqual(
            self.evaluate(dist.batch_shape_tensor()),
            self.evaluate(values).shape)
        self.assertAllEqual(dist.batch_shape, values.shape)
        self.assertAllEqual(dist.batch_shape, self.evaluate(values).shape)

    log_pdf = dist.log_prob(points)
    check_batch_shape(log_pdf)

    pdf = dist.prob(points)
    check_batch_shape(pdf)

    # SciPy's frozen johnsonsu supplies the reference log-density.
    reference_log_pdf = sp_stats.johnsonsu(
        self.evaluate(skewness),
        self.evaluate(tailweight),
        self.evaluate(mu),
        self.evaluate(sigma)).logpdf(points)
    self.assertAllClose(reference_log_pdf, self.evaluate(log_pdf))
    self.assertAllClose(np.exp(reference_log_pdf), self.evaluate(pdf))
def plot_johnson_su_fit(data, fit_results, title=None, x_label=None,
                        x_range=None, y_range=None, fig_size=(6, 5),
                        bin_width=1, filename=None):
    """Plot observations together with a fitted Johnson SU distribution.

    The fitted parameters are frozen into a ``stat.johnsonsu`` object and the
    drawing itself is delegated to ``plot_fit_continuous``.

    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "a", "b", "loc", "scale", and
        "AIC" (only the first four are read here)
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range (the histogram shows the probability
        density so the upper value of y_range should be 1)
    :param fig_size: (tuple) figure size, forwarded unchanged
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """
    fitted_dist = stat.johnsonsu(
        a=fit_results['a'],
        b=fit_results['b'],
        loc=fit_results['loc'],
        scale=fit_results['scale'],
    )
    plot_fit_continuous(
        data=data,
        dist=fitted_dist,
        label='Johnson Su',
        bin_width=bin_width,
        title=title,
        x_label=x_label,
        x_range=x_range,
        y_range=y_range,
        fig_size=fig_size,
        filename=filename,
    )
def adjust_noise(optimizer, epoch, args, noise_dict):
    """Install the scheduled noise distribution on ``optimizer``.

    ``noise_dict`` maps schedule keys (compared against ``epoch``) to the
    positional parameters of a ``scipy.stats`` distribution. The entry used
    is the first key, in ascending order, that is strictly greater than
    ``epoch``; when no key is greater, the largest key is used.

    :param optimizer: object that receives the frozen distribution as its
        ``noise_generator`` attribute
    :param epoch: current epoch, compared against the keys of ``noise_dict``
    :param args: object whose ``noisemodel`` attribute names the distribution
        ('johnson', 'gaussian', 'laplace', 'cauchy', 'gennorm' or 'studentt')
    :param noise_dict: non-empty mapping of schedule key -> parameter sequence
    :raises ValueError: if ``args.noisemodel`` is not one of the names above
    """
    factories = {
        'johnson': st.johnsonsu,
        'gaussian': st.norm,
        'laplace': st.laplace,
        'cauchy': st.cauchy,
        'gennorm': st.gennorm,
        'studentt': st.t,
    }
    # Fail with a clear message instead of hitting an unbound local further
    # down when the model name is not recognised.
    if args.noisemodel not in factories:
        raise ValueError(
            'unknown noise model %r; expected one of %s'
            % (args.noisemodel, sorted(factories)))

    keys = sorted(noise_dict)
    select = keys[0]
    for k in keys:
        select = k
        if k > epoch:
            break

    if args.noisemodel == 'gaussian':
        print("GAUSSIAN NOISE set")
    noise_model = factories[args.noisemodel](*noise_dict[select])

    print('noise model is', args.noisemodel, noise_dict[select])
    optimizer.noise_generator = noise_model
def curvefit(self, series):
    """Fit a Johnson SU distribution to ``series`` and summarise it.

    The fit is checked with a Kolmogorov-Smirnov test against the fitted
    parameters. The fit is rejected, and None returned, when the test's
    p value is below 0.1 or the mean of the fitted distribution is below 0.5.
    Otherwise the fitted mean minus half of the fitted standard deviation is
    returned.
    """
    fitted_params = johnsonsu.fit(series)
    frozen = johnsonsu(*fitted_params)
    _, p_value = kstest(series, 'johnsonsu', N=len(series),
                        args=fitted_params)

    # Reject poor fits first, then fits whose mean is too small.
    if p_value < 0.1:
        return None
    if frozen.mean() < 0.5:
        return None
    return frozen.mean() - frozen.std() / 2
def fit(self):
    """Build ``self.dist`` from the stored moments.

    Uses a Johnson SU distribution unless both ``self.skew`` and
    ``self.kurt`` are within ``NORMAL_CUTOFF`` of zero, in which case a
    normal distribution is used and both attributes are reset to 0.0.
    """
    near_normal = (abs(self.skew) < NORMAL_CUTOFF
                   and abs(self.kurt) < NORMAL_CUTOFF)
    if not near_normal:
        a, b, loc, scale = self._johnsonsu_param(
            self.m, self.s, self.skew, self.kurt)
        self.dist = johnsonsu(a, b, loc=loc, scale=scale)
        return

    # It is hard to solve the johnson su curve when it is very close
    # to normality, so just use a normal curve instead.
    self.dist = norm(loc=self.m, scale=self.s)
    self.skew = 0.0
    self.kurt = 0.0
def test_dist(self):
    """Moments of the solved Johnson SU must match the requested ones."""
    mean = 2.0
    std_dev = 1.5
    skew = 0.3
    kurt = 0.5

    a, b, loc, scale = self.c._johnsonsu_param(mean, std_dev, skew, kurt)
    frozen = johnsonsu(a, b, loc=loc, scale=scale)
    m1, v1, s1, k1 = frozen.stats(moments='mvsk')

    # (requested, obtained) pairs in mean / variance / skew / kurtosis order.
    expectations = (
        (mean, m1),
        (std_dev**2.0, v1),
        (skew, s1),
        (kurt, k1),
    )
    for wanted, got in expectations:
        assert abs(wanted - got) < 0.00001
def test_dist(self):
    """Round-trip: parameters from ``_johnsonsu_param`` reproduce the moments."""
    tolerance = 0.00001
    mean, std_dev, skew, kurt = 2.0, 1.5, 0.3, 0.5

    params = self.c._johnsonsu_param(mean, std_dev, skew, kurt)
    a, b, loc, scale = params
    x = johnsonsu(a, b, loc=loc, scale=scale)
    m1, v1, s1, k1 = x.stats(moments='mvsk')

    # Same four checks as before, in the same order.
    mean_error = abs(mean - m1)
    assert mean_error < tolerance
    variance_error = abs(std_dev**2.0 - v1)
    assert variance_error < tolerance
    skew_error = abs(skew - s1)
    assert skew_error < tolerance
    kurt_error = abs(kurt - k1)
    assert kurt_error < tolerance
def fit_johnsonSu(data, x_label, fixed_location=0, figure_size=5):
    """Fit a Johnson SU distribution to ``data`` and plot it over a histogram.

    :param data: (numpy.array) observations
    :param x_label: label to show on the x-axis of the histogram
    :param fixed_location: value the location parameter is held at during
        fitting (passed to ``scs.johnsonsu.fit`` as ``floc``)
    :param figure_size: int, specify the figure size
    :returns: dictionary with keys "a", "b", "loc", "scale", and "AIC"
    """

    # plot histogram (``density=True`` replaces the removed ``normed`` flag)
    fig, ax = plt.subplots(1, 1, figsize=(figure_size + 1, figure_size))
    ax.hist(data, density=True, bins='auto', edgecolor='black', alpha=0.5,
            label='Frequency')

    # estimate the parameters
    a, b, loc, scale = scs.johnsonsu.fit(data, floc=fixed_location)

    # plot the estimated JohnsonSu distribution between its 1% and 99%
    # quantiles
    x_values = np.linspace(scs.johnsonsu.ppf(0.01, a, b, loc, scale),
                           scs.johnsonsu.ppf(0.99, a, b, loc, scale), 100)
    rv = scs.johnsonsu(a, b, loc, scale)
    # Axes has no ``plot_all`` method; ``plot`` draws the fitted density.
    ax.plot(x_values, rv.pdf(x_values), color=COLOR_CONTINUOUS_FIT, lw=2,
            label='JohnsonSu')

    ax.set_xlabel(x_label)
    ax.set_ylabel("Frequency")
    ax.legend()
    plt.show()

    # calculate AIC; k=3 presumably counts a, b and scale, since loc is held
    # fixed -- TODO confirm against the AIC helper
    aic = AIC(
        k=3,
        log_likelihood=np.sum(scs.johnsonsu.logpdf(data, a, b, loc, scale)))

    # report results in the form of a dictionary
    return {"a": a, "b": b, "loc": loc, "scale": scale, "AIC": aic}
def fit(self, mean, std=None, skew=None, kurt=None):
    """Store the four moments and build the matching distribution.

    The moments can be given as four separate arguments or, when ``std`` is
    omitted, as a single indexable ``mean`` holding (mean, std, skew, kurt)
    at positions 0-3.

    ``self.dist`` becomes a normal distribution when both skew and kurt are
    within ``NORMAL_CUTOFF`` of zero (and both are then reset to 0.0);
    otherwise it becomes a Johnson SU distribution solved from the moments.
    """
    # Identity test: ``std == None`` would be evaluated element-wise (and
    # then fail in the ``if``) when ``std`` is an array.
    if std is None:
        # Array or tuple format.
        self.m = mean[0]
        self.s = mean[1]
        self.skew = mean[2]
        self.kurt = mean[3]
    else:
        self.m = mean
        self.s = std
        self.skew = skew
        self.kurt = kurt

    if abs(self.skew) < NORMAL_CUTOFF and abs(self.kurt) < NORMAL_CUTOFF:
        # It is hard to solve the johnson su curve when it is very close to
        # normality, so just use a normal curve instead.
        self.dist = norm(loc=self.m, scale=self.s)
        self.skew = 0.0
        self.kurt = 0.0
    else:
        a, b, loc, scale = self._johnsonsu_param(
            self.m, self.s, self.skew, self.kurt)
        self.dist = johnsonsu(a, b, loc=loc, scale=scale)
def fit(self, mean, std=None, skew=None, kurt=None):
    """Record mean/std/skew/kurt and set ``self.dist`` accordingly.

    Call either as ``fit(mean, std, skew, kurt)`` or as ``fit(moments)``
    where ``moments`` is indexable and holds the four values in that order.

    When both ``abs(skew)`` and ``abs(kurt)`` are below ``NORMAL_CUTOFF`` a
    normal distribution is used and skew/kurt are stored as 0.0; in every
    other case a Johnson SU distribution is built from the parameters
    returned by ``self._johnsonsu_param``.
    """
    # ``is None`` rather than ``== None``: equality against an array-like
    # ``std`` does not produce a single boolean.
    packed = std is None
    if packed:
        # Array or tuple format.
        self.m = mean[0]
        self.s = mean[1]
        self.skew = mean[2]
        self.kurt = mean[3]
    else:
        self.m = mean
        self.s = std
        self.skew = skew
        self.kurt = kurt

    if abs(self.skew) < NORMAL_CUTOFF and abs(self.kurt) < NORMAL_CUTOFF:
        # It is hard to solve the johnson su curve when it is very close to
        # normality, so just use a normal curve instead.
        self.dist = norm(loc=self.m, scale=self.s)
        self.skew = 0.0
        self.kurt = 0.0
    else:
        a, b, loc, scale = self._johnsonsu_param(
            self.m, self.s, self.skew, self.kurt)
        self.dist = johnsonsu(a, b, loc=loc, scale=scale)
#!/usr/bin/env python from __future__ import print_function import openturns as ot import scipy as sp import scipy.stats as st for scipy_dist in [st.uniform(), st.johnsonsu(2.55, 2.25)]: sp.random.seed(42) # create an openturns distribution py_dist = ot.SciPyDistribution(scipy_dist) distribution = ot.Distribution(py_dist) print('distribution=', distribution) print('realization=', distribution.getRealization()) sample = distribution.getSample(10000) print('sample=', sample[0:5]) point = [0.6] print('pdf= %.6g' % distribution.computePDF(point)) cdf = distribution.computeCDF(point) print('cdf= %.6g' % cdf) print('quantile=', distribution.computeQuantile(cdf)) print('quantile (tail)=', distribution.computeQuantile(cdf, True)) print('scalar quantile=%.6g' % distribution.computeScalarQuantile(cdf)) print('scalar quantile (tail)=%.6g' % distribution.computeScalarQuantile(cdf, True)) print('mean=', distribution.getMean()) print('mean(sampling)=', sample.computeMean()) print('std=', distribution.getStandardDeviation()) print('std(sampling)=', sample.computeStandardDeviation())
import collections as ct import scipy.stats as st income_model_dict = ct.OrderedDict() income_model_dict['johnsonsu'] = st.johnsonsu(-5.3839367311065747, 0.84376726932941271, -224.21280806585787, 79.661998696081355) income_model_dict['powerlaw'] = st.powerlaw(0.16342470577523971, -3.1423954341714262e-15, 55664716.096562646) income_model_dict['exponpow'] = st.exponpow(0.25441022752240294, -1.8475789041433829e-22, 36120900.670255348) income_model_dict['nakagami'] = st.nakagami(0.10038339454419823, -3.0390927147076284e-22, 33062195.426077582) income_model_dict['exponweib'] = st.exponweib(-3.5157658448986489, 0.44492833350419714, -15427.454196748848, 2440.0278856175246) drivingdistance_model_dict = ct.OrderedDict() drivingdistance_model_dict['nakagami'] = st.nakagami(0.11928581143831021, 14.999999999999996, 41.404620910360876) drivingdistance_model_dict['ncx2'] = st.ncx2(0.30254190304723211, 1.1286538320791935, 14.999999999999998, 8.7361471573932192) drivingdistance_model_dict['chi'] = st.chi(0.47882729877571095,
# First four moments of the distribution for the given shape parameters:
mean, var, skew, kurt = johnsonsu.stats(a, b, moments='mvsk')

# Display the probability density function (``pdf``):
x = np.linspace(johnsonsu.ppf(0.01, a, b),
                johnsonsu.ppf(0.99, a, b), 100)
ax.plot(x, johnsonsu.pdf(x, a, b),
        'r-', lw=5, alpha=0.6, label='johnsonsu pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = johnsonsu(a, b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = johnsonsu.ppf([0.001, 0.5, 0.999], a, b)
np.allclose([0.001, 0.5, 0.999], johnsonsu.cdf(vals, a, b))
# True

# Generate random numbers:
r = johnsonsu.rvs(a, b, size=1000)

# And compare the histogram (``density`` replaces the removed ``normed``
# keyword of ``hist``):
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
def all_dists():
    """Return a dict mapping distribution names to frozen ``scipy.stats``
    continuous distributions.

    Every entry is frozen with ``loc=0.0`` and ``scale=1.0`` plus the shape
    parameters listed below. The dictionary keys are part of the interface
    and are left exactly as they were, including the historical "gilbrat"
    spelling.
    """
    # dists param were taken from scipy.stats official
    # documentation examples
    # Total - 89 (87 active entries plus the two commented-out frechet ones)

    # SciPy renamed the misspelled ``gilbrat`` to ``gibrat``; use whichever
    # this SciPy version provides so the function keeps working on both.
    gibrat = getattr(stats, "gibrat", None)
    if gibrat is None:
        gibrat = stats.gilbrat

    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28,
                                   loc=0.0, scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0,
                                                 scale=1.0),
        "gilbrat": gibrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675,
                                         loc=0.0, scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446,
                                           loc=0.0, scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01,
                                       loc=0.0, scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99,
                                             loc=0.0, scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
def _define_distribution(df_sample_byuser, list_stage_id): return {sid: johnsonsu(*johnsonsu.fit(df_sample_byuser[sid].dropna())) for i, sid in enumerate(list_stage_id)}
# Number of training batches that make up one optimisation step; the
# assertion fails when args.batch is not a whole multiple of train_batch.
batch_per_step = int(args.batch / train_batch)
assert args.batch / train_batch <= batch_per_step

num_processes = args.nprocs
num_gpus = args.ngpus

# Freeze the scipy.stats distribution named by --noisemodel with the
# supplied parameters. An unrecognised name leaves noise_model unset.
if args.noisemodel == 'johnson':
    noise_model = st.johnsonsu(*args.noiseparams)
elif args.noisemodel == 'gaussian':
    noise_model = st.norm(*args.noiseparams)
elif args.noisemodel == 'laplace':
    noise_model = st.laplace(*args.noiseparams)
elif args.noisemodel == 'cauchy':
    noise_model = st.cauchy(*args.noiseparams)
elif args.noisemodel == 'gennorm':
    noise_model = st.gennorm(*args.noiseparams)
elif args.noisemodel == 'studentt':
    noise_model = st.t(*args.noiseparams)