def __init__(self, lower, upper):
    """Set up an arcsine distribution supported on [lower, upper].

    Parameters
    ----------
    lower : float or None
        Lower end of the support; ``None`` selects 0.0.
    upper : float or None
        Upper end of the support; ``None`` selects 1.0.

    Raises
    ------
    ValueError
        If ``lower`` is not strictly smaller than ``upper``.
    """
    self.lower = 0.0 if lower is None else lower
    self.upper = 1.0 if upper is None else upper
    # lower == upper would hand the parent distribution a zero scale, so the
    # degenerate interval is rejected together with reversed bounds.
    if self.lower >= self.upper:
        raise ValueError(
            'Invalid Beta distribution parameters. Lower should be smaller than upper.'
        )
    self.bounds = np.array([self.lower, self.upper])
    self.x_range_for_pdf = np.linspace(self.lower, self.upper, RECURRENCE_PDF_SAMPLES)
    # The parent is parameterised by location and scale (interval width).
    # NOTE(review): `arcsine` is presumably scipy.stats.arcsine; confirm import.
    self.parent = arcsine(loc=self.lower, scale=self.upper - self.lower)
    self.mean, self.variance, self.skewness, self.kurtosis = self.parent.stats(
        moments='mvsk')
    self.shape_parameter_A = -0.5
    self.shape_parameter_B = -0.5
# First four moments requested via moments='mvsk'.
mean, var, skew, kurt = arcsine.stats(moments='mvsk')

# Plot the probability density function (``pdf``) on 100 points between the
# 1% and 99% quantiles.
x = np.linspace(
    arcsine.ppf(0.01),
    arcsine.ppf(0.99),
    100,
)
ax.plot(x, arcsine.pdf(x), 'r-', label='arcsine pdf', alpha=0.6, lw=5)

# Calling the distribution object fixes its shape, location and scale
# parameters and yields a "frozen" RV object; draw its ``pdf`` as well.
rv = arcsine()
ax.plot(x, rv.pdf(x), 'k-', label='frozen pdf', lw=2)

# Round-trip check: ``cdf`` applied to ``ppf`` values should give the
# probabilities back.
vals = arcsine.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], arcsine.cdf(vals))  # True

# Draw random variates ...
r = arcsine.rvs(size=1000)

# ... and overlay their normalised histogram.
ax.hist(r, histtype='stepfilled', density=True, alpha=0.2)
def all_dists():
    """Return a mapping of distribution name to a frozen ``stats`` distribution.

    Every distribution is frozen with ``loc=0.0`` and ``scale=1.0``; the shape
    parameters were taken from the scipy.stats official documentation
    examples.

    Returns:
        dict: 87 entries keyed by distribution name. The key ``"gilbrat"`` is
        kept for backward compatibility even when the underlying distribution
        is exposed by SciPy as ``gibrat``.
    """
    # SciPy renamed the misspelled ``gilbrat`` distribution to ``gibrat`` and
    # later dropped the old name; use whichever one this SciPy provides.
    gibrat = getattr(stats, "gibrat", None)
    if gibrat is None:
        gibrat = stats.gilbrat

    # Total - 87 active entries (89 with the two commented-out frechet ones).
    return {
        "alpha": stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit": stats.anglit(loc=0.0, scale=1.0),
        "arcsine": stats.arcsine(loc=0.0, scale=1.0),
        "beta": stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime": stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford": stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr": stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy": stats.cauchy(loc=0.0, scale=1.0),
        "chi": stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2": stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine": stats.cosine(loc=0.0, scale=1.0),
        "dgamma": stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull": stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang": stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon": stats.expon(loc=0.0, scale=1.0),
        "exponnorm": stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib": stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow": stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f": stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife": stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk": stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy": stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm": stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic": stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto": stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm": stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme": stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       loc=0.0, scale=1.0),
        "gamma": stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic": stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat": gibrat(loc=0.0, scale=1.0),
        "gompertz": stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r": stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l": stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy": stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic": stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm": stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm": stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant": stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma": stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss": stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull": stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone": stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign": stats.kstwobign(loc=0.0, scale=1.0),
        "laplace": stats.laplace(loc=0.0, scale=1.0),
        "levy": stats.levy(loc=0.0, scale=1.0),
        "levy_l": stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675,
                                         loc=0.0, scale=1.0),
        "logistic": stats.logistic(loc=0.0, scale=1.0),
        "loggamma": stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace": stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm": stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax": stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell": stats.maxwell(loc=0.0, scale=1.0),
        "mielke": stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami": stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2": stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct": stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm": stats.norm(loc=0.0, scale=1.0),
        "pareto": stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3": stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw": stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm": stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist": stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh": stats.rayleigh(loc=0.0, scale=1.0),
        "rice": stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular": stats.semicircular(loc=0.0, scale=1.0),
        "t": stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang": stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon": stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm": stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda": stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform": stats.uniform(loc=0.0, scale=1.0),
        "vonmises": stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line": stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald": stats.wald(loc=0.0, scale=1.0),
        "weibull_min": stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max": stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
# Finish the Weibull figure: fix the axes, then draw one pdf curve per entry of k.
axis.set_xlim([0.0, 3.0])
axis.set_ylim([0.0, 2.0])
axis.yaxis.set_ticks([0.0, 0.5, 1.0, 1.5, 2.0])
axis.set_title("Weibull Distribution: λ=1.0")
for i, shape in enumerate(k):
    # stats.weibull returns a callable that is evaluated point by point below.
    pdf = stats.weibull(shape, 1.0)
    pdf_values = list(map(pdf, x))
    axis.plot(x, pdf_values, label=f"k={shape}")
axis.legend()
config.save_post_asset(
    figure,
    "metropolis_hastings_sampling",
    "weibull_distribution_parameters",
)

# %%
## arcsine

# Evaluate strictly inside (0, 1): the grid stops short of both end points.
x = numpy.linspace(0.001, 0.999, 200)
pdf = list(map(stats.arcsine, x))

figure, axis = pyplot.subplots(figsize=(10, 7))
axis.set_xlabel("X")
axis.set_ylabel("PDF")
axis.set_xlim([0.0, 1.0])
axis.set_title("Arcsine Distribution")
axis.plot(x, pdf)
config.save_post_asset(
    figure,
    "metropolis_hastings_sampling",
    "arcsine_distribution_parameters",
)

# %%
## bimodal normal

x = numpy.linspace(-7.0, 7.0, 200)
pdf = [stats.bimodal_normal(point, 1.2) for point in x]
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as sts
# %matplotlib inline

# Generate a sample of size 1000 from the arcsine distribution
arcsine_rv = sts.arcsine()
sample = arcsine_rv.rvs(1000)

# Histogram of the sample with the theoretical probability density on top of it:
# `density=True` normalises the histogram; the older `normed` keyword is no
# longer accepted by Matplotlib.
plt.hist(sample, density=True)
plt.ylabel('number of samples')  # superseded by the '$f(x)$' label set below

# plot the theoretical density on the same figure:
x = np.linspace(0, 1, 1000)
pdf = arcsine_rv.pdf(x)
plt.plot(x, pdf, label='theoretical pdf', alpha=0.5)
plt.legend()
plt.ylabel('$f(x)$')
plt.xlabel('$x$')


def histogram(n, mean, variance):
    """Build sample means of size-``n`` arcsine samples and the matching normal law.

    n - size of each sample whose mean is taken
    mean - mean handed to the normal distribution
    variance - variance; the normal distribution uses sqrt(variance / n) as scale

    NOTE(review): the visible body neither plots nor returns anything; the
    function appears to be cut off after the last comment.
    """
    # Generate 1000 sample means
    mead_sample = [np.mean(arcsine_rv.rvs(n)) for i in range(1000)]
    # Build the normal distribution from the given variance and expected value
    norm_rv = sts.norm(mean, np.sqrt(variance / n))
    # Plot the histogram of the distribution of sample means
def arcsine(n=100, loc=0, scale=1):
    '''
    Draw a sample from the arcsine distribution and summarise it.

    n - sample size
    loc - location parameter passed to st.arcsine
    scale - scale parameter passed to st.arcsine

    Returns a list of the form [generated sample, list of x, list of y, mean,
    mode, median, range, deviation, variance, standard deviation, variation,
    skewness, kurtosis]
    '''
    distribution = st.arcsine(loc=loc, scale=scale)

    # Values are rounded to two decimals so equal observations can be tallied.
    sample = [round(value, 2) for value in distribution.rvs(size=n)]

    # Frequency of each distinct rounded value; this is the input of the
    # mode() and dev() helpers defined elsewhere in the file.
    freq_table_disc = {num: sample.count(num) for num in set(sample)}

    # Theoretical density on n points between the 1% and 99% quantiles.
    x = list(np.linspace(distribution.ppf(0.01), distribution.ppf(0.99), n))
    y = list(distribution.pdf(x))

    mean = np.mean(sample)
    moda = list(mode(freq_table_disc).keys())
    med = statistics.median(sample)
    ro = max(sample) - min(sample)
    deviation = dev(freq_table_disc)
    variansa = deviation / (len(sample) - 1)
    standart = math.sqrt(variansa)
    # The variation was computed but never returned before, leaving the result
    # one item short of the documented layout.
    variation = standart / mean
    asym = st.skew(sample)
    ex = st.kurtosis(sample)

    return [
        sample,
        x,
        y,
        mean,
        moda,
        med,
        ro,
        deviation,
        variansa,
        standart,
        variation,
        asym,
        ex,
    ]