def generate_dist(size=1):
    """Create the module-level frozen normal distribution for ``size``.

    Two module globals are (re)assigned:

    * ``scale`` -- the reciprocal of ``size``.
    * ``frozen_dist`` -- a frozen ``scipy.stats.norm`` with mean 0 and
      standard deviation ``scale``.

    Args:
        size: Non-zero number whose reciprocal is used as the standard
            deviation of the frozen distribution.

    Raises:
        ZeroDivisionError: If ``size`` is zero.
    """
    global frozen_dist, scale
    scale = 1 / size
    # ``norm.stats(scale=...)`` only returns moments; it does not configure
    # anything. The scale must be supplied when freezing, otherwise
    # ``frozen_dist`` silently stays the standard normal.
    frozen_dist = norm(scale=scale)
def testNormal(mu, sigma, N, size):
    """Draw two samples with the ``lhw`` generators and hand them to ``startWork``.

    ``size`` values are drawn with ``lhw.rnnorm1(mu, sigma)`` and then
    ``size`` values with ``lhw.rnnorm2(mu, sigma, N)``. The theoretical mean
    and variance of ``norm(mu, sigma)`` are passed along with the samples.

    NOTE(review): ``lhw`` and ``startWork`` are defined outside this block;
    what ``startWork`` does with the data is not visible here.
    """
    first_sample = [lhw.rnnorm1(mu, sigma) for _ in range(size)]
    second_sample = [lhw.rnnorm2(mu, sigma, N) for _ in range(size)]
    expected_mean, expected_var = norm.stats(mu, sigma, moments='mv')
    startWork(
        [first_sample, second_sample],
        expected_mean,
        expected_var,
        ["rnnrm1", "rnnrm2"],
    )
class TestArtificialSample:
    """Tests for ``ArtificialSample`` using a hand-built three-component mixture.

    All fixture attributes are evaluated once, when the class body runs.
    ``to_phi``, ``interval_phi``, ``ArtificialSample`` and ``Sample`` come
    from outside this block.
    """

    # 101 log-spaced class values from 0.02 to 2000, plus their phi transform.
    classes = np.logspace(0, 5, 101) * 0.02
    classes_phi = to_phi(classes)

    # One normal component per centre (10, 7.5, 5), all with scale 1.0.
    c1 = norm.pdf(classes_phi, loc=10, scale=1.0) * interval_phi(classes_phi)
    c2 = norm.pdf(classes_phi, loc=7.5, scale=1.0) * interval_phi(classes_phi)
    c3 = norm.pdf(classes_phi, loc=5, scale=1.0) * interval_phi(classes_phi)
    components = [c1, c2, c3]
    proportions = [0.1, 0.4, 0.5]
    distribution = c1 * 0.1 + c2 * 0.4 + c3 * 0.5

    # Theoretical moments of the three components (variance -> std).
    m, v, s, k = norm.stats(loc=[10, 7.5, 5], scale=[1.0, 1.0, 1.0],
                            moments="mvsk")
    std = np.sqrt(v)
    moments = (m, std, s, k)

    sample = ArtificialSample(
        "Sample",
        classes,
        classes_phi,
        distribution,
        components,
        proportions,
        moments,
    )

    def test_iter(self):
        """Iterating yields three components with the expected moments."""
        expected_means = [10, 7.5, 5]
        assert len(self.sample) == 3
        for position, component in enumerate(self.sample):
            assert component.mean == expected_means[position]
            assert component.sorting_coefficient == 1.0

    def test_index(self):
        """Positive and negative integer indexing both work."""
        first = self.sample[0]
        last = self.sample[-1]
        assert first.mean == 10.0
        assert last.mean == 5.0

    def test_slice(self):
        """Slicing returns something iterable."""
        for _ in self.sample[:-1]:
            pass

    def test_has_sample(self):
        """The ``sample`` attribute is a ``Sample`` instance."""
        assert isinstance(self.sample.sample, Sample)
def get_stats(name, **kwargs):
    '''
    Return the mean and variance of a named distribution.

    The moments are computed with *scipy*, but the keyword arguments follow
    the *numpy* naming used by the get_generator function, so each branch
    maps the numpy-style parameters onto scipy's parametrization.

    Supported names and the kwargs they read:
      "lognormal": mean, sigma
      "normal":    loc, scale
      "pareto":    shape, scale

    Returns a dict with keys "mean" and "var".
    Raises ValueError for any other name, and KeyError if a required
    keyword argument is missing.
    '''
    moment_spec = "mv"  # scipy.stats code for (mean, variance)

    # Thunks keep the kwargs lookups lazy: only the selected branch runs.
    candidates = (
        ("lognormal", lambda: lognorm.stats(s=kwargs["sigma"],
                                            scale=np.exp(kwargs["mean"]),
                                            moments=moment_spec)),
        ("normal", lambda: norm.stats(loc=kwargs["loc"],
                                      scale=kwargs["scale"],
                                      moments=moment_spec)),
        ("pareto", lambda: pareto.stats(b=kwargs["shape"],
                                        scale=kwargs["scale"],
                                        moments=moment_spec)),
    )
    for label, compute in candidates:
        if name == label:
            mean, var = compute()
            return {"mean": mean, "var": var}

    raise ValueError("Please provide a proper distribution name.")
def testNormal():  # {{{
    """
    Normal distribution demo.

    The normal distribution is a continuous distribution that can take any
    value on the real line. It is described by two parameters, the mean mu
    and the variance sigma**2. In scipy:
        mu    ---> loc
        sigma ---> scale (the standard deviation)

    Prints the first four moments and shows a 2x2 figure with the scipy pdf,
    a hand-computed pdf, the cdf, and the empirical pmf of 1000 random
    variates rounded to one decimal.
    """
    mu = 2
    sigma = 4
    title = 'mu = %.2f, sigma = %.2f' % (mu, sigma)

    # Grid covering the central 98% of the probability mass.
    xs = np.linspace(norm.ppf(0.01, loc=mu, scale=sigma),
                     norm.ppf(0.99, loc=mu, scale=sigma),
                     num=1000)

    # E(X) = mu, D(X) = sigma**2
    mean, var, skew, kurt = norm.stats(loc=mu, scale=sigma, moments='mvsk')
    print("mean: %.2f, var: %.2f, skew: %.2f, kurt: %.2f" %
          (mean, var, skew, kurt))

    fig, axs = plt.subplots(2, 2)

    # pdf from scipy
    ys = norm.pdf(xs, loc=mu, scale=sigma)
    axs[0][0].plot(xs, ys, 'bo', markersize=5, label='norm.pdf')
    axs[0][0].legend()
    axs[0][0].set_title(title)

    # pdf computed by hand, for comparison with the panel above
    ys = np.exp(-((xs - mu)**2) / (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))
    axs[0][1].plot(xs, ys, 'bo', markersize=5, label='cmp pdf')
    axs[0][1].legend()
    axs[0][1].set_title(title)

    # cdf (the label used to say 'norm.pdf', which mislabelled this panel)
    ys = norm.cdf(xs, loc=mu, scale=sigma)
    axs[1][0].plot(xs, ys, 'bo', markersize=5, label='norm.cdf')
    axs[1][0].legend()
    axs[1][0].set_title(title)

    # random variates, rounded so that equal values can be counted
    data = norm.rvs(loc=mu, scale=sigma, size=1000)
    data = np.around(data, decimals=1)

    # Pmf is loaded from a sibling checkout rather than an installed
    # package, hence the sys.path tweak.
    import sys
    sys.path.append("../../thinkstats")
    import Pmf
    pmf = Pmf.MakePmfFromList(data)
    xs, ys = pmf.Render()
    axs[1][1].scatter(xs, ys, label='rvs pmf')
    axs[1][1].legend()

    plt.show()
def interpret(parameters: np.ndarray, classes: np.ndarray, interval: float):
    """Turn raw parameters into mixture proportions, components and moments.

    Args:
        parameters: Array of shape
            ``(n_samples, Normal.N_PARAMETERS + 1, n_components)``. Row 0 of
            axis 1 holds locations, row 1 raw scales, row 2 raw proportions.
        classes: Array of shape ``(n_samples, n_components, n_classes)`` at
            which the normal pdf is evaluated.
        interval: Factor the pdf values are multiplied by.

    Returns:
        ``(proportions, components, (mean, std, skewness, kurtosis))`` where
        ``proportions`` has shape ``(n_samples, 1, n_components)`` and
        ``components`` has the shape of ``classes``.

    NOTE(review): ``relu``, ``softmax`` and ``Normal`` are defined outside
    this block; they are assumed to behave as their names suggest.
    """
    n_samples, n_components, n_classes = classes.shape
    expected_shape = (n_samples, Normal.N_PARAMETERS + 1, n_components)
    assert parameters.ndim == 3
    assert parameters.shape == expected_shape

    # Tile each per-component parameter along a new trailing class axis.
    locations = np.repeat(np.expand_dims(parameters[:, 0, :], 2),
                          n_classes, axis=2)
    scales = np.repeat(np.expand_dims(relu(parameters[:, 1, :]), 2),
                       n_classes, axis=2)
    proportions = np.expand_dims(softmax(parameters[:, 2, :], axis=1), 1)

    components = interval * norm.pdf(classes, loc=locations, scale=scales)

    # Moments only need one value per component, so take the first class slot.
    mean, variance, skewness, kurtosis = norm.stats(
        loc=locations[:, :, 0], scale=scales[:, :, 0], moments="mvsk")
    return proportions, components, (mean, np.sqrt(variance), skewness, kurtosis)
def cornishfisher(self, confidence_level=0.99):
    """Return a VaR adjusted for the skewness and kurtosis of the observed returns.

    Downloads prices for ``self.symbol`` between ``self.start`` and
    ``self.end`` via ``yf.download``, builds the series of close-to-close
    price ratios, and applies the Cornish-Fisher expansion to the normal
    quantile using the *sample* skewness and excess kurtosis.

    Args:
        confidence_level: Probability passed to ``norm.ppf`` to obtain the
            base quantile.

    Returns:
        ``-(mean + z_cf * std)`` where ``z_cf`` is the adjusted quantile.
    """
    # Imported locally so the block does not depend on the file-level imports.
    from scipy.stats import kurtosis as sample_kurtosis
    from scipy.stats import skew as sample_skew

    data = yf.download(self.symbol, start=self.start, end=self.end)
    close = data['Close']
    # NOTE(review): this is the gross return P_t / P_{t-1} (values near 1),
    # not the simple return; kept as in the original -- confirm intent.
    rets = (close / close.shift(1))[1:]  # the first ratio is NaN, drop it

    mean = np.mean(rets)
    std = np.std(rets)
    # The moments must come from the data. The previous code took them from
    # norm.stats(), i.e. the theoretical standard normal (skew 0, kurtosis 0),
    # which also overwrote the sample mean with 0.
    skew = sample_skew(rets)
    excess_kurt = sample_kurtosis(rets)  # Fisher definition: 0 for a normal

    # NOTE(review): a loss quantile is usually norm.ppf(1 - confidence_level);
    # the sign convention is kept as in the original -- confirm with callers.
    z = norm.ppf(confidence_level)
    # Cornish-Fisher expansion. ``excess_kurt`` is already excess kurtosis,
    # so no further "- 3" is applied.
    z = (z + (z**2 - 1) * skew / 6
         + (z**3 - 3 * z) * excess_kurt / 24
         - (2 * z**3 - 5 * z) * (skew**2) / 36)
    return -(mean + z * std)
class TestArtificialComponent:
    """Tests for ``ArtificialComponent`` built from a single normal component.

    The fixture attributes are evaluated once, when the class body runs.
    ``to_phi``, ``interval_phi`` and ``ArtificialComponent`` come from
    outside this block.
    """

    # 101 log-spaced class values from 0.02 to 2000, plus their phi transform.
    classes = np.logspace(0, 5, 101) * 0.02
    classes_phi = to_phi(classes)

    # A single normal component centred at 5 with scale 1.0.
    distribution = norm.pdf(classes_phi, loc=5, scale=1.0) * interval_phi(classes_phi)
    proportion = 0.5

    # Theoretical moments of that component (variance -> std).
    m, v, s, k = norm.stats(loc=5, scale=1.0, moments="mvsk")
    std = np.sqrt(v)
    moments = (m, std, s, k)

    component = ArtificialComponent(
        classes,
        classes_phi,
        distribution,
        proportion,
        moments,
    )

    def test_properties(self):
        """Each moment property reports the value the component was built with."""
        component = self.component
        assert component.mean == self.m
        assert component.sorting_coefficient == self.std
        assert component.skewness == self.s
        assert component.kurtosis == self.k
        assert isinstance(component.moments, dict)
def zhengtai():
    """Plot the standard normal pdf and the pdf of N(1, 1) on shared axes.

    The x grid is the sorted union of the central-98% ranges of both
    distributions. After the window is shown, two further scipy calls are
    evaluated as examples; their results are discarded.
    """
    figure, axes = plt.subplots(1, 1)
    norm.stats(moments='mvsk')  # moments are computed but not used

    standard_grid = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
    shifted_grid = np.linspace(norm.ppf(0.01, loc=1, scale=1),
                               norm.ppf(0.99, loc=1, scale=1),
                               100)
    # Merge both grids; np.unique also sorts the values.
    grid = np.unique(np.append(standard_grid, shifted_grid), axis=0)

    axes.plot(grid, norm.pdf(grid),
              'r-', lw=5, alpha=0.6, label='norm pdf')
    axes.plot(grid, norm.pdf(grid, loc=1, scale=1),
              'r-', lw=2, alpha=0.6, label='norm pdf')
    plt.show()

    # P(X < 40) for X ~ N(50, 10)
    norm.cdf(40, loc=50, scale=10)
    # Inverse cdf at a cumulative probability of 0.025
    norm.ppf(0.025, loc=0, scale=1)
def rate4site():
    """Plot and summarise the scores of every ``*_rate*`` file that is found.

    Walks ``<parent of cwd>/RESULT/MODULE2-SMALLER``. For each file whose
    name contains ``_rate`` (the query name is the part before the first
    underscore) it:

    1. parses the position, residue and score columns;
    2. saves ``<dir>_<query>_normalized.png``: a normal curve fitted to the
       raw scores plus a histogram of the min-max normalised scores;
    3. saves ``<dir>_<query>_ColorBins.png``: a coloured histogram of the
       normalised scores in 0.1-wide bins;
    4. writes ``<dir>/<query>_conserved_residues.txt`` with the
       ``(position, residue)`` pairs whose normalised score is <= 0.4.

    Files without any parsable score, or whose scores are all equal (so they
    cannot be normalised), are skipped.
    """
    parent_dir = '/'.join(os.getcwd().split('/')[:-1])
    rate_path = "%s/RESULT/MODULE2-SMALLER" % parent_dir

    for curdir, _subdirs, files in os.walk(rate_path):
        for filename in files:
            if '_rate' not in filename:
                continue
            query = filename.split('_')[0]
            positions, residues, scores = _read_rate4site_scores(
                os.path.join(curdir, filename))

            if not scores:
                continue  # nothing to plot
            lowest, highest = min(scores), max(scores)
            span = highest - lowest
            if span == 0:
                continue  # constant scores cannot be min-max normalised

            # Output names are built by plain concatenation, so the two PNGs
            # land next to the directory, not inside it (kept as is).
            newpath = os.path.join(curdir)
            normalized = [(s - lowest) / span for s in scores]

            # Figure 1: fitted normal curve and normalised histogram.
            # NOTE(review): the curve is drawn over the raw score range while
            # the histogram uses normalised values -- confirm this is intended.
            fig, ax = plt.subplots(1, 1)
            x = np.linspace(lowest, highest, len(scores))
            ax.plot(x, norm.pdf(x, np.mean(scores), np.std(scores)),
                    'r-', lw=3, alpha=0.9, label='RATE SCORES')
            weights = np.ones_like(scores) / len(scores)
            # ``normed`` is no longer accepted by Matplotlib; ``density`` is
            # its replacement.
            ax.hist(normalized, weights=weights, density=True,
                    histtype='stepfilled', alpha=0.2,
                    label='NORMALIZED RATE SCORES')
            plt.title('%s' % query + '_Normalized_Rate4Site_Scores')
            plt.xlabel('Rate4Site_Scores', fontsize=14)
            plt.ylabel('Sequence_Count', fontsize=14)
            plt.legend(loc='upper right')
            fig.savefig(newpath + '_%s_normalized.png' % query)
            plt.close("all")

            # Figure 2: coloured histogram of the normalised scores.
            bins = np.arange(floor(min(normalized)), ceil(max(normalized)), 0.10)
            colors = ('blue', 'red', 'green', 'cyan', 'purple', 'pink',
                      'violet', 'lime', 'aqua')
            # Highest bin count, used to leave headroom on the y axis.
            max_bin = max(np.histogram(normalized, bins=bins)[0])
            plt.figure()
            n, bins, patches = plt.hist(normalized, bins, alpha=0.3)
            for color, patch in zip(colors, patches):
                plt.setp(patch, 'facecolor', color)
            plt.ylim([0, max_bin * 1.3])
            plt.title('%s' % query + '_Normalized_Scores_In_9_Bins')
            plt.xlabel('Color_Bins', fontsize=14)
            plt.ylabel('Sequence_Count', fontsize=14)
            plt.legend(loc='upper right')
            plt.savefig(newpath + '_%s_ColorBins.png' % query)
            plt.close("all")

            # Low normalised score (<= 0.4) is what this report calls conserved.
            positions = [int(position) for position in positions]
            conserved = [
                (position, residue)
                for value, residue, position in zip(normalized, residues, positions)
                if value <= 0.4
            ]
            report_path = newpath + '/%s_conserved_residues.txt' % query
            with open(report_path, 'w') as conserved_residues:
                conserved_residues.write(
                    " ".join(str(pair) for pair in conserved) + "\n")


def _read_rate4site_scores(path):
    """Parse one rate file into ``(positions, residues, scores)`` lists.

    Line 0 is always skipped, as are blank lines and lines starting with
    ``#``. On every other line only the text before the first comma is used;
    its first three whitespace-separated fields are the position, the
    residue and the score. Positions and residues stay strings, scores
    become floats.
    """
    positions, residues, scores = [], [], []
    with open(path) as rate_file:
        for line_number, line in enumerate(rate_file):
            if line_number == 0 or line.startswith('#') or line == '\n':
                continue
            fields = line.split(',')[0].split()
            positions.append(fields[0])
            residues.append(fields[1])
            scores.append(float(fields[2]))
    return positions, residues, scores
# -*- coding: utf-8 -*- #https://docs.scipy.org/doc/scipy/reference/stats.html #https://docs.scipy.org/doc/scipy/reference/tutorial/stats.html #import scipy #scipy.stats package is imported as from scipy import stats # individual objects are imported as from scipy.stats import norm norm.cdf(0) norm.cdf([-1., 0, 1]) import numpy as np norm.cdf(np.array([-1., 0, 1])) norm.mean(), norm.std(), norm.var() norm.stats(moments="mv") norm.ppf(0.5) norm.rvs(size=3) #random nos #drawing random numbers relies on generators from numpy.random package. In the example above, the specific stream of random numbers is not reproducible across runs. To achieve reproducibility, you can explicitly seed a global variable np.random.seed(1234) #Relying on a global state is not recommended though. A better way is to use the random_state parameter which accepts an instance of numpy.random.RandomState class, or an integer which is then used to seed an internal RandomState object: norm.rvs(size=5, random_state=1234) norm.rvs(5) #one no only #Shifting and Scaling¶ #All continuous distributions take loc and scale as keyword parameters to adjust the location and scale of the distribution, e.g. for the standard normal distribution the location is the mean and the scale is the standard deviation. norm.stats(loc=3, scale=4, moments="mv") #uniform distribution from scipy.stats import uniform
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt

# Draw two unit-variance normal pdfs on [0, 10], show them, then save the
# figure to graph.png.
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = norm.stats(moments='mvsk')
x = np.linspace(0, 10, 100)

# (mean, scale) pairs that were tried for the two curves:
#   2, 0.1; 8, 0.1
#   4.8, 0.1; 5.2, 0.1
#   4.8, 1; 5.2, 1
#   2, 1; 8, 1   <- currently plotted
for center, line_format in ((2, 'r-'), (8, 'b-')):
    ax.plot(x, norm.pdf(x, center, 1), line_format,
            lw=5, alpha=0.6, label='norm pdf')

plt.show()
fig.savefig('graph.png')
import matplotlib matplotlib.use('Agg') import scipy.stats import matplotlib.pyplot as plt import seaborn as sns import numpy as np from scipy.stats import uniform, pareto, norm #mean, var, skew, kurt = b = 1.0 dists = [] dists += [[("pareto"), pareto.stats(2, moments='mvsk'), pareto]] dists += [[("uniform"), uniform.stats(moments='mvsk'), uniform]] dists += [[("normal"), norm.stats(moments='mvsk'), norm]] dists += [[("normal_sc"), norm.stats(moments='mvsk'), norm]] print(dists) size = 20000 for dist in dists: print dist[0] if (dist[0] == "pareto"): sample = dist[2].rvs(b, size=size) sample = sample[(sample < 8)] if (dist[0] == "normal"): sample = dist[2].rvs(size=size) if (dist[0] == "uniform"):
plt.legend(loc='upper left', shadow=True)
plt.show()

# ### Gaussian (Normal) Distribution

# In[9]:

# Gaussian (Normal) Distribution
from scipy.stats import norm

# Mean and standard deviation: scipy's ``scale`` is sigma, not the variance,
# so the variance printed below is scale**2.
loc, scale = 1, 2
# Percent point function (inverse of the cdf, i.e. percentiles): 25 points
# spanning the 1st to the 99th percentile.
x = np.linspace(norm.ppf(0.01, loc, scale), norm.ppf(0.99, loc, scale), 25)
print("Mean : ", norm.stats(loc, scale, moments='m'))
print("Variance : ", norm.stats(loc, scale, moments='v'))
print("Prob. Dens. Func. : ", norm.pdf(x, loc, scale))
print("Cum. Density Func.: ", norm.cdf(x, loc, scale))
CDF = norm.cdf(x, loc, scale)

fig = plt.figure(figsize=(20, 10))
# First panel of a 2x2 grid: the pdf.
plt.subplot(221)
plt.plot(x, norm.pdf(x, loc, scale), 'g', ms=8, label='PDF')
plt.xlabel("Sample Space of Gaussian Distribution", fontsize=14)
plt.ylabel("PDF", fontsize=14)
plt.title("Probability Distribution of Gaussian(µ=1,σ=2) Distribution", fontsize=16)
plt.xticks(np.arange(-5, 7, 1))
plt.yticks(np.arange(0, 0.30, 0.05))
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

np.random.seed(5)

# Standard normal distribution: plot the pdf between the 5th and 95th
# percentiles and mark the three quartiles.
mean, var = norm.stats(moments='mv')
std = norm.std()

fig, ax = plt.subplots(1, 1)
x = np.linspace(norm.ppf(0.05), norm.ppf(0.95))
ax.plot(x, norm.pdf(x), 'b-', lw=3, alpha=0.6, label='Gaussian')

q1 = norm.ppf(.25)
median = norm.ppf(.5)
q3 = norm.ppf(.75)

# Raw string: "\m" and "\s" are not valid escape sequences in a normal
# string literal. The resulting text is unchanged.
plt.title(
    r'Gaussian Distribution ($\mu$: {:.2f}, $\sigma$: {:.2f}, $\sigma^2$: {:.2f})'
    .format(mean, std, var),
    size='xx-large')
plt.xlabel('X', size='large')
plt.ylabel('P(X)', size='large')

# Quartile lines
for quartile in (q1, median, q3):
    ax.axvline(x=quartile, linewidth=3, alpha=0.6, color='black',
               linestyle='dashed')
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from scipy.stats import cauchy, norm, t
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Student's t distribution with one degree of freedom and the normal
# (Gaussian) distribution are used for measuring similarities.
x = np.linspace(-10,10,500)
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","green","blue","gold","purple","black"])

# Degrees of freedom
df = 1
n_mean, n_var, n_skew, n_kurt = norm.stats(moments='mvsk')
c_mean, c_var, c_skew, c_kurt = t.stats(df,moments='mvsk')

# A Student's t distribution with df = 1 is the Cauchy distribution, which is
# why cauchy.pdf is plotted under the "Student's t" label.
plt.plot(x,norm.pdf(x),'r',label='Normal Distribution')
plt.plot(x,cauchy.pdf(x),'b',label="Student's t Distribution")
plt.grid()
plt.legend()
plt.title('Normal Distribution vs Student t-Distribution')
plt.savefig('Images/SimilarityDistributions.png')
plt.show()

# Load the dataset: every column except 'label' becomes a feature.
Train = pd.read_csv('Dataset/train.csv')
X_Train = ((Train.loc[:, Train.columns != 'label']).to_numpy())
def get_moments(*args) -> dict:
    """Return the moments of the normal distribution described by ``args``.

    ``args`` are forwarded positionally to ``scipy.stats.norm.stats``. Their
    count must equal the number of names reported by
    ``NormalDistribution.get_parameter_names()`` (defined outside this block).

    Returns:
        A dict with keys ``mean``, ``std``, ``skewness`` and ``kurtosis``;
        ``std`` is the square root of the variance returned by scipy.
    """
    expected_count = len(NormalDistribution.get_parameter_names())
    assert len(args) == expected_count
    mean, variance, skewness, kurtosis = norm.stats(*args, moments="mvsk")
    return {
        "mean": mean,
        "std": np.sqrt(variance),
        "skewness": skewness,
        "kurtosis": kurtosis,
    }
# from http://matplotlib.org/users/screenshots.html#slider-demo
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import Slider, Button
from scipy.stats import norm
import prettyplotlib as ppl

fig, ax = plt.subplots(1)
# Leave room below the axes.
plt.subplots_adjust(left=0.1, bottom=0.25)

# Default parameters of the distribution, as (loc, scale).
# norm.stats() returns (mean, variance); the pdf/cdf calls below expect the
# standard deviation as their second argument. The two only coincide for the
# standard normal, so the pair is built from mean() and std() explicitly.
params = (norm.mean(), norm.std())

# Initial plot
x = np.linspace(-5, 5, 100)
line_pdf, = ppl.plot(ax, x, norm.pdf(x, *params), lw=2, color='red', label="pdf")
line_cdf, = ppl.plot(ax, x, norm.cdf(x, *params), lw=1, color='lightgrey', label="cdf")
ppl.legend(ax)
# plt.axis([-10, 10, 0, 1])
print('Normal distribution CDF X<=0: ', norm.cdf(0))
a = np.array([-1, 0, 1])
print(norm.cdf(a))
print(norm.ppf(0.05))
print(norm.ppf(0.025))

# Generating random normal variates (positional arguments are loc, scale):
r = norm.rvs(0, 1, size=10)
print(r)

######################
# Shifting and scaling:
# All continuous distributions take loc and scale as keyword
# parameters to adjust the location and scale of the distribution.
# For example, for the normal distribution the location is the mean and the
# scale is the standard deviation.
# The ``stats`` method returns the moments of the distribution.
print('\n', 'Shifting and Scaling:')
a = norm.stats(loc=3, scale=4, moments="mv")  # note: rebinds ``a`` from above
print(a)
# loc and scale may be sequences: one (mean, variance) pair per element.
aVec = norm.stats(loc=[0, 1, 2], scale=[1, 2, 4], moments="mv")
print('vectorized: ')
print(aVec)

# Random numbers from a "multivariate" normal:
# this does NOT give a multivariate normal. loc and scale are broadcast
# element-wise, so the matrix is not treated as a covariance.
print('MULTI?! Does not give multivariate Normal!')
rvs = norm.rvs(loc=np.array([0, 1]), scale=np.array([[1, 0], [0, 1]]))
print(rvs)
print('\n\n')

#############################################################
#############################################################
# In general the standardized distribution for a random variable X is
# obtained through the transformation (X - loc) / scale. The
# default values are loc = 0 and scale = 1.
print(a)
b = np.array([[5], [6]])
print(b)
# Solve a @ x = b via the explicit inverse, then print the residual.
print(linalg.inv(a).dot(b))
print(a.dot(linalg.inv(a).dot(b)) - b)
# Same system via np.linalg.solve, then print the residual.
print(np.linalg.solve(a, b))
print(a.dot(np.linalg.solve(a, b)) - b)
print('-----')

# Step 4. Common methods in stats
print(norm.cdf(0))
print(norm.cdf([-1., 0, 1]))
print(norm.cdf(np.array([-1., 0, 1])))
print(norm.mean(), norm.std(), norm.var())
print(norm.stats(moments="mv"))
print(norm.ppf(0.5))
print(norm.rvs(size=3))
print(np.random.seed(1234))  # seed() returns None, so this prints "None"
print(norm.rvs(size=5, random_state=1234))
print(norm.rvs(5))  # the positional 5 is ``loc``: a single draw centred at 5
print('-----')

# Step 5. Broadcasting
print(stats.t.isf([0.1, 0.05, 0.01], [[10], [11]]))
print(stats.t.isf([0.1, 0.05, 0.01], 10))
print(stats.t.isf([0.1, 0.05, 0.01], [10, 11, 12]))
print('-----')

# Step 6. Analysing one sample
np.random.seed(282629734)
import numpy as np from scipy import linalg A = np.array([[1,2],[3,4]]) la,v = linalg.eig(A) l1,l2 = la print(l1, l2) print(v[:,0]) print(v[:,1]) print(np.sum(abs(v**2),axis=0)) v1 = np.array(v[:,0]).T print(linalg.norm(A.dot(v1)-l1*v1)) from scipy.stats import norm r = norm.rvs(loc=0, scale=1, size=1000) print(r) print(norm.stats(moments='mvsk')) from scipy import stats import numpy as np x = np.random.random(10) y = np.random.random(10) slope, intercept, r_value, p_value, std_err = stats.linregress(x,y) print({'slope':slope,'intercept':intercept}) print({'p_value':p_value,'r-squared':round(r_value**2,2)}) import numpy as np from scipy.optimize import minimize Define the function def rosen(x):
## Funções estatísticas

O módulo *stats* contém uma grande quantidade de distribuições de probabilidade (124 atualmente) e uma biblioteca em constante crescimento de funções estatísticas, incluindo estatística descritiva, distribuições de frequência, correlações, testes, transformações, distâncias, estatística circular, etc. Também há duas classes que facilitam a criação de distribuições customizadas: *rv_continuous*, para distribuições contínuas, e *rv_discrete*, para discretas.

### Usando distribuições

Nesta seção, tomaremos como exemplo a distribuição normal, representada pela classe *scipy.stats.norm*, que herda todos os métodos genéricos da classe *rv_continuous*. A classe *norm* representa a forma normal padrão, i.e., sua função densidade de probabilidade (fdp) para um número real $x$ é:

$$f(x) = \frac{e^{-x^{2}/2}}{\sqrt{2\pi}}.$$

Para importar a distribuição normal, fazemos:

    from scipy.stats import norm

Para calcular alguns momentos:

    mean, var, skew, kurt = norm.stats(moments='mvsk')
    print('média: {}, variância: {}, assimetria: {}, curtose: {}'.format(mean, var, skew, kurt))

Como dito acima, essa classe implementa a distribuição normal padrão. Para deslocar ou mudar a escala da distribuição, i.e., informar a média e o desvio-padrão, pode-se informar os parâmetros *loc* e *scale*, respectivamente. Exemplo:

    mean, var = norm.stats(loc=2, scale=0.5, moments='mv')
    print('média: {}, variância: {}'.format(mean, var, skew, kurt))

Para calcular a fdp para um número real $x$, fazemos:

    x = 1
    print(norm.pdf(x, loc=2, scale=0.5))

A função *pdf* também pode ser usada para calcular a fdp para cada elemento de um *array*:

    import numpy as np
# A large number of probability distributions as well as a growing library of statistical functions are available in `scipy.stats`. See http://docs.scipy.org/doc/scipy/reference/stats.html for a complete list. # 正态分布等 # Generate random numbers from normal distribution: # In[29]: from scipy.stats import norm #morm函数 r = norm.rvs(loc=0, scale=1, size=1000) #rvs正态分布,loc均值,scale标准差,size生成多少个随机数——r即生成的随机数 # Calculate a few first moments: # In[30]: mean, var, skew, kurt = norm.stats(moments='mvsk') #统计:四阶矩 # # Linear regression model (线性回归模型) # # This example computes a least-squares regression for two sets of measurements. # In[31]: from scipy import stats import numpy as np x = np.random.random(10) y = np.random.random(10) slope, intercept, r_value, p_value, std_err = stats.linregress( x, y) #.linregress:做简单线性模型;std_err:标准差 print({'slope': slope, 'intercept': intercept})
""" Stats distributions tests""" from scipy.stats import norm, cauchy import matplotlib.pyplot as plt fig, ax = plt.subplots(1, 1) mean, var, skew, kurt = cauchy.stats(moments='mvsk') print kurt mean_n, var_n, skew_n, kurt_n = norm.stats(moments='mvsk') print mean_n print var_n, skew_n, kurt_n