Python stats Examples, scipy.stats.norm.stats Python Examples

Example #1

0

Show file

File: utils.py Project: Zaedlen/r2p2

def generate_dist(size=1):
    """Initialise the module-level ``scale`` and ``frozen_dist`` globals.

    ``scale`` is set to ``1 / size`` and ``frozen_dist`` to a frozen
    ``norm()`` built with default arguments, i.e. the frozen distribution
    itself is NOT scaled.

    Raises:
        ZeroDivisionError: if ``size`` is 0.
    """
    global frozen_dist, scale
    scale = 1 / size
    # A former ``norm.stats(scale=scale)`` call was dropped here: its result
    # was discarded and it changed no state.
    # NOTE(review): if the frozen distribution was meant to carry the scale,
    # this should be ``norm(scale=scale)`` -- confirm against the callers
    # before changing it, since they may already multiply by ``scale``.
    frozen_dist = norm()

Example #2

0

Show file

def testNormal(mu, sigma, N, size):
    """Sample both ``lhw`` normal generators and hand the results to ``startWork``.

    ``size`` values are drawn from ``lhw.rnnorm1(mu, sigma)`` and then from
    ``lhw.rnnorm2(mu, sigma, N)``.  Both samples are passed to ``startWork``
    together with the mean and variance that ``norm.stats`` reports for
    ``loc=mu``, ``scale=sigma``.
    """
    first_sample = [lhw.rnnorm1(mu, sigma) for _ in range(size)]
    second_sample = [lhw.rnnorm2(mu, sigma, N) for _ in range(size)]
    mean, var = norm.stats(mu, sigma, moments='mv')
    startWork([first_sample, second_sample], mean, var, ["rnnrm1", "rnnrm2"])

Example #3

0

Show file

File: test_artificial_dataset.py Project: yuriok/QGrain

class TestArtificialSample:
    """Tests for an ``ArtificialSample`` mixed from three normal components."""

    # Class grid and its phi-scale counterpart.
    classes = np.logspace(0, 5, 101) * 0.02
    classes_phi = to_phi(classes)
    # One normal density per component (loc 10, 7.5 and 5; scale 1.0), each
    # multiplied by ``interval_phi(classes_phi)``.
    c1 = norm.pdf(classes_phi, loc=10, scale=1.0) * interval_phi(classes_phi)
    c2 = norm.pdf(classes_phi, loc=7.5, scale=1.0) * interval_phi(classes_phi)
    c3 = norm.pdf(classes_phi, loc=5, scale=1.0) * interval_phi(classes_phi)
    components = [c1, c2, c3]
    proportions = [0.1, 0.4, 0.5]
    distribution = c1 * 0.1 + c2 * 0.4 + c3 * 0.5
    # Theoretical moments of the three components; the variance is converted
    # to a standard deviation before being handed to the sample.
    m, v, s, k = norm.stats(loc=[10, 7.5, 5], scale=[1.0, 1.0, 1.0],
                            moments="mvsk")
    std = np.sqrt(v)
    moments = (m, std, s, k)
    sample = ArtificialSample("Sample", classes, classes_phi, distribution,
                              components, proportions, moments)

    def test_iter(self):
        """Iterating the sample yields the three components in order."""
        expected_means = [10, 7.5, 5]
        assert len(self.sample) == 3
        for index, component in enumerate(self.sample):
            assert component.mean == expected_means[index]
            assert component.sorting_coefficient == 1.0

    def test_index(self):
        """Positive and negative integer indexing both work."""
        first, last = self.sample[0], self.sample[-1]
        assert first.mean == 10.0
        assert last.mean == 5.0

    def test_slice(self):
        """Slicing returns something iterable (smoke test only)."""
        for _ in self.sample[:-1]:
            pass

    def test_has_sample(self):
        """The ``sample`` attribute is a ``Sample`` instance."""
        assert isinstance(self.sample.sample, Sample)

Example #4

0

Show file

def get_stats(name, **kwargs):
    '''
    Return the mean and variance of a named distribution.

    The statistics are computed with *scipy*, while the keyword
    arguments follow the *numpy* Generator naming:

    - "lognormal": ``mean`` and ``sigma``
    - "normal": ``loc`` and ``scale``
    - "pareto": ``shape`` and ``scale``

    Returns a dict with the keys "mean" and "var".
    Raises ValueError for any other name and KeyError when a
    required keyword argument is missing.
    '''

    moment_spec = "mv"  # scipy.stats code for (mean, variance).

    # Translate the numpy-style keywords into the scipy parametrization.
    if name == "lognormal":
        dist = lognorm
        params = {"s": kwargs["sigma"], "scale": np.exp(kwargs["mean"])}
    elif name == "normal":
        dist = norm
        params = {"loc": kwargs["loc"], "scale": kwargs["scale"]}
    elif name == "pareto":
        dist = pareto
        params = {"b": kwargs["shape"], "scale": kwargs["scale"]}
    else:
        raise ValueError("Please provide a proper distribution name.")

    mean, var = dist.stats(moments=moment_spec, **params)
    return {"mean": mean, "var": var}

Example #5

0

Show file

def testNormal():  # {{{
    """
    Normal distribution demo.

    The normal distribution is a continuous distribution whose density is
    defined everywhere on the real line.  It is described by two
    parameters: the mean mu and the standard deviation sigma (the variance
    is sigma**2).

    mu ---> loc
    sigma ---> scale

    Prints the four moments and shows a 2x2 figure: the pdf from
    ``norm.pdf``, the pdf from the closed-form formula, the cdf, and the
    pmf of 1000 random draws rounded to one decimal.
    """

    mu = 2
    sigma = 4
    # Plot range: 1% to 99% quantile of the distribution.
    xs = np.linspace(norm.ppf(0.01, loc=mu, scale=sigma),
                     norm.ppf(0.99, loc=mu, scale=sigma),
                     num=1000)

    # E(X) = mu, D(X) = sigma**2
    mean, var, skew, kurt = norm.stats(loc=mu, scale=sigma, moments='mvsk')
    print("mean: %.2f, var: %.2f, skew: %.2f, kurt: %.2f" %
          (mean, var, skew, kurt))

    fig, axs = plt.subplots(2, 2)

    # Show the pdf (norm.pdf)
    ys = norm.pdf(xs, loc=mu, scale=sigma)
    axs[0][0].plot(xs, ys, 'bo', markersize=5, label='norm.pdf')
    axs[0][0].legend()
    axs[0][0].set_title('mu = %.2f, sigma = %.2f' % (mu, sigma))

    # Show the pdf (computed manually from the closed-form density)
    ys = np.exp(-((xs - mu)**2) /
                (2 * sigma**2)) / (sigma * np.sqrt(2 * np.pi))
    axs[0][1].plot(xs, ys, 'bo', markersize=5, label='cmp pdf')
    axs[0][1].legend()
    axs[0][1].set_title('mu = %.2f, sigma = %.2f' % (mu, sigma))

    # Show the cdf (the label previously read 'norm.pdf' by mistake)
    ys = norm.cdf(xs, loc=mu, scale=sigma)
    axs[1][0].plot(xs, ys, 'bo', markersize=5, label='norm.cdf')
    axs[1][0].legend()
    axs[1][0].set_title('mu = %.2f, sigma = %.2f' % (mu, sigma))

    # Random variates (RVS), rounded so that equal values can be counted
    data = norm.rvs(loc=mu, scale=sigma, size=1000)
    data = np.around(data, decimals=1)
    # Pmf is loaded from a sibling checkout of thinkstats.
    import sys
    sys.path.append("../../thinkstats")
    import Pmf
    pmf = Pmf.MakePmfFromList(data)
    xs, ys = pmf.Render()
    #  axs[1][1].plot(xs, ys, 'bo', markersize=5, label='rvs pmf')
    axs[1][1].scatter(xs, ys, label='rvs pmf')
    axs[1][1].legend()

    plt.show()

Example #6

0

Show file

File: distributions.py Project: yuriok/QGrain

 def interpret(parameters: np.ndarray, classes: np.ndarray, interval: float):
     """Turn raw parameters into proportions, component densities and moments.

     ``classes`` must be 3-D; its shape is unpacked as
     (n_samples, n_components, n_classes).  ``parameters`` must have shape
     (n_samples, Normal.N_PARAMETERS + 1, n_components); along its middle
     axis, index 0 is used as location, index 1 (after ``relu``) as scale
     and index 2 (after ``softmax`` over axis 1) as proportion.

     Returns ``(proportions, components, (mean, std, skewness, kurtosis))``.
     """
     n_samples, n_components, n_classes = classes.shape
     assert parameters.ndim == 3
     expected_shape = (n_samples, Normal.N_PARAMETERS + 1, n_components)
     assert parameters.shape == expected_shape
     # Repeat each per-component value along a new trailing class axis so it
     # lines up element-wise with ``classes``.
     locations = np.repeat(np.expand_dims(parameters[:, 0, :], 2), n_classes, axis=2)
     scales = np.repeat(np.expand_dims(relu(parameters[:, 1, :]), 2), n_classes, axis=2)
     proportions = np.expand_dims(softmax(parameters[:, 2, :], axis=1), 1)
     components = norm.pdf(classes, loc=locations, scale=scales) * interval
     # The moments are the same for every class, so the first slice suffices.
     mean, variance, skewness, kurtosis = norm.stats(
         loc=locations[:, :, 0], scale=scales[:, :, 0], moments="mvsk")
     return proportions, components, (mean, np.sqrt(variance), skewness, kurtosis)

Example #7

0

Show file

File: stock-stats.py Project: tjh924/stocks

    def cornishfisher(self, confidence_level=0.99):
        """Return a VaR that is adjusted based on the skewness and/or kurtosis of the actual distribution.

        The normal quantile for ``confidence_level`` is corrected with the
        Cornish-Fisher expansion using the sample skewness and sample excess
        kurtosis of the downloaded series, then combined with the sample
        mean and standard deviation.

        Args:
            confidence_level: probability passed to ``norm.ppf``.

        Returns:
            ``-(mean + z_cf * std)`` where ``z_cf`` is the adjusted quantile.
        """
        # Local import: only this method needs the sample-moment helpers.
        from scipy.stats import kurtosis, skew

        data = yf.download(self.symbol, start=self.start, end=self.end)

        # Close divided by the previous close; the first row has no previous
        # close to divide by and is dropped.
        # NOTE(review): this is a gross ratio (around 1), not ratio - 1 --
        # confirm that is the intended return definition.
        rets = data['Close'] / data['Close'].shift(1)
        rets = rets[1:]

        mean = np.mean(rets)
        std = np.std(rets)
        # Moments of the data itself.  The previous code took all four
        # moments from the standard normal (norm.stats), which reset the
        # mean to 0 and made the skew/kurtosis correction independent of
        # the data.
        sample_skew = skew(rets)
        excess_kurt = kurtosis(rets)  # Fisher definition: normal data -> 0

        z = norm.ppf(confidence_level)
        # Cornish-Fisher expansion; it is written in terms of EXCESS
        # kurtosis, so no further "- 3" is applied.
        z = (z + (z**2 - 1) * sample_skew / 6 +
             (z**3 - 3 * z) * excess_kurt / 24 -
             (2 * z**3 - 5 * z) * (sample_skew**2) / 36)
        # NOTE(review): z is the upper-tail quantile here, so the result is
        # negative for typical inputs -- confirm the sign convention.
        return -(mean + z * std)

Example #8

0

Show file

File: test_artificial_dataset.py Project: yuriok/QGrain

class TestArtificialComponent:
    """Tests for an ``ArtificialComponent`` built from one normal density."""

    # Class grid and its phi-scale counterpart.
    classes = np.logspace(0, 5, 101) * 0.02
    classes_phi = to_phi(classes)
    # Normal density (loc 5, scale 1.0) multiplied by
    # ``interval_phi(classes_phi)``.
    distribution = norm.pdf(classes_phi, loc=5, scale=1.0) * interval_phi(classes_phi)
    proportion = 0.5
    # Theoretical moments; the variance is converted to a standard deviation.
    m, v, s, k = norm.stats(loc=5, scale=1.0, moments="mvsk")
    std = np.sqrt(v)
    moments = (m, std, s, k)
    component = ArtificialComponent(classes, classes_phi, distribution,
                                    proportion, moments)

    def test_properties(self):
        """The component exposes the moments it was constructed with."""
        component = self.component
        assert component.mean == self.m
        assert component.sorting_coefficient == self.std
        assert component.skewness == self.s
        assert component.kurtosis == self.k
        assert isinstance(component.moments, dict)

Example #9

0

Show file

File: 随机变量的概率分布.py Project: qingfengliu/statics_use

def zhengtai():
    """Plot the pdfs of norm() and norm(loc=1, scale=1) on one shared grid.

    After the figure is shown, one cdf value and one ppf value are
    evaluated; their results are not stored or returned.
    """
    fig, ax = plt.subplots(1, 1)
    mean, var, skew, kurt = norm.stats(moments='mvsk')

    # Grid: the 1%-99% quantile range of each distribution, merged, sorted
    # and de-duplicated.
    standard_grid = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)
    shifted_grid = np.linspace(norm.ppf(0.01, loc=1, scale=1),
                               norm.ppf(0.99, loc=1, scale=1), 100)
    x = np.unique(np.append(standard_grid, shifted_grid), axis=0)

    standard_pdf = norm.pdf(x)
    ax.plot(x, standard_pdf, 'r-', lw=5, alpha=0.6, label='norm pdf')
    shifted_pdf = norm.pdf(x, loc=1, scale=1)
    ax.plot(x, shifted_pdf, 'r-', lw=2, alpha=0.6, label='norm pdf')
    plt.show()

    # P(X < 40) for loc=50, scale=10
    norm.cdf(40, loc=50, scale=10)
    # Inverse cdf at a cumulative probability of 0.025
    norm.ppf(0.025, loc=0, scale=1)

Example #10

0

Show file

File: rate4siteparser.py Project: marcus1989/MasterThesis

def rate4site():
    """Plot and bucket the scores of every ``*_rate*`` file under RESULT/MODULE2-SMALLER.

    The directory ``../RESULT/MODULE2-SMALLER`` (relative to the current
    working directory) is walked.  For each file whose name contains
    ``_rate`` the function:

    1. reads position, residue and score (the first three whitespace
       separated fields before the first comma) from every data line,
    2. saves a plot of the fitted normal pdf together with a histogram of
       the min-max normalized scores,
    3. saves a colored histogram of the normalized scores, and
    4. writes the (position, residue) pairs whose normalized score is
       <= 0.4 to ``<query>_conserved_residues.txt`` inside the directory.

    Raises:
        ValueError: if a file yields no scores.
        ZeroDivisionError: if all scores of a file are equal.
    """
    curdir = os.getcwd()
    curdir_up = '/'.join(curdir.split('/')[:-1])
    rate_path = "%s/RESULT/MODULE2-SMALLER" % curdir_up

    for dirpath, subdirs, files in os.walk(rate_path):

        for filename in files:

            if '_rate' not in filename:
                continue

            query = filename.split('_')[0]
            outpath = os.path.join(dirpath, filename)
            scores = []
            residues = []
            y_count = []

            # ``with`` guarantees the input file is closed; it used to be
            # left open for every file visited.
            with open(outpath) as rate_file:
                for i, interval in enumerate(rate_file):
                    # The first line is always skipped, as are comment
                    # lines and empty lines.
                    if i == 0:
                        continue
                    if interval.startswith('#') or interval == '\n':
                        continue

                    parts1 = interval.split(',')[0].split()
                    residues.append(parts1[1])
                    y_count.append(parts1[0])
                    scores.append(float(parts1[2]))

            fig, ax = plt.subplots(1, 1)
            n_scores = len(scores)
            max_score = max(scores)
            min_score = min(scores)
            mean_score = np.mean(scores)
            std_score = np.std(scores)

            x = np.linspace(min_score, max_score, n_scores)
            ax.plot(x,
                    norm.pdf(x, mean_score, std_score),
                    'r-',
                    lw=3,
                    alpha=0.9,
                    label='RATE SCORES')

            weights = np.ones_like(scores) / len(scores)
            # Min-max normalization; the extremes are computed once instead
            # of once per element.
            span = max_score - min_score
            normalized = [(s - min_score) / span for s in scores]
            # NOTE(review): the figures are saved as "<dirpath>_<query>_*.png",
            # i.e. NEXT TO the directory, while the text file below is
            # written INSIDE it -- confirm this is intended.
            newpath = dirpath

            # NOTE(review): newer Matplotlib releases replaced ``normed``
            # with ``density`` -- confirm against the installed version.
            ax.hist(normalized,
                    weights=weights,
                    normed=True,
                    histtype='stepfilled',
                    alpha=0.2,
                    label='NORMALIZED RATE SCORES')
            plt.title('%s' % query + '_Normalized_Rate4Site_Scores')
            plt.xlabel('Rate4Site_Scores', fontsize=14)
            plt.ylabel('Sequence_Count', fontsize=14)
            plt.legend(loc='upper right')
            fig.savefig(newpath + '_%s_normalized.png' % query)
            plt.close("all")

            y_count = map(int, y_count)

            bins = np.arange(floor(min(normalized)),
                             ceil(max(normalized)), 0.10)

            colors = ('blue', 'red', 'green', 'cyan', 'purple', 'pink',
                      'violet', 'lime', 'aqua')

            # get the max count for a particular bin for all classes combined
            max_bin = max(np.histogram(normalized, bins=bins)[0])
            plt.figure()
            n, bins, patches = plt.hist(normalized, bins, alpha=0.3)

            for c, p in zip(colors, patches):
                plt.setp(p, 'facecolor', c)

            plt.ylim([0, max_bin * 1.3])
            plt.title('%s' % query + '_Normalized_Scores_In_9_Bins')
            plt.xlabel('Color_Bins', fontsize=14)
            plt.ylabel('Sequence_Count', fontsize=14)
            plt.legend(loc='upper right')
            plt.savefig(newpath + '_%s_ColorBins.png' % query)
            plt.close("all")

            # Only the lowest bucket (normalized score <= 0.4) is written
            # out.
            # NOTE(review): the original also sorted the remaining scores
            # into a 0.5-0.6 bucket and an "everything else" bucket (which
            # therefore included 0.4-0.5); neither bucket was ever used.
            respo = [(position, residue)
                     for score, residue, position
                     in zip(normalized, residues, y_count)
                     if score <= 0.4]

            with open(newpath + '/%s_conserved_residues.txt' % query,
                      'w') as conserved_residues:
                conserved_residues.write(
                    " ".join(str(pair) for pair in respo) + "\n")

Example #11

0

Show file

File: sipE09_scipy1.py Project: Godcomplex11/DU

# -*- coding: utf-8 -*-
# Reference: https://docs.scipy.org/doc/scipy/reference/stats.html
# Tutorial:  https://docs.scipy.org/doc/scipy/reference/tutorial/stats.html
# import scipy
# The scipy.stats package is imported as
from scipy import stats

# Individual objects are imported as
from scipy.stats import norm

norm.cdf(0)
norm.cdf([-1., 0, 1])
import numpy as np
norm.cdf(np.array([-1., 0, 1]))
norm.mean(), norm.std(), norm.var()
norm.stats(moments="mv")
norm.ppf(0.5)
norm.rvs(size=3)  # three random numbers

# Drawing random numbers relies on generators from the numpy.random package. In the example above, the specific stream of random numbers is not reproducible across runs. To achieve reproducibility, you can explicitly seed the global generator:
np.random.seed(1234)
# Relying on global state is not recommended, though. A better way is to use the random_state parameter, which accepts an instance of the numpy.random.RandomState class, or an integer that is then used to seed an internal RandomState object:
norm.rvs(size=5, random_state=1234)
norm.rvs(5)  # a single number: the positional argument is loc, not size

# Shifting and Scaling
# All continuous distributions take loc and scale as keyword parameters to adjust the location and scale of the distribution, e.g. for the normal distribution the location is the mean and the scale is the standard deviation.
norm.stats(loc=3, scale=4, moments="mv")

# Uniform distribution
from scipy.stats import uniform

Example #12

0

Show file

File: generate_univariate.py Project: jxieeducation/Quick-Data-Science-Experiments-2016

from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt

# One figure with a single axes for both density curves.
fig, ax = plt.subplots(1, 1)

mean, var, skew, kurt = norm.stats(moments='mvsk')

x = np.linspace(0, 10, 100)

# Other (loc, scale) pairs, presumably tried earlier:
# 2, 0.1; 8, 0.1
# 4.8, 0.1; 5.2, 0.1
# 4.8, 1; 5.2, 1
# 2, 1; 8, 1

# Two unit-scale normal densities, centred at 2 (red) and at 8 (blue).
for center, line_style in ((2, 'r-'), (8, 'b-')):
    ax.plot(x,
            norm.pdf(x, center, 1),
            line_style,
            lw=5,
            alpha=0.6,
            label='norm pdf')

plt.show()
fig.savefig('graph.png')

Example #13

0

Show file

File: dist.py Project: Cassis0401/ce888labs-1

import matplotlib

matplotlib.use('Agg')

import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import uniform, pareto, norm

#mean, var, skew, kurt =
# Shape parameter passed to pareto.rvs when sampling.
b = 1.0
# Each entry is [name, (mean, var, skew, kurt), distribution object].
dists = [
    ["pareto", pareto.stats(2, moments='mvsk'), pareto],
    ["uniform", uniform.stats(moments='mvsk'), uniform],
    ["normal", norm.stats(moments='mvsk'), norm],
    ["normal_sc", norm.stats(moments='mvsk'), norm],
]

print(dists)

# Number of variates drawn per distribution.
size = 20000

for dist in dists:

    print dist[0]
    if (dist[0] == "pareto"):
        sample = dist[2].rvs(b, size=size)
        sample = sample[(sample < 8)]
    if (dist[0] == "normal"):
        sample = dist[2].rvs(size=size)
    if (dist[0] == "uniform"):

Example #14

0

Show file

plt.legend(loc='upper left', shadow=True)

plt.show()

# ### Gaussian (Normal) Distribution

# In[9]:

# Gaussian (Normal) Distribution
from scipy.stats import norm

loc, scale = 1, 2  # Mean and standard deviation (scale is sigma, not the variance)
x = np.linspace(norm.ppf(0.01, loc, scale), norm.ppf(0.99, loc, scale),
                25)  # Percent point function (inverse of cdf — percentiles)

print("Mean              : ", norm.stats(loc, scale, moments='m'))
print("Variance          : ", norm.stats(loc, scale, moments='v'))
print("Prob. Dens. Func. : ", norm.pdf(x, loc, scale))
print("Cum. Density Func.: ", norm.cdf(x, loc, scale))

CDF = norm.cdf(x, loc, scale)

# Panel 1 of a 2x2 grid: the pdf over the 1%-99% quantile range.
fig = plt.figure(figsize=(20, 10))
plt.subplot(221)
plt.plot(x, norm.pdf(x, loc, scale), 'g', ms=8, label='PDF')
plt.xlabel("Sample Space of Gaussian Distribution", fontsize=14)
plt.ylabel("PDF", fontsize=14)
plt.title("Probability Distribution of Gaussian(µ=1,σ=2) Distribution",
          fontsize=16)
plt.xticks(np.arange(-5, 7, 1))
plt.yticks(np.arange(0, 0.30, 0.05))

Example #15

0

Show file

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

np.random.seed(5)

# Normal Distribution: moments of the standard normal.
mean, var = norm.stats(moments='mv')
std = norm.std()

fig, ax = plt.subplots(1, 1)

# Density between the 5% and 95% quantiles.
x = np.linspace(norm.ppf(0.05), norm.ppf(0.95))
ax.plot(x, norm.pdf(x), 'b-', lw=3, alpha=0.6, label='Gaussian')

q1 = norm.ppf(.25)
median = norm.ppf(.5)
q3 = norm.ppf(.75)

# Raw string: "\m" and "\s" are not valid escape sequences in a normal
# string literal and trigger a warning on recent Python versions.  The
# resulting text is unchanged.
plt.title(
    r'Gaussian Distribution ($\mu$: {:.2f}, $\sigma$: {:.2f}, $\sigma^2$: {:.2f})'
    .format(mean, std, var),
    size='xx-large')
plt.xlabel('X', size='large')
plt.ylabel('P(X)', size='large')

# Quartile lines
for quartile in (q1, median, q3):
    ax.axvline(x=quartile,
               linewidth=3,
               alpha=0.6,
               color='black',
               linestyle='dashed')

Example #16

0

Show file

from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from scipy.stats import cauchy, norm, t
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Student's t distribution with 1 degree of freedom and the normal (Gaussian) distribution are used for measuring similarities
x = np.linspace(-10,10,500)
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","green","blue","gold","purple","black"])

# Degrees of freedom
df = 1

n_mean, n_var, n_skew, n_kurt = norm.stats(moments='mvsk')
c_mean, c_var, c_skew, c_kurt = t.stats(df,moments='mvsk')

# The curve labelled "Student's t" is drawn with cauchy.pdf: a Student's t
# distribution with 1 degree of freedom is the Cauchy distribution.
plt.plot(x,norm.pdf(x),'r',label='Normal Distribution')
plt.plot(x,cauchy.pdf(x),'b',label="Student's t Distribution")
plt.grid()
plt.legend()
plt.title('Normal Distribution vs Student t-Distribution')
plt.savefig('Images/SimilarityDistributions.png')
plt.show()


# Loading Dataset
Train = pd.read_csv('Dataset/train.csv')

# Every column except 'label', as a NumPy array.
X_Train = ((Train.loc[:, Train.columns != 'label']).to_numpy())

Example #17

0

Show file

 def get_moments(*args) -> dict:
     """Return mean, std, skewness and kurtosis of ``norm`` for ``args``.

     ``args`` are forwarded positionally to ``norm.stats``; their count
     must equal the number of names reported by
     ``NormalDistribution.get_parameter_names()``.
     """
     expected_count = len(NormalDistribution.get_parameter_names())
     assert len(args) == expected_count
     mean, variance, skewness, kurtosis = norm.stats(*args, moments="mvsk")
     # The variance is reported as a standard deviation.
     return dict(mean=mean,
                 std=np.sqrt(variance),
                 skewness=skewness,
                 kurtosis=kurtosis)

Example #18

0

Show file

File: Gaussian.py Project: DucVuMinh/PRML

# from http://matplotlib.org/users/screenshots.html#slider-demo
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import Slider, Button
from scipy.stats import norm
import prettyplotlib as ppl

# Leave room below the axes (bottom=0.25).
fig, ax = plt.subplots(1)
plt.subplots_adjust(left=0.1, bottom=0.25)

# Default parameters of the distribution
# NOTE(review): norm.stats() returns (mean, variance), but the tuple is
# unpacked below as (loc, scale), i.e. (mean, standard deviation).  This
# only coincides because both equal 1 for the standard normal.
params = norm.stats()

# Initial plot
x = np.linspace(-5, 5, 100)

line_pdf, = ppl.plot(ax,
                     x,
                     norm.pdf(x, *params),
                     lw=2,
                     color='red',
                     label="pdf")
line_cdf, = ppl.plot(ax,
                     x,
                     norm.cdf(x, *params),
                     lw=1,
                     color='lightgrey',
                     label="cdf")
ppl.legend(ax)
# plt.axis([-10, 10, 0, 1])

Example #19

0

Show file

# cdf of the standard normal at a scalar and at an array of points,
# followed by two lower-tail quantiles.
print('Normal distribution CDF X<=0: ', norm.cdf(0))
a = np.array([-1, 0, 1])
print(norm.cdf(a))
print(norm.ppf(0.05))
print(norm.ppf(0.025))
# Generating random normal variates:
r = norm.rvs(0, 1, size=10)
print(r)
######################
# Shifting and scaling:
# All continuous distributions take loc and scale as keyword
# parameters to adjust the location and scale of the distribution.
# For the normal distribution, loc is the mean and scale is the standard deviation.
# The stats method returns moments of the distribution.
print('\n', 'Shifting and Scaling:')
a = norm.stats(loc=3, scale=4, moments="mv")
print(a)
aVec = norm.stats(loc=[0, 1, 2], scale=[1, 2, 4], moments="mv")
print('vectorized: ')
print(aVec)
# Array-valued loc/scale are broadcast element-wise by norm.rvs;
# this does NOT sample from a multivariate normal.
print('MULTI?! Does not give multivariate Normal!')
rvs = norm.rvs(loc=np.array([0, 1]), scale=np.array([[1, 0], [0, 1]]))
print(rvs)
print('\n\n')
#############################################################
#############################################################
# In general the standardized distribution for a random variable X is
# obtained through the transformation (X - loc) / scale. The
# default values are loc = 0 and scale = 1.

Example #20

0

Show file

print(a)
b = np.array([[5], [6]])
print(b)

# Solve a.x = b twice (explicit inverse, then np.linalg.solve) and print
# the residual a.x - b for each approach.
print(linalg.inv(a).dot(b))
print(a.dot(linalg.inv(a).dot(b)) - b)
print(np.linalg.solve(a, b))
print(a.dot(np.linalg.solve(a, b)) - b)
print('-----')

# Step 4. Common Methods in stats
print(norm.cdf(0))
print(norm.cdf([-1., 0, 1]))
print(norm.cdf(np.array([-1., 0, 1])))
print(norm.mean(), norm.std(), norm.var())
print(norm.stats(moments="mv"))
print(norm.ppf(0.5))
print(norm.rvs(size=3))
# np.random.seed returns None, so this line prints "None" (the seed is
# still applied).
print(np.random.seed(1234))
print(norm.rvs(size=5, random_state=1234))
# The positional argument of rvs is loc, so this is a single draw with loc=5.
print(norm.rvs(5))
print('-----')

# Step 5. Broadcasting
print(stats.t.isf([0.1, 0.05, 0.01], [[10], [11]]))
print(stats.t.isf([0.1, 0.05, 0.01], 10))
print(stats.t.isf([0.1, 0.05, 0.01], [10, 11, 12]))
print('-----')

# Step 6. Analysing one sample
np.random.seed(282629734)

Example #21

0

Show file

	# Eigen-decomposition of a 2x2 matrix with scipy.linalg.
	import numpy as np
	from scipy import linalg
	A = np.array([[1,2],[3,4]])
	la,v = linalg.eig(A)
	l1,l2 = la
	print(l1, l2) 
	print(v[:,0]) 
	print(v[:,1]) 
	print(np.sum(abs(v**2),axis=0)) 
	v1 = np.array(v[:,0]).T
	# Norm of the residual A.v1 - l1*v1 for the first eigenpair.
	print(linalg.norm(A.dot(v1)-l1*v1))
	
	# Random variates and the four moments of the standard normal.
	from scipy.stats import norm
	r = norm.rvs(loc=0, scale=1, size=1000)
	print(r)
	print(norm.stats(moments='mvsk'))
	
	
	# Least-squares linear regression on two random samples.
	from scipy import stats
	import numpy as np
	x = np.random.random(10)
	y = np.random.random(10)
	slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
	print({'slope':slope,'intercept':intercept})
	print({'p_value':p_value,'r-squared':round(r_value**2,2)})
	
	import numpy as np
	from scipy.optimize import minimize
	
	# Define the function
	def rosen(x):

Example #22

0

Show file

## Funções estatísticas

O módulo *stats* contém uma grande quantidade de distribuições de probabilidade (124 atualmente), e uma biblioteca em constante crescimento de funções estatísticas, incluindo estatística descritiva, distribuições de frequência, correlações, testes, transformações, distâncias, estatística circular, etc. Também há duas classes que facilitam a criação de distribuições customizadas: *rv_continuous*, para distribuições contínuas, e *rv_discrete*, para discretas.

### Usando distribuições

Nesta Seção, tomaremos como exemplo a distribuição normal, representada pela classe *scipy.stats.norm*, que herda todos métodos genéricos da classe *rv_continuous*. A classe *norm* representa a forma normal padrão, i.e. sua função densidade de probabilidade (fdp) para um número real $x$ é: $$f(x) = \frac{e^{-x^{2}/2}}{\sqrt{2\pi}}.$$
Para importar a distribuição normal, fazemos:


from scipy.stats import norm

Para calcular alguns momentos:

# Mean, variance, skewness and kurtosis of the standard normal.
mean, var, skew, kurt = norm.stats(moments='mvsk')
print('média: {}, variância: {}, assimetria: {}, curtose: {}'.format(mean, var, skew, kurt))

Como dito acima, essa classe implementa a distribuição normal padrão. Para deslocar ou mudar a escala da distribuição, i.e. informar média e desvio-padrão, pode-se informar os parâmetros *loc* e *scale*, respectivamente. Exemplo:

# Mean and variance of the normal with loc=2 (mean) and scale=0.5
# (standard deviation).  Only the two values the template uses are passed
# to format().
mean, var = norm.stats(loc=2, scale=0.5, moments='mv')
print('média: {}, variância: {}'.format(mean, var))

Para calcular a fdp para um número real $x$, fazemos:

# Density of the normal with loc=2, scale=0.5 evaluated at x.
x = 1
print(norm.pdf(x, loc=2, scale=0.5))

A função *pdf* também pode ser usada para calcular a fdp para cada elemento de um *array*:

import numpy as np

Example #23

0

Show file

File: Data-Wrangling.py Project: zuerwww/tools-for-data-science-course

# A large number of probability distributions as well as a growing library of statistical functions are available in `scipy.stats`. See http://docs.scipy.org/doc/scipy/reference/stats.html for a complete list.
# The normal distribution, among others.

# Generate random numbers from normal distribution:

# In[29]:

from scipy.stats import norm  # the normal distribution object
r = norm.rvs(loc=0, scale=1, size=1000)
# rvs draws normal variates: loc is the mean, scale the standard deviation and size the number of draws; r holds the generated numbers.

# Calculate a few first moments:

# In[30]:

mean, var, skew, kurt = norm.stats(moments='mvsk')
# stats: the first four moments (mean, variance, skewness, kurtosis)

# # Linear regression model
#
# This example computes a least-squares regression for two sets of measurements.

# In[31]:

from scipy import stats
import numpy as np
x = np.random.random(10)
y = np.random.random(10)
slope, intercept, r_value, p_value, std_err = stats.linregress(
    x, y)  # linregress fits a simple linear model; std_err is the standard error of the estimated slope
print({'slope': slope, 'intercept': intercept})

Example #24

0

Show file

File: 141206_statstests.py Project: AlIrvine/hyperbolic

""" Stats distributions tests"""
from scipy.stats import norm, cauchy
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = cauchy.stats(moments='mvsk')
print kurt

mean_n, var_n, skew_n, kurt_n = norm.stats(moments='mvsk')
print mean_n
print var_n, skew_n, kurt_n