예제 #1
0
파일: 4-7.py 프로젝트: qrsforever/workspace
def testIntelligenceScale():
    """Report how rare high values are under a normal model (mu=100, sigma=15).

    Uses erf.NormalCdf to compute the percentage of the distribution lying
    above the mean, 115, 130 and 145, prints each percentage, and shows them
    as a bar chart via plt. It then estimates how many people out of
    6 billion lie more than six standard deviations above the mean
    (100 + 6 * 15 = 190).
    """
    mu, sigma = 100, 15
    IQs = [mu, 115, 130, 145]

    # Upper-tail share (in percent) for each threshold.
    ys = []
    for iq in IQs:
        percent = (1 - erf.NormalCdf(iq, mu=mu, sigma=sigma)) * 100
        ys.append(percent)
        print("%.2f%% people IQ > %d" % (percent, iq))

    plt.bar(IQs, ys, width=0.8, align="center")
    plt.show()

    # Six sigma: values more than 6 standard deviations above the mean.
    # The threshold printed must be the same one used in the computation;
    # the original printed 5 * sigma (75) next to a count computed for 190.
    six_sigma = mu + 6 * sigma
    people = 6 * 1000 * 1000 * 1000 * (1 - erf.NormalCdf(six_sigma, mu=mu, sigma=sigma))
    print("%d people IQ > %d" % (people, six_sigma))
예제 #2
0
파일: 5-6.py 프로젝트: qrsforever/workspace
def process(data):
    """Plot the histogram, PMF and CDF of data and compare with a normal model.

    Shows the histogram and the PMF in turn, clears the figure, draws the
    empirical CDF, prints the mean and standard deviation of data, and then
    scatters normal-CDF values computed at points returned by normal_sample.

    Args:
        data: sequence of observed values.
    """
    # Histogram of the raw values.
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF derived from the histogram.
    mass = Pmf.MakePmfFromHist(histogram, name='pmf')
    myplot.Pmf(mass, color='yellow')
    myplot.Show()

    myplot.Clf()

    # CDF of the actual data.
    empirical = Cdf.MakeCdfFromList(data, name='loafs')
    myplot.Cdf(empirical)

    mean, variance = thinkstats.MeanVar(data)
    spread = math.sqrt(variance)
    print("mu = %.3f, sigma = %.3f" % (mean, spread))

    # Normal model: evaluate the normal CDF at each point of the sample.
    # NOTE(review): normal_sample is defined outside this block; presumably
    # it draws len(data) values from Normal(mean, spread) -- confirm.
    points = normal_sample(len(data), mean, spread)
    model_ps = []
    for point in points:
        model_ps.append(erf.NormalCdf(point, mu=mean, sigma=spread))
    myplot.Scatter(points, model_ps, color='red', label='sample')
    myplot.Show()
예제 #3
0
def Test(actual1, actual2, model, iters=1000):
    """Estimate a two-sided p-value for the observed difference in means.

    The absolute difference in means between the two groups is compared
    against a normal distribution with mean 0 and variance
    (1/n + 1/m) * Var(model). As a sanity check, the mean and variance of
    `iters` resampled differences are printed as well.

    Args:
        actual1: sequence of observed values for group 1.
        actual2: sequence of observed values for group 2.
        model: sequence of values from the hypothetical distribution.
        iters: number of resampled differences to generate for the check.

    Returns:
        The sum of the lower and upper tail probabilities.
    """
    size1, size2 = len(actual1), len(actual2)

    # Observed difference in means; only its magnitude matters here.
    mu1, mu2, observed = hypothesis.DifferenceInMean(actual1, actual2)
    observed = abs(observed)

    print('n:', size1)
    print('m:', size2)
    print('mu1', mu1)
    print('mu2', mu2)
    print('delta', observed)

    # Expected distribution of the difference in sample means.
    pooled_mean, pooled_var = thinkstats.MeanVar(model)
    print('(Mean, Var) of pooled data', pooled_mean, pooled_var)

    scale = 1.0 / size1 + 1.0 / size2
    expected_mean = 0
    expected_var = scale * pooled_var
    print('Expected Mean, Var of deltas', expected_mean, expected_var)

    # Probability mass in both tails beyond +/- the observed difference.
    expected_sigma = math.sqrt(expected_var)
    lower_tail = erf.NormalCdf(-observed, expected_mean, expected_sigma)
    upper_tail = 1 - erf.NormalCdf(observed, expected_mean, expected_sigma)
    pvalue = lower_tail + upper_tail
    print('Tails:', lower_tail, upper_tail)
    print('p-value:', pvalue)

    # Compare against the mean and variance of resampled differences.
    resampled = []
    for _ in range(iters):
        resampled.append(hypothesis.Resample(model, model, size1, size2))
    print('(Mean, Var) of resampled deltas', thinkstats.MeanVar(resampled))

    return pvalue
예제 #4
0
파일: 4-x.py 프로젝트: qrsforever/workspace
def CmpNormalModelWithDataSample():
    """Plot the empirical CDF of baby weights against a normal-model CDF.

    Loads the babies via Babies.PartitionBabies, builds a PMF of their
    weights, prints its mean, variance and standard deviation, then draws
    the empirical CDF and the normal CDF (same mean and sigma) evaluated at
    the PMF's x values.
    """
    # Only the combined list is needed; the first two return values are unused.
    _, _, babies = Babies.PartitionBabies()
    weights = Babies.GetWightList(babies)
    pmf = Pmf.MakePmfFromList(weights)
    mu = pmf.Mean()
    var = pmf.Var(mu)
    sigma = math.sqrt(var)
    print("mu = {}, var = {}, sigma = {}".format(mu, var, sigma))

    # Empirical distribution (the data).
    cdf = Cdf.MakeCdfFromPmf(pmf, name='data')
    # The plotting helper is spelled `Cdf` elsewhere in this project's
    # myplot usage; the lowercase `myplot.cdf` would not resolve.
    myplot.Cdf(cdf)

    # Model: normal CDF from mu and sigma, evaluated at the PMF's x values.
    xs, _ = pmf.Render()
    ys = [erf.NormalCdf(x, mu, sigma) for x in xs]
    myplot.Plot(xs, ys, label='Model')
    myplot.Show()
    myplot.Clf()
예제 #5
0
파일: 4-8.py 프로젝트: qrsforever/workspace
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import math
import Babies
import Cdf
import myplot
import thinkstats
import erf

if __name__ == "__main__":
    # Compare the empirical CDF of pregnancy lengths with a normal model
    # that uses the sample mean and standard deviation.
    firsts, others, babies = Babies.PartitionBabies()
    preglengths = Babies.GetPregnacyList(babies)
    mu = thinkstats.Mean(preglengths)
    sigma = math.sqrt(thinkstats.Var(preglengths, mu))
    print("mu = %.3f sigma = %.3f" % (mu, sigma))

    # Empirical CDF built directly from the data.
    cdf0 = Cdf.MakeCdfFromList(preglengths, name='cdf0')

    # Model CDF: Cdf.Cdf is given parallel xs/ps sequences, so the xs must be
    # increasing and unique for the ps to form a monotone CDF. The original
    # passed the raw (unsorted, repeated) observations.
    # NOTE(review): the original carried a "TODO wrong" marker here; sorting
    # is the presumed fix -- confirm the resulting plot.
    xs = sorted(set(preglengths))
    ys = [erf.NormalCdf(x, mu=mu, sigma=sigma) for x in xs]
    cdf1 = Cdf.Cdf(xs, ys, 'cdf1')

    myplot.Cdf(cdf1, complement=False, transform=None)
    myplot.Cdfs([cdf0, cdf1], complement=False, transform=None)
    myplot.Show()
예제 #6
0
def main():
    """Print the expected count of people above IQ 190 and plot the normal CDF.

    Evaluates erf.NormalCdf with mu=100, sigma=15 at every integer in
    0..199, scales the upper tail beyond 190 by 6 * 10**9, prints it, and
    plots the CDF values with plt.
    """
    x = range(200)
    cdf = [erf.NormalCdf(_x, mu=100, sigma=15) for _x in x]
    # x[i] == i, so the CDF at IQ 190 lives at index 190 (index 189 is the
    # CDF at 189 and would overstate the tail).
    print('{:.3f} of people have more than 190IQ '.format((1 - cdf[190]) * 6 * (10) ** 9))
    plt.plot(x, cdf)
    plt.show()
예제 #7
0
def RenderNormalCdf(mu, sigma, max, n=50):
    """Generate parallel sequences of xs and ps for a normal CDF.

    Args:
        mu: mean passed to erf.NormalCdf.
        sigma: standard deviation passed to erf.NormalCdf.
        max: scale of the x axis; the xs are max * i / n for i in 0..n-1.
        n: number of points to generate.

    Returns:
        A tuple (xs, ps) of two lists of length n.
    """
    xs = []
    ps = []
    for step in range(n):
        x = max * step / n
        xs.append(x)
        ps.append(erf.NormalCdf(x, mu, sigma))
    return xs, ps
예제 #8
0
파일: ch4-ex7.py 프로젝트: cbuie/thinkstats
def underNormal(value, m, s):
    """Return erf.NormalCdf evaluated at value with mu=m and sigma=s."""
    cumulative = erf.NormalCdf(value, mu=m, sigma=s)
    return cumulative
예제 #9
0
파일: ch4-ex7.py 프로젝트: cbuie/thinkstats
def overNormal(value, m, s):
    """Return 1 minus erf.NormalCdf evaluated at value with mu=m and sigma=s."""
    cumulative = erf.NormalCdf(value, mu=m, sigma=s)
    return 1 - cumulative