def testIntelligenceScale():
    """Show how rare high values are under a Normal(mu=100, sigma=15) IQ model.

    Uses erf.NormalCdf() as the approximate cumulative distribution of the
    normal distribution. For the mean and for 115, 130 and 145 it prints the
    percentage of people above that value and draws the percentages as a bar
    chart. It then prints how many out of 6 billion people are expected to
    exceed the "six sigma" value, i.e. mu + 6 * sigma = 100 + 6 * 15 = 190.

    NOTE(review): `erf` and `plt` come from outside this block; `plt` is
    presumably matplotlib.pyplot - confirm against the file's imports.
    """
    mu, sigma = 100, 15
    IQs = [mu, 115, 130, 145]
    ys = []
    for iq in IQs:
        # Upper-tail probability, expressed as a percentage.
        percent = (1 - erf.NormalCdf(iq, mu=mu, sigma=sigma)) * 100
        ys.append(percent)
        print("%.2f%% people IQ > %d" % (percent, iq))
    plt.bar(IQs, ys, width=0.8, align="center")
    plt.show()

    # Six sigma: the value six standard deviations above the mean.
    threshold = mu + 6 * sigma
    people = 6 * 1000 * 1000 * 1000 * (
        1 - erf.NormalCdf(threshold, mu=mu, sigma=sigma)
    )
    # Report the threshold that was actually used for the count (190);
    # the message previously printed 5 * sigma (75) by mistake.
    print("%d people IQ > %d" % (people, threshold))
def process(data):
    """Plot the histogram, PMF and CDF of `data`, then a normal-model scatter.

    The histogram and the PMF are each shown in their own figure. The CDF of
    the data is then drawn, the mean and standard deviation of `data` are
    printed, and a sample produced by normal_sample() is scattered against
    the normal CDF (erf.NormalCdf) fitted with that mean and deviation.

    Args:
        data: values accepted by Pmf.MakeHistFromList, Cdf.MakeCdfFromList
            and thinkstats.MeanVar; must support len().
    """
    # Histogram of the data.
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF derived from that histogram.
    myplot.Pmf(Pmf.MakePmfFromHist(histogram, name='pmf'), color='yellow')
    myplot.Show()
    myplot.Clf()

    # CDF of the actual data.
    myplot.Cdf(Cdf.MakeCdfFromList(data, name='loafs'))

    mu, variance = thinkstats.MeanVar(data)
    sigma = math.sqrt(variance)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # Normal model: one sample point per data point, placed on the model CDF.
    sample = normal_sample(len(data), mu, sigma)
    model_ps = []
    for x in sample:
        model_ps.append(erf.NormalCdf(x, mu=mu, sigma=sigma))
    myplot.Scatter(sample, model_ps, color='red', label='sample')
    myplot.Show()
def Test(actual1, actual2, model, iters=1000):
    """Estimate a p-value for the observed difference in means.

    The absolute difference in means between the two groups is compared
    against a normal distribution with mean 0 and variance
    (1/n + 1/m) * Var(model); the p-value is the sum of both tails. As a
    cross-check, `iters` values from hypothesis.Resample(model, model, n, m)
    are generated and their mean and variance are printed.

    Args:
        actual1: sequence of observed values for group 1
        actual2: sequence of observed values for group 2
        model: sequence of values from the hypothetical distribution
        iters: number of resampling iterations used for the cross-check

    Returns:
        The two-sided p-value (left tail plus right tail).
    """
    n, m = len(actual1), len(actual2)

    # Observed group means and the magnitude of their difference.
    mu1, mu2, delta = hypothesis.DifferenceInMean(actual1, actual2)
    delta = abs(delta)
    for label, value in (
        ('n:', n),
        ('m:', m),
        ('mu1', mu1),
        ('mu2', mu2),
        ('delta', delta),
    ):
        print(label, value)

    # Expected distribution of differences in sample mean.
    mu_pooled, var_pooled = thinkstats.MeanVar(model)
    print('(Mean, Var) of pooled data', mu_pooled, var_pooled)

    scale = 1.0 / n + 1.0 / m
    mu = 0
    var = scale * var_pooled
    print('Expected Mean, Var of deltas', mu, var)

    # p-value of delta: probability mass in both tails.
    sigma = math.sqrt(var)
    left = erf.NormalCdf(-delta, mu, sigma)
    right = 1 - erf.NormalCdf(delta, mu, sigma)
    pvalue = left + right
    print('Tails:', left, right)
    print('p-value:', pvalue)

    # Compare against the mean and variance of resampled differences.
    resampled = []
    for _ in range(iters):
        resampled.append(hypothesis.Resample(model, model, n, m))
    print('(Mean, Var) of resampled deltas', thinkstats.MeanVar(resampled))

    return pvalue
def CmpNormalModelWithDataSample():
    """Plot the empirical CDF of the weight data next to a normal model.

    Loads the babies via Babies.PartitionBabies(), takes their weights from
    Babies.GetWightList(), and prints the mean, variance and standard
    deviation of the weight PMF. The empirical CDF is drawn first; the model
    curve is erf.NormalCdf evaluated with that mean and deviation at the
    x values returned by pmf.Render().
    """
    # Only the combined group is used; the first/other split is ignored.
    _, _, babies = Babies.PartitionBabies()
    weights = Babies.GetWightList(babies)

    pmf = Pmf.MakePmfFromList(weights)
    mu = pmf.Mean()
    var = pmf.Var(mu)
    sigma = math.sqrt(var)
    print("mu = {}, var = {}, sigma = {}".format(mu, var, sigma))

    # Empirical distribution (the data).
    cdf = Cdf.MakeCdfFromPmf(pmf, name='data')
    # The plotting helper is spelled `Cdf`, like the other capitalised
    # myplot functions used here (Plot, Show, Clf); `myplot.cdf` was a typo.
    myplot.Cdf(cdf)

    # Model: normal CDF computed via the error function from mu and sigma.
    xs, _ = pmf.Render()
    ys = [erf.NormalCdf(x, mu, sigma) for x in xs]
    myplot.Plot(xs, ys, label='Model')
    myplot.Show()
    myplot.Clf()
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Compare the empirical CDF of pregnancy lengths with a normal model.

The values come from Babies.GetPregnacyList(). The model is erf.NormalCdf
evaluated with the mean and standard deviation of those values.
"""

import math

import Babies
import Cdf
import myplot
import thinkstats
import erf

if __name__ == "__main__":
    # Only the combined group is used; the first/other split is ignored.
    _, _, babies = Babies.PartitionBabies()
    preglengths = Babies.GetPregnacyList(babies)

    mu = thinkstats.Mean(preglengths)
    sigma = math.sqrt(thinkstats.Var(preglengths, mu))
    print("mu = %.3f sigma = %.3f" % (mu, sigma))

    # Empirical CDF of the data.
    cdf0 = Cdf.MakeCdfFromList(preglengths, name='cdf0')

    # Model CDF. The xs handed to Cdf.Cdf must be ascending with matching
    # cumulative probabilities; the raw list is unsorted and has duplicates,
    # which scrambled the model curve. Use the sorted distinct values.
    xs = sorted(set(preglengths))
    ys = [erf.NormalCdf(x, mu=mu, sigma=sigma) for x in xs]
    cdf1 = Cdf.Cdf(xs, ys, 'cdf1')

    # The model is drawn once on its own and then again together with the
    # empirical CDF, as before.
    myplot.Cdf(cdf1, complement=False, transform=None)
    myplot.Cdfs([cdf0, cdf1], complement=False, transform=None)
    myplot.Show()
def main():
    """Plot the Normal(100, 15) CDF for IQ 0..199 and print the count above 190.

    The count is the upper-tail probability at IQ 190 multiplied by a
    population of 6 * 10**9.

    NOTE(review): `erf` and `plt` come from outside this block; `plt` is
    presumably matplotlib.pyplot - confirm against the file's imports.
    """
    xs = range(200)
    cdf = [erf.NormalCdf(x, mu=100, sigma=15) for x in xs]

    # cdf[i] is the CDF at IQ == i, so the 190 threshold named in the message
    # is index 190 (index 189 reported the tail above 189 instead).
    above_190 = (1 - cdf[190]) * 6 * 10 ** 9
    print('{:.3f} of people have more than 190IQ '.format(above_190))

    plt.plot(xs, cdf)
    plt.show()
def RenderNormalCdf(mu, sigma, max, n=50):
    """Generate matching sequences of xs and ps for a normal CDF.

    Args:
        mu: mean passed to erf.NormalCdf
        sigma: standard deviation passed to erf.NormalCdf
        max: upper end of the x range; the points are max * i / n for
            i = 0 .. n-1, so `max` itself is not included
        n: number of points to generate

    Returns:
        Tuple (xs, ps) of two lists of length n.
    """
    grid = [max * step / n for step in range(n)]
    probs = []
    for x in grid:
        probs.append(erf.NormalCdf(x, mu, sigma))
    return grid, probs
def underNormal(value, m, s):
    """Return erf.NormalCdf at `value` for mean `m` and standard deviation `s`."""
    cumulative = erf.NormalCdf(value, mu=m, sigma=s)
    return cumulative
def overNormal(value, m, s):
    """Return 1 minus erf.NormalCdf at `value` for mean `m` and std dev `s`."""
    below = erf.NormalCdf(value, mu=m, sigma=s)
    return 1 - below