def main():
    """Plot the data in 'mystery0.dat' on a 2x3 grid of diagnostic panels.

    The panels are titled, in order: linear, logx, expo, normal, pareto
    and weibull.  Five of them draw the CDF of the data (plain, with a
    log x scale, or with one of thinkplot's transforms); the fourth draws
    the result of thinkstats2.NormalProbability.
    """
    rows, cols = 2, 3
    samples = read_file('mystery0.dat')
    dist = thinkstats2.MakeCdfFromList(samples)

    def draw_scaled(panel, title, **cdf_options):
        # The value returned by thinkplot.Cdf is unpacked as keyword
        # options into thinkplot.Config for the same panel.
        thinkplot.SubPlot(rows, cols, panel)
        axis_options = thinkplot.Cdf(dist, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    # Panel 1: untransformed CDF; the return value of Cdf is not used here.
    thinkplot.SubPlot(rows, cols, 1)
    thinkplot.Cdf(dist)
    thinkplot.Config(title='linear')

    draw_scaled(2, 'logx', xscale='log')
    draw_scaled(3, 'expo', transform='exponential')

    # Panel 4: normal probability plot built from the raw data, not the CDF.
    norm_xs, norm_ys = None, None
    thinkplot.SubPlot(rows, cols, 4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(samples)
    thinkplot.Plot(norm_xs, norm_ys)
    thinkplot.Config(title='normal')

    draw_scaled(5, 'pareto', transform='pareto')
    draw_scaled(6, 'weibull', transform='weibull')

    thinkplot.Show()
def main(script, filename='mystery0.dat'):
    """Plot the data in `filename` on a 2x3 grid of diagnostic panels.

    script: not used in the body (presumably the script name from
            sys.argv -- confirm against the caller)
    filename: file handed to ReadFile

    The panels are titled, in order: linear, logx, expo, normal, pareto
    and weibull.  The figure is shown with legend=False.
    """
    samples = ReadFile(filename)
    dist = thinkstats2.Cdf(samples)

    def draw_scaled(panel, title, **cdf_options):
        # The value returned by thinkplot.Cdf is unpacked as keyword
        # options into thinkplot.Config for the same panel.
        thinkplot.SubPlot(panel)
        axis_options = thinkplot.Cdf(dist, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    thinkplot.PrePlot(rows=2, cols=3)

    # Panel 1: untransformed CDF; the return value of Cdf is not used here.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(dist)
    thinkplot.Config(title='linear')

    draw_scaled(2, 'logx', xscale='log')
    draw_scaled(3, 'expo', transform='exponential')

    # Panel 4: normal probability plot built from the raw data, not the CDF.
    thinkplot.SubPlot(4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(samples)
    thinkplot.Plot(norm_xs, norm_ys)
    thinkplot.Config(title='normal')

    draw_scaled(5, 'pareto', transform='pareto')
    draw_scaled(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)
def main(script, filename='mystery0.dat'):
    """Plot the data in `filename` on a labelled 2x3 grid of diagnostic panels.

    script: not used in the body (presumably the script name from
            sys.argv -- confirm against the caller)
    filename: file handed to ReadFile; also used as the label of the
              first panel's curve

    Every curve is drawn with color 'C0' and the figure is shown with
    legend=False.
    """
    line_color = 'C0'
    samples = ReadFile(filename)
    dist = thinkstats2.Cdf(samples)

    def draw_scaled(panel, title, ylabel, **cdf_options):
        # The value returned by thinkplot.Cdf is unpacked as keyword
        # options into thinkplot.Config, after the explicit title/ylabel.
        thinkplot.SubPlot(panel)
        axis_options = thinkplot.Cdf(dist, **cdf_options, color=line_color)
        thinkplot.Config(title=title, ylabel=ylabel, **axis_options)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # Panel 1: untransformed CDF, the only curve that carries a label.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(dist, color=line_color, label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    draw_scaled(2, 'CDF on log-x scale', 'CDF', xscale='log')
    draw_scaled(3, 'CCDF on log-y scale', 'log CCDF',
                transform='exponential')

    # Panel 4: normal probability plot built from the raw data, not the CDF.
    thinkplot.SubPlot(4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(samples)
    thinkplot.Plot(norm_xs, norm_ys, color=line_color)
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    draw_scaled(5, 'CCDF on log-log scale', 'log CCDF',
                transform='pareto')
    draw_scaled(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
                transform='weibull')

    thinkplot.Show(legend=False)
def MakeNormalPlot(weights, term_weights):
    """Save a normal probability plot comparing two sets of birth weights.

    weights: values plotted with the label 'all live'; also the input to
             TrimmedMeanVar for the reference line
    term_weights: values plotted with the label 'full term'

    The figure is written through thinkplot.Save with root
    'analytic_birthwgt_normal'.
    """
    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    spread = math.sqrt(trimmed_var)

    # Reference line from FitLine over [-4, 4], drawn first as a thick
    # line in color '0.8'.
    line_xs, line_ys = thinkstats2.FitLine([-4, 4], trimmed_mean, spread)
    thinkplot.Plot(line_xs, line_ys, linewidth=4, color='0.8')

    thinkplot.PrePlot(2)
    series = (
        ('all live', weights),
        ('full term', term_weights),
    )
    for label, values in series:
        norm_xs, norm_ys = thinkstats2.NormalProbability(values)
        thinkplot.Plot(norm_xs, norm_ys, label=label)

    thinkplot.Save(
        root='analytic_birthwgt_normal',
        title='Normal probability plot',
        xlabel='Standard deviations from mean',
        ylabel='Birth weight (lbs)',
    )
def PlotNormalProbability(sample, title="", ylabel=""):
    """Draw a normal probability plot of `sample` with a model line.

    sample: values to plot
    title: title passed to thinkplot.Config
    ylabel: y-axis label passed to thinkplot.Config

    The model line comes from FitLine with intercept mu and slope sigma,
    where mu and sigma**2 are the results of TrimmedMeanVar(sample, p=0.01).
    Its legend entry shows both values to two decimals.
    """
    center, variance = thinkstats2.TrimmedMeanVar(sample, p=0.01)
    spread = np.sqrt(variance)

    model_label = r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(
        center, spread)
    line_xs, line_ys = thinkstats2.FitLine([-5, 5], inter=center, slope=spread)
    thinkplot.plot(line_xs, line_ys, color='gray', label=model_label)

    norm_xs, norm_ys = thinkstats2.NormalProbability(sample)
    thinkplot.Plot(norm_xs, norm_ys, label="actual")

    thinkplot.Config(title=title, xlabel="z", ylabel=ylabel)
def MakeNormalPlot(weights):
    """Draw a normal probability plot of `weights` with a model line.

    weights: sequence

    Only draws onto the current thinkplot figure; nothing is shown, saved
    or returned.
    """
    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    spread = math.sqrt(trimmed_var)

    # Model line from FitLine over [-5, 5], labelled 'model'.
    model_xs, model_ys = thinkstats2.FitLine([-5, 5], trimmed_mean, spread)
    thinkplot.Plot(model_xs, model_ys, color='0.8', label='model')

    # The data themselves, labelled 'weights'.
    data_xs, data_ys = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(data_xs, data_ys, label='weights')
def MakeNormalPlot(x):
    """Show a normal probability plot for column `x` of the global `df`.

    x: key used to index the module-level `df`; it also appears in the
       plot title

    A reference line from FitLine is drawn first, then the column's
    normal probability curve labelled 'Number of Crimes'.
    """
    column = df[x]

    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(column, p=0.01)
    spread = math.sqrt(trimmed_var)

    line_xs, line_ys = thinkstats2.FitLine([-4, 4], trimmed_mean, spread)
    thinkplot.Plot(line_xs, line_ys, linewidth=4, color='0.8')

    thinkplot.PrePlot(2)
    norm_xs, norm_ys = thinkstats2.NormalProbability(column)
    thinkplot.Plot(norm_xs, norm_ys, label='Number of Crimes')

    plot_title = 'Normal Prob Plot: {}'.format(x)
    thinkplot.Show(
        title=plot_title,
        xlabel='Standard deviations from mean',
        ylabel='Number of Crimes',
    )
def MakeExampleNormalPlot():
    """Generates a sample normal probability plot.

    Draws 1000 values from each of three normal distributions
    (mu, sigma) = (0, 1), (1, 1), (5, 2), plots the result of
    thinkstats2.NormalProbability for each, and saves the figure with
    root 'analytic_normal_prob_example'.
    """
    n = 1000
    thinkplot.PrePlot(3)

    mus = [0, 1, 5]
    sigmas = [1, 1, 2]
    for mu, sigma in zip(mus, sigmas):
        sample = np.random.normal(mu, sigma, n)
        xs, ys = thinkstats2.NormalProbability(sample)
        # Raw string: '\m' and '\s' are not valid escape sequences, so a
        # plain literal triggers a warning on current Python versions.
        # The resulting text is unchanged.
        label = r'$\mu=%d$, $\sigma=%d$' % (mu, sigma)
        thinkplot.Plot(xs, ys, label=label)

    thinkplot.Save(root='analytic_normal_prob_example',
                   title='Normal probability plot',
                   xlabel='standard normal sample',
                   ylabel='sample values')
def MakeNormalPlot(arrivalDelays):
    """Save a normal probability plot of the arrival delays.

    This is a modified copy from analytic.py.

    arrivalDelays: object providing mean() and std(); its values are
                   plotted with the label 'arrival delays (min)'

    The reference line uses the plain mean and standard deviation of the
    input.  The figure is written through thinkplot.Save with root
    'NormalModel_arrivaldelay_normalplot'.
    """
    center = arrivalDelays.mean()
    spread = arrivalDelays.std()

    line_xs, line_ys = thinkstats2.FitLine([-4, 4], center, spread)
    thinkplot.Plot(line_xs, line_ys, linewidth=4, color='0.8')

    thinkplot.PrePlot(2)
    norm_xs, norm_ys = thinkstats2.NormalProbability(arrivalDelays)
    thinkplot.Plot(norm_xs, norm_ys, label='arrival delays (min)')

    thinkplot.Save(
        root='NormalModel_arrivaldelay_normalplot',
        title='Normal probability plot',
        xlabel='Standard deviations from mean',
        ylabel='Arrival Delays (min)',
    )
def plotDist(data):
    """
    function to plot normal probability plots of random subsamples

    Draws three random subsamples of 30 observations each from `data`
    and plots the result of thinkstats2.NormalProbability for each one.
    Each curve is labelled with the mean and standard deviation of its
    own subsample.

    @param: data (series) - data to be plotted; must provide sample(n=...),
            and the subsamples must provide mean() and std()
            (assumes a pandas Series -- confirm against callers)
    """
    n = 30
    num_samples = 3
    thinkplot.PrePlot(num_samples)

    for _ in range(num_samples):
        sample = data.sample(n=n)
        xs, ys = thinkstats2.NormalProbability(sample)
        # Label with the subsample's own statistics.  The previous
        # hard-coded mu/sigma values were unrelated to the data plotted.
        # Raw string avoids the invalid '\m' and '\s' escapes.
        label = r'$\mu=%.2f$, $\sigma=%.2f$' % (sample.mean(), sample.std())
        thinkplot.Plot(xs, ys, label=label)

    thinkplot.Config(title='Normal probability plot',
                     xlabel='standard normal sample',
                     ylabel='sample values')
# Draw the male and female PMFs as histograms on the same axes, one
# aligned left (blue) and one aligned right (red).
thinkplot.Hist(male_pmf, width=width, align='left', color='blue')
thinkplot.Hist(female_pmf, width=width, align='right', color='red')
thinkplot.Config(ylabel='Probability')

# Plot the CDF of Age.
cdf = thinkstats2.Cdf(df.Age)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Age', ylabel='CDF')

# Normal probability plot of Age: a gray model line built from the plain
# mean and standard deviation, then the data.
mean = df.Age.mean()
std = df.Age.std()
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mean, slope=std)
thinkplot.Plot(fxs, fys, color='gray', label='model')
xs, ys = thinkstats2.NormalProbability(df.Age)
thinkplot.Plot(xs, ys, label='Age')

# Scatter plots and correlation.
# Year vs. age: Year is jittered by .25 before plotting.
year = thinkstats2.Jitter(df.Year, .25)
thinkplot.Scatter(year, df.Age)
thinkplot.Show(xlabel='Year', ylabel='Age')
# NOTE(review): the correlation is computed but the result is neither
# stored nor printed here.
thinkstats2.Corr(df.Year, df.Age)

# Drug vs. age.
thinkplot.Scatter(df.Age, df.Drug)
thinkplot.Show(xlabel='Age', ylabel='Drug')

# Testing a difference in gender: pass the (male, female) age arrays as a
# pair to DiffMeansPermute.
data = male.Age.values, female.Age.values
ht = DiffMeansPermute(data)
xlabel='Birth weight (pounds)', ylabel='CDF') #%% [markdown] # A normal probability plot is a visual test for normality. The following example shows that if the data are actually from a normal distribution, the plot is approximately straight. #%% n = 1000 thinkplot.PrePlot(3) mus = [0, 1, 5] sigmas = [1, 1, 2] for mu, sigma in zip(mus, sigmas): sample = np.random.normal(mu, sigma, n) xs, ys = thinkstats2.NormalProbability(sample) label = '$\mu=%d$, $\sigma=%d$' % (mu, sigma) thinkplot.Plot(xs, ys, label=label) thinkplot.Config(title='Normal probability plot', xlabel='standard normal sample', ylabel='sample values') #%% [markdown] # Here's the normal probability plot for birth weights, showing that the lightest babies are lighter than we expect from the normal model, and the heaviest babies are heavier. #%% mean, var = thinkstats2.TrimmedMeanVar(weights, p=0.01) std = np.sqrt(var) xs = [-4, 4]
# Compare the empirical CDF with the normal model CDF evaluated at the
# same x values (x, mu, sigma and cdf are defined before this excerpt).
y = thinkstats2.EvalNormalCdf(x, mu=mu, sigma=sigma)
thinkplot.plot(x, cdf.Probs(x), label='Data')
thinkplot.plot(x, y, label=r'Model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))
thinkplot.Config(xlabel="weight (pounds)", ylabel="CDF")

#%% [markdown]
# ## 5.3 Normal probability plot

#%%
# Example: normal probability plots for 1000 draws from each of three
# normal distributions.
n = 1000
thinkplot.PrePlot(3)
for mu, sigma in zip([0, 1, 5], [1, 1, 2]):
    sample = np.random.normal(mu, sigma, n)
    xs, ys = thinkstats2.NormalProbability(sample)
    thinkplot.plot(xs, ys, label=r"$\mu$={} $\sigma$={}".format(mu, sigma))
thinkplot.Config(title="Normal probability plot", xlabel="standard normal sample", ylabel="sample value")

#%%
# Model line for the birth weights, from the TrimmedMeanVar results.
# Note that mu and sigma are rebound here, replacing the loop values above.
mu, var = thinkstats2.TrimmedMeanVar(totalwgt_lb, p=0.01)
# Weights for pregnancies with prglngth >= 37, missing values dropped;
# not used again within this excerpt.
maturity = live[live.prglngth >= 37].totalwgt_lb.dropna()
sigma = np.sqrt(var)
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mu, slope=sigma)
thinkplot.plot(fxs, fys, color='gray', label=r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))
# Model fits data except for in the left tail up to 15 minutes
# If data is from a normal distribution then the plot will be straight

# In[72]:

# Example: normal probability plots for 1000 draws from each of three
# normal distributions.
n = 1000
thinkplot.PrePlot(3)

mus = [0, 1, 5]
sigmas = [1, 1, 2]
for mu, sigma in zip(mus, sigmas):
    sample = np.random.normal(mu, sigma, n)
    xs, ys = thinkstats2.NormalProbability(sample)
    # Raw string: '\m' and '\s' are not valid escape sequences, so a plain
    # literal triggers a warning on current Python versions.  The
    # resulting text is unchanged.
    label = r'$\mu=%d$, $\sigma=%d$' % (mu, sigma)
    thinkplot.Plot(xs, ys, label=label)

thinkplot.Config(title='Normal probability plot',
                 xlabel='standard normal sample',
                 ylabel='sample values')

# Normal Probability Plot for trip duration

# In[73]:

# Mean and standard deviation of trip duration from TrimmedMeanVar (p=0.01).
mean, var = thinkstats2.TrimmedMeanVar(df.tripduration, p=0.01)
std = np.sqrt(var)
# NOTE(review): the two statements below use `pmf` and `label` defined
# before this excerpt; they may belong to a loop body whose header is
# outside this view -- confirm the intended indentation.
cdf = thinkstats2.MakeCdfFromPmf(pmf, label=label)
thinkplot.Cdf(cdf, label=label)
thinkplot.Show(xlabel='x', ylabel='CDF', axis=[-1, 4, 0, 1])

## make data frames
live, firsts, others = first.MakeFrames()

## make normal probability plot of totalwgt_lb
# "Full term" here means prglngth >= 36.
full_term_wgt = live.loc[live.prglngth >= 36, 'totalwgt_lb']
weights = live.totalwgt_lb
thinkplot.PrePlot(2)
# MakeNormalPlot is expected to return the model line as (xs, ys) here.
fxs, fys = MakeNormalPlot(weights)
thinkplot.Plot(fxs, fys, color='gray', label='model')
xs, ys = thinkstats2.NormalProbability(weights)
thinkplot.Plot(xs, ys, label='all live')
xs, ys = thinkstats2.NormalProbability(full_term_wgt)
thinkplot.Plot(xs, ys, label='full term')
thinkplot.Show(xlabel='standard deviations from mean', ylabel='Birth weight (lbs)', title='Normal probability plot')

## make Pareto CDF from random variates
# 1000 variates from ParetoVariate(alpha=2, xm=1), shown as a complementary
# CDF with both axes on a log scale.
t = [ParetoVariate(alpha=2, xm=1) for _ in range(1000)]
cdf = thinkstats2.Cdf(t)
thinkplot.Cdf(cdf, complement=True)
thinkplot.Show(xlabel='x', ylabel='CCDF', xscale='log', yscale='log')