コード例 #1
0
def main():
    """Draw six views of the data in 'mystery0.dat' on a 2x3 subplot grid.

    The panels are, in order: the plain CDF, the CDF with a log x scale,
    the CDF under the 'exponential' transform, a normal probability plot,
    and the CDF under the 'pareto' and 'weibull' transforms.  Whatever
    options ``thinkplot.Cdf`` returns for a panel are forwarded unchanged
    to ``thinkplot.Config`` for that panel.
    """
    filename = 'mystery0.dat'
    data = read_file(filename)
    cdf = thinkstats2.MakeCdfFromList(data)

    rows, cols = 2, 3

    # Panel 1: CDF with default options; no scale options are forwarded.
    thinkplot.SubPlot(rows, cols, 1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    # Panels 2 and 3: (subplot index, title, keyword options for Cdf).
    for index, title, cdf_options in (
            (2, 'logx', {'xscale': 'log'}),
            (3, 'expo', {'transform': 'exponential'}),
    ):
        thinkplot.SubPlot(rows, cols, index)
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    # Panel 4: normal probability plot built from the raw data, not the CDF.
    thinkplot.SubPlot(rows, cols, 4)
    prob_xs, prob_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(prob_xs, prob_ys)
    thinkplot.Config(title='normal')

    # Panels 5 and 6: the transform name doubles as the panel title.
    for index, transform in ((5, 'pareto'), (6, 'weibull')):
        thinkplot.SubPlot(rows, cols, index)
        axis_options = thinkplot.Cdf(cdf, transform=transform)
        thinkplot.Config(title=transform, **axis_options)

    thinkplot.Show()
コード例 #2
0
def main(script, filename='mystery0.dat'):
    """Draw six views of one data file on a 2x3 subplot grid.

    script: not used in the body (presumably the script name from
        sys.argv -- confirm against the caller)
    filename: file passed to ReadFile; defaults to 'mystery0.dat'

    Panels, in order: plain CDF, CDF with log x scale, CDF under the
    'exponential' transform, normal probability plot, and CDF under the
    'pareto' and 'weibull' transforms.
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    def cdf_panel(index, title, **cdf_options):
        # Select a subplot, draw the CDF with the given options, and hand
        # whatever thinkplot.Cdf returns straight to Config.
        thinkplot.SubPlot(index)
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    thinkplot.PrePlot(rows=2, cols=3)

    # Panel 1 is configured without any forwarded scale options.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    cdf_panel(2, 'logx', xscale='log')
    cdf_panel(3, 'expo', transform='exponential')

    # Panel 4 works on the raw data rather than the CDF object.
    thinkplot.SubPlot(4)
    prob_xs, prob_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(prob_xs, prob_ys)
    thinkplot.Config(title='normal')

    cdf_panel(5, 'pareto', transform='pareto')
    cdf_panel(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)
コード例 #3
0
ファイル: test_models.py プロジェクト: RachelONelson/DSC530
def main(script, filename='mystery0.dat'):
    """Draw six labelled views of one data file on a 2x3 subplot grid.

    script: not used in the body (presumably the script name from
        sys.argv -- confirm against the caller)
    filename: file passed to ReadFile; also used as the label of the
        first panel's curve

    Every curve is drawn with color 'C0'.  For the transformed panels the
    options returned by thinkplot.Cdf are forwarded to thinkplot.Config.
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)
    curve_color = 'C0'

    def cdf_panel(index, title, ylabel, **cdf_options):
        # One transformed-CDF panel: draw, then configure with the
        # options thinkplot.Cdf hands back.
        thinkplot.SubPlot(index)
        axis_options = thinkplot.Cdf(cdf, color=curve_color, **cdf_options)
        thinkplot.Config(title=title, ylabel=ylabel, **axis_options)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # Panel 1: the only curve that carries a label.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color=curve_color, label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    cdf_panel(2, 'CDF on log-x scale', 'CDF', xscale='log')
    cdf_panel(3, 'CCDF on log-y scale', 'log CCDF', transform='exponential')

    # Panel 4: normal probability plot of the raw data.
    thinkplot.SubPlot(4)
    prob_xs, prob_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(prob_xs, prob_ys, color=curve_color)
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    cdf_panel(5, 'CCDF on log-log scale', 'log CCDF', transform='pareto')
    cdf_panel(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
              transform='weibull')

    thinkplot.Show(legend=False)
コード例 #4
0
def MakeNormalPlot(weights, term_weights):
    """Save a normal probability plot comparing two sets of birth weights.

    weights: sample plotted with the label 'all live'; also the sample the
        trimmed mean and variance (p=0.01) are computed from
    term_weights: sample plotted with the label 'full term'

    A thick light-gray line from FitLine(mean, std) over [-4, 4] is drawn
    first, then both samples, and the figure is saved under the root name
    'analytic_birthwgt_normal'.
    """
    mean, var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    std = math.sqrt(var)

    # Reference line, drawn before the data curves.
    model_xs, model_ys = thinkstats2.FitLine([-4, 4], mean, std)
    thinkplot.Plot(model_xs, model_ys, linewidth=4, color='0.8')

    curves = ((weights, 'all live'), (term_weights, 'full term'))
    thinkplot.PrePlot(len(curves))
    for sample, label in curves:
        xs, ys = thinkstats2.NormalProbability(sample)
        thinkplot.Plot(xs, ys, label=label)

    thinkplot.Save(root='analytic_birthwgt_normal',
                   title='Normal probability plot',
                   xlabel='Standard deviations from mean',
                   ylabel='Birth weight (lbs)')
コード例 #5
0
def PlotNormalProbability(sample, title="", ylabel=""):
    """Draw a normal probability plot of `sample` with a fitted model line.

    sample: values passed to TrimmedMeanVar and NormalProbability
    title: plot title forwarded to thinkplot.Config
    ylabel: y-axis label forwarded to thinkplot.Config

    The gray model line comes from FitLine over [-5, 5] with the trimmed
    mean (p=0.01) as intercept and the square root of the trimmed variance
    as slope; its legend entry shows both values to two decimals.
    """
    mu, var = thinkstats2.TrimmedMeanVar(sample, p=0.01)
    sigma = np.sqrt(var)

    # Model line first, so the data curve is drawn after it.
    line_xs, line_ys = thinkstats2.FitLine([-5, 5], inter=mu, slope=sigma)
    model_label = r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma)
    thinkplot.plot(line_xs, line_ys, color='gray', label=model_label)

    data_xs, data_ys = thinkstats2.NormalProbability(sample)
    thinkplot.Plot(data_xs, data_ys, label="actual")

    thinkplot.Config(title=title, xlabel="z", ylabel=ylabel)
コード例 #6
0
def MakeNormalPlot(weights):
    """Draw a normal probability plot of `weights` plus a model line.

    weights: sequence

    The model line is FitLine over [-5, 5] using the trimmed mean (p=0.01)
    and the square root of the trimmed variance; it is drawn in light gray
    with the label 'model', followed by the data curve labelled 'weights'.
    The function only draws; it does not show or save the figure.
    """
    mean, var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    std = math.sqrt(var)

    model_xs, model_ys = thinkstats2.FitLine([-5, 5], mean, std)
    thinkplot.Plot(model_xs, model_ys, color='0.8', label='model')

    data_xs, data_ys = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(data_xs, data_ys, label='weights')
コード例 #7
0
def MakeNormalPlot(x):
    """Show a normal probability plot for column `x` of `df`.

    x: key used to index `df`, which is not a parameter and must exist in
        the enclosing scope; `x` also appears in the plot title

    A thick light-gray model line (FitLine over [-4, 4] from the trimmed
    mean and standard deviation, p=0.01) is drawn first, then the data
    curve labelled 'Number of Crimes', and the figure is shown.
    """
    column = df[x]

    mean, var = thinkstats2.TrimmedMeanVar(column, p=0.01)
    std = math.sqrt(var)

    model_xs, model_ys = thinkstats2.FitLine([-4, 4], mean, std)
    thinkplot.Plot(model_xs, model_ys, linewidth=4, color='0.8')

    thinkplot.PrePlot(2)
    data_xs, data_ys = thinkstats2.NormalProbability(column)
    thinkplot.Plot(data_xs, data_ys, label='Number of Crimes')

    thinkplot.Show(title='Normal Prob Plot: {}'.format(x),
                   xlabel='Standard deviations from mean',
                   ylabel='Number of Crimes')
コード例 #8
0
def MakeExampleNormalPlot():
    """Generates a sample normal probability plot.

    Draws three random normal samples of 1000 values each, with
    (mu, sigma) = (0, 1), (1, 1) and (5, 2), plots the normal probability
    curve of each with its parameters in the legend, and saves the figure
    under the root name 'analytic_normal_prob_example'.
    """
    n = 1000
    thinkplot.PrePlot(3)

    mus = [0, 1, 5]
    sigmas = [1, 1, 2]
    for mu, sigma in zip(mus, sigmas):
        sample = np.random.normal(mu, sigma, n)
        xs, ys = thinkstats2.NormalProbability(sample)
        # Raw string: '\m' and '\s' are invalid escape sequences in a
        # normal string literal and trigger a warning on recent Pythons.
        # The resulting text is unchanged.
        label = r'$\mu=%d$, $\sigma=%d$' % (mu, sigma)
        thinkplot.Plot(xs, ys, label=label)

    thinkplot.Save(root='analytic_normal_prob_example',
                   title='Normal probability plot',
                   xlabel='standard normal sample',
                   ylabel='sample values')
コード例 #9
0
def MakeNormalPlot(arrivalDelays):
    """Save a normal probability plot of the arrival delays.

    Adapted from analytic.py.

    arrivalDelays: object providing .mean() and .std(); it is also passed
        to thinkstats2.NormalProbability

    The model line uses the plain (untrimmed) mean and standard deviation
    over [-4, 4].  The figure is saved under the root name
    'NormalModel_arrivaldelay_normalplot'.
    """
    delay_mean = arrivalDelays.mean()
    delay_std = arrivalDelays.std()

    # Thick light-gray reference line, drawn before the data.
    model_xs, model_ys = thinkstats2.FitLine([-4, 4], delay_mean, delay_std)
    thinkplot.Plot(model_xs, model_ys, linewidth=4, color='0.8')

    thinkplot.PrePlot(2)
    data_xs, data_ys = thinkstats2.NormalProbability(arrivalDelays)
    thinkplot.Plot(data_xs, data_ys, label='arrival delays (min)')

    thinkplot.Save(root='NormalModel_arrivaldelay_normalplot',
                   title='Normal probability plot',
                   xlabel='Standard deviations from mean',
                   ylabel='Arrival Delays (min)')
def plotDist(data):
    """Plot normal probability curves for three random subsamples of data.

    @param: data (series) - data to be plotted; must provide
            .sample(n=...), which is called once per curve with n=30

    The figure is configured but neither shown nor saved here.
    """
    n = 30
    thinkplot.PrePlot(3)

    # NOTE(review): these values only feed the legend text; the samples
    # below are drawn from `data`, not from normal distributions with
    # these parameters -- confirm the labels are intended.
    mus = [0, 1, 5]
    sigmas = [1, 1, 2]

    for mu, sigma in zip(mus, sigmas):
        sample = data.sample(n=n)
        xs, ys = thinkstats2.NormalProbability(sample)
        # Raw string: '\m' and '\s' are invalid escape sequences in a
        # normal string literal and trigger a warning on recent Pythons.
        # The resulting text is unchanged.
        label = r'$\mu=%d$, $\sigma=%d$' % (mu, sigma)
        thinkplot.Plot(xs, ys, label=label)

    thinkplot.Config(title='Normal probability plot',
                     xlabel='standard normal sample',
                     ylabel='sample values')
コード例 #11
0
# Histograms of the two PMFs, drawn with opposite alignment ('left' vs
# 'right') and different colors. male_pmf, female_pmf and width are defined
# outside this fragment.
thinkplot.Hist(male_pmf, width=width, align='left', color='blue')
thinkplot.Hist(female_pmf, width=width, align='right', color='red')
thinkplot.Config(ylabel='Probability')

# Plot the CDF of df.Age.
cdf = thinkstats2.Cdf(df.Age)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Age', ylabel='CDF')

# Normal probability plot of df.Age with a model line built from the plain
# mean and standard deviation over [-4, 4].
mean = df.Age.mean()
std = df.Age.std()
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mean, slope=std)
thinkplot.Plot(fxs, fys, color='gray', label='model')
# xs is rebound here: from the model line's x range to the data curve's xs.
xs, ys = thinkstats2.NormalProbability(df.Age)
thinkplot.Plot(xs, ys, label='Age')

# Scatter plots and correlation.
# Year vs. age: the Year values are passed through Jitter (argument .25)
# before plotting.
year = thinkstats2.Jitter(df.Year, .25)
thinkplot.Scatter(year, df.Age)
thinkplot.Show(xlabel='Year', ylabel='Age')
# NOTE(review): the return value of Corr is neither stored nor printed here.
thinkstats2.Corr(df.Year, df.Age)
# Drug vs. age.
thinkplot.Scatter(df.Age, df.Drug)
thinkplot.Show(xlabel='Age', ylabel='Drug')

# Testing a difference in gender: data is a 2-tuple of the Age value arrays
# of `male` and `female` (both defined outside this fragment), passed to
# DiffMeansPermute.
data = male.Age.values, female.Age.values
ht = DiffMeansPermute(data)
コード例 #12
0
                 xlabel='Birth weight (pounds)',
                 ylabel='CDF')

#%% [markdown]
# A normal probability plot is a visual test for normality.  The following example shows that if the data are actually from a normal distribution, the plot is approximately straight.

#%%
# Draw three random normal samples of n values each and plot the normal
# probability curve of each one, labelled with its (mu, sigma).
n = 1000
thinkplot.PrePlot(3)

mus = [0, 1, 5]
sigmas = [1, 1, 2]

for mu, sigma in zip(mus, sigmas):
    sample = np.random.normal(mu, sigma, n)
    xs, ys = thinkstats2.NormalProbability(sample)
    # Raw string: '\m' and '\s' are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Pythons. The resulting
    # text is unchanged.
    label = r'$\mu=%d$, $\sigma=%d$' % (mu, sigma)
    thinkplot.Plot(xs, ys, label=label)

thinkplot.Config(title='Normal probability plot',
                 xlabel='standard normal sample',
                 ylabel='sample values')

#%% [markdown]
# Here's the normal probability plot for birth weights, showing that the lightest babies are lighter than we expect from the normal model, and the heaviest babies are heavier.

#%%
# Trimmed mean and variance of `weights` (p=0.01); `weights` is defined
# outside this fragment.
mean, var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
std = np.sqrt(var)

# Two x values; the code that consumes them is cut off after this line.
xs = [-4, 4]
コード例 #13
0
# Evaluate the normal CDF with parameters mu and sigma at x, then plot it
# next to cdf.Probs(x). x, mu, sigma and cdf are defined outside this
# fragment.
y = thinkstats2.EvalNormalCdf(x, mu=mu, sigma=sigma)
thinkplot.plot(x, cdf.Probs(x), label='Data')
# The model's legend entry shows mu and sigma to two decimals.
thinkplot.plot(x,
               y,
               label=r'Model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))
thinkplot.Config(xlabel="weight (pounds)", ylabel="CDF")

#%% [markdown]
# ## 5.3 Normal probability plot

#%%
# Draw three random normal samples of n values each, with (mu, sigma) =
# (0, 1), (1, 1) and (5, 2), and plot the normal probability curve of each.
n = 1000
thinkplot.PrePlot(3)
for mu, sigma in zip([0, 1, 5], [1, 1, 2]):
    sample = np.random.normal(mu, sigma, n)
    xs, ys = thinkstats2.NormalProbability(sample)
    # Legend entry shows the parameters the sample was generated with.
    thinkplot.plot(xs, ys, label=r"$\mu$={} $\sigma$={}".format(mu, sigma))
thinkplot.Config(title="Normal probability plot",
                 xlabel="standard normal sample",
                 ylabel="sample value")

#%%
# Trimmed mean and variance (p=0.01) of totalwgt_lb, which is defined
# outside this fragment.
mu, var = thinkstats2.TrimmedMeanVar(totalwgt_lb, p=0.01)
# totalwgt_lb values (NaNs dropped) of the rows of `live` with
# prglngth >= 37. Not used within the visible lines of this fragment.
maturity = live[live.prglngth >= 37].totalwgt_lb.dropna()
sigma = np.sqrt(var)
# Model line over [-4, 4] with intercept mu and slope sigma, drawn in gray;
# its legend entry shows both values to two decimals.
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mu, slope=sigma)
thinkplot.plot(fxs,
               fys,
               color='gray',
               label=r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))
コード例 #14
0
# Model fits data except for in the left tail up to 15 minutes

# If data is from a normal distribution then the plot will be straight

# In[72]:


# Draw three random normal samples of n values each and plot the normal
# probability curve of each one, labelled with its (mu, sigma).
n = 1000
thinkplot.PrePlot(3)

mus = [0, 1, 5]
sigmas = [1, 1, 2]

for mu, sigma in zip(mus, sigmas):
    sample = np.random.normal(mu, sigma, n)
    xs, ys = thinkstats2.NormalProbability(sample)
    # Raw string: '\m' and '\s' are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Pythons. The resulting
    # text is unchanged.
    label = r'$\mu=%d$, $\sigma=%d$' % (mu, sigma)
    thinkplot.Plot(xs, ys, label=label)

thinkplot.Config(title='Normal probability plot',
                 xlabel='standard normal sample',
                 ylabel='sample values')


# Normal Probability Plot for trip duration

# In[73]:


# Trimmed mean and variance (p=0.01) of df.tripduration, and the square
# root of that variance. `df` is defined outside this fragment.
mean, var = thinkstats2.TrimmedMeanVar(df.tripduration, p=0.01)
std = np.sqrt(var)
コード例 #15
0
ファイル: ch5.py プロジェクト: smithb16/ThinkStats2
        cdf = thinkstats2.MakeCdfFromPmf(pmf, label=label)
        thinkplot.Cdf(cdf, label=label)

    thinkplot.Show(xlabel='x', ylabel='CDF', axis=[-1, 4, 0, 1])

    ## make data frames
    live, firsts, others = first.MakeFrames()

    ## make normal probability plot of totalwgt_lb
    full_term_wgt = live.loc[live.prglngth >= 36, 'totalwgt_lb']
    weights = live.totalwgt_lb
    thinkplot.PrePlot(2)
    fxs, fys = MakeNormalPlot(weights)
    thinkplot.Plot(fxs, fys, color='gray', label='model')

    xs, ys = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(xs, ys, label='all live')

    xs, ys = thinkstats2.NormalProbability(full_term_wgt)
    thinkplot.Plot(xs, ys, label='full term')

    thinkplot.Show(xlabel='standard deviations from mean',
                   ylabel='Birth weight (lbs)',
                   title='Normal probability plot')

    ## make Pareto CDF from random variates
    t = [ParetoVariate(alpha=2, xm=1) for _ in range(1000)]
    cdf = thinkstats2.Cdf(t)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Show(xlabel='x', ylabel='CCDF', xscale='log', yscale='log')