Example #1
0
    def MakePlot(self, root='redline2'):
        """Plots the CDFs and PMFs of pmf_z, pmf_zb and pmf_y.

        Prints the mean of each distribution divided by 60, saves the CDFs
        to a figure named `root`, then saves the PMFs to a figure named
        `root + 'a'`.

        root: string filename root for the saved figures
        """
        pmfs = self.pmf_z, self.pmf_zb, self.pmf_y

        # NOTE(review): dividing by 60 under a 'Time (min)' axis label
        # suggests the distributions are stored in seconds -- confirm.
        # Formatting everything into one string gives the same output
        # whether print is the Python 2 statement or the Python 3 function.
        for tag, pmf in zip(('z', 'zb', 'y'), pmfs):
            print('Mean %s %s' % (tag, pmf.Mean() / 60))

        cdfs = ScaleDists([pmf.MakeCdf() for pmf in pmfs], 1.0 / 60)

        thinkplot.Clf()
        thinkplot.PrePlot(3)
        thinkplot.Cdfs(cdfs)
        thinkplot.Save(root=root,
                       xlabel='Time (min)',
                       ylabel='CDF',
                       formats=FORMATS)

        # second figure: the same three distributions as PMFs
        root += 'a'
        pmfs = ScaleDists(pmfs, 1.0 / 60)
        thinkplot.PrePlot(3)
        thinkplot.Pmfs(pmfs)
        thinkplot.Save(root=root,
                       xlabel='Time (min)',
                       ylabel='Probability',
                       formats=FORMATS)
Example #2
0
def MakePlots(player1, player2):
    """Saves two figures describing the two players.

    price1: the plot drawn by MakePrice1 for both players
    price2: the CDF returned by each player's CdfDiff

    Also prints the 50th percentile of each CdfDiff and each player's
    ProbOverbid.

    player1, player2: objects providing CdfDiff() and ProbOverbid()
    """
    # first figure: whatever MakePrice1 draws for the two players
    MakePrice1(player1, player2)
    thinkplot.Save(root='price1',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # second figure: one CDF of diff per player
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    diff_cdfs = []
    for label, player in (('player 1', player1), ('player 2', player2)):
        diff_cdf = player.CdfDiff()
        diff_cdf.name = label
        diff_cdfs.append(diff_cdf)

    for diff_cdf in diff_cdfs:
        print('Player median', diff_cdf.Percentile(50))

    print('Player 1 overbids', player1.ProbOverbid())
    print('Player 2 overbids', player2.ProbOverbid())

    thinkplot.Cdfs(diff_cdfs)
    thinkplot.Save(root='price2',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
Example #3
0
def MakeBabyBoom():
    """Plot CDF of interarrival time on log and linear scales.

    Saves two figures:
      analytic_interarrivals: the CDF and the log-scale CCDF of the
          differences between consecutive values of df.minutes
      analytic_interarrivals_model: the CCDF of those differences next to
          the CCDF of an exponential sample of the same length
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals')

    n = len(diffs)
    # random.expovariate takes the rate (1 / mean).  The data are in
    # minutes, so the rate must be events per minute.  The constants are
    # presumably 44 births in a 24-hour day -- TODO confirm.  The previous
    # `44 / 24 * 60.0` multiplied by 60 instead of dividing, and on
    # Python 2 also truncated 44 / 24 to 1.
    lam = 44.0 / (24 * 60)
    sample = [random.expovariate(lam) for _ in range(n)]
    model = thinkstats2.Cdf(sample, label='model')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([cdf, model], complement=True)
    thinkplot.Save(root='analytic_interarrivals_model',
                   title='Time between births',
                   xlabel='minutes',
                   ylabel='CCDF',
                   yscale='log')
Example #4
0
def main():
    """Saves the PMF of observed speeds and the CDFs of actual vs observed.

    Reads results through the relay module, bins the speeds, builds the
    PMF of the actual speeds and the biased PMF returned by ObservedPmf,
    then saves 'observed_speeds' and 'observed_speeds_cdf'.
    """
    binned = relay.BinData(relay.GetSpeeds(relay.ReadResults()), 3, 12, 100)

    # distribution of actual speeds (only used for the CDF figure below)
    actual_pmf = thinkstats2.Pmf(binned, 'actual speeds')

    # biased distribution as returned by ObservedPmf for the value 7.5
    observed_pmf = ObservedPmf(actual_pmf, 7.5, label='observed speeds')

    thinkplot.Pmf(observed_pmf)
    thinkplot.Save(root='observed_speeds',
                   title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='PMF')

    both_cdfs = [thinkstats2.Cdf(actual_pmf), thinkstats2.Cdf(observed_pmf)]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(both_cdfs)
    thinkplot.Save(root='observed_speeds_cdf',
                   title='CDF of running speed',
                   xlabel='speed (mph)',
                   ylabel='CDF')
Example #5
0
def MakeCdfs(male, female):
    """Shows the CDFs of the alcwknd column for the two groups.

    male, female: objects exposing an `alcwknd` attribute
    """
    labelled_groups = [(male, 'Male'), (female, 'Female')]
    cdfs = [thinkstats2.Cdf(group.alcwknd, label=tag)
            for group, tag in labelled_groups]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(cdfs)
    thinkplot.Config(title='Weekend Alcohol Consumption',
                     xlabel='Alcohol Consumed (grams)',
                     ylabel='CDF')
    thinkplot.Show()
Example #6
0
def MakeCdfs(male, female):
    """Shows the CDFs of the totalwgt_lb column for the two groups.

    male, female: objects exposing a `totalwgt_lb` attribute
    """
    cdf_of_males = thinkstats2.Cdf(male.totalwgt_lb, label='Male')
    cdf_of_females = thinkstats2.Cdf(female.totalwgt_lb, label='Female')
    both = [cdf_of_males, cdf_of_females]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(both)

    labels = dict(xlabel='Baby Weight (Lbs)',
                  ylabel='CDF',
                  title='Baby Weights')
    thinkplot.Config(**labels)
    thinkplot.Show()
Example #7
0
def MakeFigures(pool, firsts, others):
    """Creates several figures for the book.

    Saves five figures: the age CDF and weight CDF of `pool`, the age CDFs
    and weight CDFs of `firsts` vs `others`, and a scatter plot of the
    ages and weights returned by GetAgeWeight(pool).

    pool, firsts, others: objects exposing `age_cdf` and `weight_cdf`
    """

    # CDF of all ages
    thinkplot.Clf()
    thinkplot.Cdf(pool.age_cdf)
    thinkplot.Save(root='agemodel_age_cdf',
                   title="Distribution of mother's age",
                   xlabel='age (years)',
                   ylabel='CDF',
                   legend=False)

    # CDF of all weights
    thinkplot.Clf()
    thinkplot.Cdf(pool.weight_cdf)
    thinkplot.Save(root='agemodel_weight_cdf',
                   title="Distribution of birth weight",
                   xlabel='birth weight (oz)',
                   ylabel='CDF',
                   legend=False)

    # plot CDFs of birth ages for first babies and others
    thinkplot.Clf()
    thinkplot.Cdfs([firsts.age_cdf, others.age_cdf])
    thinkplot.Save(root='agemodel_age_cdfs',
                   title="Distribution of mother's age",
                   xlabel='age (years)',
                   ylabel='CDF')

    # plot CDFs of birth weights for first babies and others
    thinkplot.Clf()
    thinkplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    thinkplot.Save(root='agemodel_weight_cdfs',
                   title="Distribution of birth weight",
                   xlabel='birth weight (oz)',
                   ylabel='CDF')

    # make a scatterplot of ages and weights
    ages, weights = GetAgeWeight(pool)
    # Use the same capitalised Clf as every other figure in this function;
    # the lowercase spelling relies on an alias that may not be defined.
    thinkplot.Clf()
    thinkplot.Scatter(ages, weights, alpha=0.2)
    thinkplot.Save(root='agemodel_scatter',
                   xlabel='Age (years)',
                   ylabel='Birth weight (oz)',
                   legend=False)
Example #8
0
def MakePrice2(player1, player2):
    """Plots the CDF returned by CdfDiff for each of the two players.

    player1, player2: objects providing CdfDiff()
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    named_cdfs = []
    for label, player in (('player 1', player1), ('player 2', player2)):
        diff_cdf = player.CdfDiff()
        diff_cdf.name = label
        named_cdfs.append(diff_cdf)

    thinkplot.Cdfs(named_cdfs)
Example #9
0
def MakeCdfs(greq, less):
    """Shows the CDFs of totalwgt_lb for two groups and prints their medians.

    greq: object exposing `totalwgt_lb`, labelled 'greater/equal to 30'
    less: object exposing `totalwgt_lb`, labelled 'less than 30'

    After the plot is shown, prints the 50th percentile of each CDF.
    """
    greqcdf = thinkstats2.Cdf(greq.totalwgt_lb, label='greater/equal to 30')
    lesscdf = thinkstats2.Cdf(less.totalwgt_lb, label='less than 30')
    thinkplot.PrePlot(2)
    thinkplot.Cdfs([greqcdf, lesscdf])
    thinkplot.Config(xlabel='Weight (lbs)', ylabel='CDF')
    thinkplot.Show()

    # One pre-formatted argument prints the same text whether print is the
    # Python 2 statement or the Python 3 function.
    print('Greater/equal to 30 50th percentile: %s'
          % (greqcdf.Percentile(50),))
    print('Less than 30 50th percentile: %s' % (lesscdf.Percentile(50),))
Example #10
0
def TestSample(live):
    """Saves a plot of the totalwgt_lb CDF next to a 1000-value sample of it.

    live: object exposing a `totalwgt_lb` attribute
    """
    full_cdf = thinkstats2.Cdf(live.totalwgt_lb, label='totalwgt_lb')

    # CDF of 1000 values drawn through the full CDF's Sample method
    resampled_cdf = thinkstats2.Cdf(full_cdf.Sample(1000), label='sample')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([full_cdf, resampled_cdf])
    thinkplot.Save(root='cumulative_sample',
                   xlabel='weight (pounds)',
                   ylabel='CDF')
Example #11
0
def CH6_5(diff1, diff2):
    """
    Plot the CDFs of the bid differences for the two groups of showcases.

    diff1, diff2: lists of differences, one list per group

    After the plot is shown, prints the value of each CDF at 0.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    named_diffs = [(diff1, 'diff1'), (diff2, 'diff2')]
    cdfs = [thinkbayes.MakeCdfFromList(values, name=tag)
            for values, tag in named_diffs]

    thinkplot.Cdfs(cdfs)
    thinkplot.Show(xlabel='diff $', ylabel="CDF")

    # Compute CDF(diff <= 0) to judge whether the contestants tend to
    # underestimate the price of the goods
    first_cdf, second_cdf = cdfs
    print(first_cdf.Prob(0), second_cdf.Prob(0))
Example #12
0
def PlotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For every (label, sample) pair, keeps the values below 150 and builds
    a CDF from them; all CDFs are saved in one figure with root
    'variability_cdfs'.

    samples: map from label to sequence of values
    """
    # items() exists on both Python 2 and Python 3 mappings, whereas
    # iteritems() is Python 2 only.
    cdfs = [
        thinkbayes.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.items()
    ]

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
Example #13
0
def TestSample(live):
    """Saves the CDF of the weights together with the CDF of a sample.

    The sample holds 1000 values obtained from the weight CDF's Sample
    method; the figure is saved with root 'cumulative_sample'.

    live: DataFrame for live births
    """
    population = thinkstats2.Cdf(live.totalwgt_lb, label='totalwgt_lb')
    drawn = population.Sample(1000)
    drawn_cdf = thinkstats2.Cdf(drawn, label='sample')

    curves = [population, drawn_cdf]
    thinkplot.PrePlot(len(curves))
    thinkplot.Cdfs(curves)

    save_options = dict(root='cumulative_sample',
                        xlabel='weight (pounds)',
                        ylabel='CDF')
    thinkplot.Save(**save_options)
Example #14
0
    def PlotPosteriors(self, other):
        """Saves a figure with the posterior CDFs of self and other.

        Each curve is labelled 'posterior <score>' using the object's
        `score` attribute.

        self, other: Sat objects.
        """
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)

        posterior_cdfs = [
            thinkbayes2.Cdf(sat, label='posterior %d' % sat.score)
            for sat in (self, other)
        ]

        thinkplot.Cdfs(posterior_cdfs)
        thinkplot.Save(root='sat_posteriors_eff',
                       formats=['pdf', 'eps'],
                       axis=[0, 4.6, 0.0, 1.0],
                       xlabel='efficacy',
                       ylabel='CDF')
Example #15
0
def TestCorrelation(cdf):
    """Tests the correlated generator.

    Draws 10000 values from CorrelatedGenerator(cdf, 0.4), prints the
    requested rho next to the serial correlation of the drawn sequence,
    and shows the CDF of the sequence against the input CDF so the two
    can be compared.

    cdf: distribution passed to CorrelatedGenerator and plotted
    """
    n = 10000
    rho = 0.4

    rdt_seq = CorrelatedGenerator(cdf, rho)
    # The built-in next() works on Python 2.6+ and Python 3; the .next()
    # method only exists on Python 2 iterators.
    xs = [next(rdt_seq) for _ in range(n)]

    rho2 = correlation.SerialCorr(xs)
    print(rho, rho2)
    cdf2 = thinkbayes.MakeCdfFromList(xs)

    thinkplot.Cdfs([cdf, cdf2])
    thinkplot.Show()
Example #16
0
    def MakePlot(self, root='redline1'):
        """Saves a plot of the prior and posterior CDFs of prior_lam/post_lam.

        root: string filename root for the saved figure
        """
        thinkplot.Clf()
        thinkplot.PrePlot(2)

        # Scale(60) converts the units to passengers per minute
        scaled_cdfs = [dist.MakeCdf().Scale(60)
                       for dist in (self.prior_lam, self.post_lam)]

        thinkplot.Cdfs(scaled_cdfs)

        save_options = dict(root=root,
                            xlabel='Arrival rate (passengers / min)',
                            ylabel='CDF',
                            formats=FORMATS)
        thinkplot.Save(**save_options)
Example #17
0
def compareAlaskaAirlinesCdf(alaska, others):
    """Save a CDF figure comparing Alaska Airlines with other airlines.

       Motivation, per JD Power: among traditional carriers, Alaska
       Airlines ranks highest for the 12th consecutive year
       https://www.jdpower.com/business/press-releases/2019-north-america-airline-satisfaction-study

       alaska, others: objects exposing an `ARRIVAL_DELAY` attribute
    """
    # one CDF of arrival delay per carrier group
    carriers = [(alaska, 'Alaska Airlines'), (others, 'other')]
    delay_cdfs = [thinkstats2.Cdf(flights.ARRIVAL_DELAY, label=tag)
                  for flights, tag in carriers]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(delay_cdfs)

    # Interactive alternative that was left disabled:
    #    thinkplot.Show(xlabel='arrival delay (min)', ylabel='CDF', axis=[-20, 40, 0, 1])
    thinkplot.Save(root='AlaskaAirlines_ArrivalDelay_cdf',
                   title='Arrival delay',
                   axis=[-20, 40, 0, 1],
                   xlabel='arrival delay (min)',
                   ylabel='CDF')
Example #18
0
def MakeFigures(live, firsts, others):
    """Saves the PMF and CDF figures of totalwgt_lb for firsts vs others.

    Prints, for each group, the number of weights before and after
    dropna().

    live: DataFrame (not read by this function)
    firsts: DataFrame
    others: DataFrame
    """
    def _report(tag, weights):
        """Prints the sizes before/after dropna; returns the cleaned values."""
        cleaned = weights.dropna()
        print(tag, len(weights), len(cleaned))
        return cleaned

    # (disabled asserts in the earlier version expected 4381 and 4706 rows)
    first_clean = _report('Firsts', firsts.totalwgt_lb)
    other_clean = _report('Others', others.totalwgt_lb)

    pmf_first = thinkstats2.Pmf(first_clean, label='first')
    pmf_other = thinkstats2.Pmf(other_clean, label='other')

    bar_width = 0.4 / 16

    # PMFs of birth weight, drawn as bars with opposite alignment
    thinkplot.PrePlot(2)
    thinkplot.Hist(pmf_first, align='right', width=bar_width)
    thinkplot.Hist(pmf_other, align='left', width=bar_width)
    thinkplot.Save(root='cumulative_birthwgt_pmf',
                   title='Birth weight',
                   xlabel='weight (pounds)',
                   ylabel='PMF')

    # CDFs of birth weight, built from the columns without dropna()
    cdf_pair = [thinkstats2.Cdf(firsts.totalwgt_lb, label='first'),
                thinkstats2.Cdf(others.totalwgt_lb, label='other')]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(cdf_pair)
    thinkplot.Save(root='cumulative_birthwgt_cdf',
                   title='Birth weight',
                   xlabel='weight (pounds)',
                   ylabel='CDF',
                   axis=[0, 12.5, 0, 1])
Example #19
0
    def MakePlot(self, root='redline3'):
        """Saves a plot of the CDFs of prior_x, post_x and pmf_y.

        The three CDFs are passed through ScaleDists with factor 1.0 / 60
        before plotting.

        root: string filename root for the saved figure
        """
        sources = (self.prior_x, self.post_x, self.pmf_y)
        scaled = ScaleDists([dist.MakeCdf() for dist in sources], 1.0 / 60)

        thinkplot.Clf()
        thinkplot.PrePlot(3)
        thinkplot.Cdfs(scaled)

        save_options = dict(root=root,
                            xlabel='Time (min)',
                            ylabel='CDF',
                            formats=FORMATS)
        thinkplot.Save(**save_options)
Example #20
0
    def PlotConditionalCdfs(self):
        """Saves a plot of the conditional CDF of age for four buckets.

        Each CDF comes from self.cache.ConditionalCdf(bucket, name).
        """
        # (bucket, legend name); the earlier comment gave the diameters as
        # 2.01, 4.95 cm, 9.97 cm, 14.879 cm
        labelled_buckets = [
            (7.0, '2 cm'),
            (16.0, '5 cm'),
            (23.0, '10 cm'),
            (27.0, '15 cm'),
        ]
        cdfs = [self.cache.ConditionalCdf(bucket, name)
                for bucket, name in labelled_buckets]

        thinkplot.Clf()
        thinkplot.PrePlot(num=len(cdfs))
        thinkplot.Cdfs(cdfs)
        thinkplot.Save(root='kidney6',
                       formats=FORMATS,
                       title='Distribution of age for several diameters',
                       xlabel='tumor age (years)',
                       ylabel='CDF',
                       loc=4)
Example #21
0
def PlotPosteriors():
    """Compares the posteriors for two sets of fake data and plots their CDFs.

    Seeds the random generator, builds posteriors from FakeData(100, 0.03)
    and FakeData(100, 0.05), prints two comparison scores and shows both
    posterior CDFs on the axis [0, 0.2, 0, 1].
    """
    thinkbayes.RandomSeed(18)

    data1 = FakeData(100, 0.03)
    data2 = FakeData(100, 0.05)

    pmf1 = MakePosterior(data1, name="headline a")
    pmf2 = MakePosterior(data2, name="headline b")

    # NOTE(review): the comparison operators are used as if they return
    # numbers (presumably the probability of each ordering) -- confirm
    # against the Pmf class.
    lt = pmf1 < pmf2
    eq = pmf1 == pmf2
    gt = pmf1 > pmf2

    # Half of the tie value is credited to each side.  A single
    # parenthesised expression prints identically on Python 2 and 3.
    print(lt + eq / 2)
    print(gt + eq / 2)

    cdf1 = pmf1.MakeCdf()
    cdf2 = pmf2.MakeCdf()
    thinkplot.PrePlot(num=2)
    thinkplot.Cdfs([cdf1, cdf2])
    thinkplot.Show(axis=[0, 0.2, 0, 1])
# Draw the wins and runs PMFs as bars in one figure; the opposite `align`
# values presumably place the two sets of bars next to each other.
# NOTE(review): wins_pmf and runs_pmf are not defined in this excerpt.
width=0.45
thinkplot.PrePlot(2, cols=2)
thinkplot.Hist(wins_pmf, align='right', width=width)
thinkplot.Hist(runs_pmf, align='left', width=width)
thinkplot.Config(xlabel='Result', ylabel='PMF')


# In[30]:

# Building the CDFs of both the wins and the runs
wins_cdf = thinkstats2.Cdf(wins, label='Wins')
runs_cdf = thinkstats2.Cdf(runs, label='Runs')

# Plot both CDFs on shared axes.
thinkplot.PrePlot(2)
thinkplot.Cdfs([wins_cdf, runs_cdf])
thinkplot.Config(xlabel='Result', ylabel='CDF')


# In[32]:

# NOTE(review): this is a bare expression -- it evaluates data.W.values but
# does not bind the result to any name.
(data.W.values) # I will use these numpy arrays later, that's why i created them here.


# In[34]:

# Bare expression again, this time for the R column.
(data.R.values)


# In[46]:
# anim = viewer.animate(frames=100)
# # plt.show()

# Fix the seed before building the environment.
RandomSeed(17)

env = Sugarscape(50,
                 num_agents=250,
                 min_lifespan=60,
                 max_lifespan=100,
                 replace=True)

# Take five snapshots of the agents' sugar, 100 steps apart.
cdfs = []
for i in range(5):
    # Plain loop: the steps are run only for their side effects, so there
    # is no reason to build a throwaway list or reuse the outer index.
    for _ in range(100):
        env.step()
    cdf = Cdf(agent.sugar for agent in env.agents)
    cdfs.append(cdf)

# Left panel: earlier snapshots in gray, the last snapshot on top.
thinkplot.preplot(cols=2)
thinkplot.Cdfs(cdfs[:-1], color='gray', alpha=0.3)
thinkplot.Cdf(cdfs[-1])
thinkplot.config(xlabel='Wealth', ylabel='CDF')
thinkplot.bigger_text()

# Right panel: the same curves on a log x scale.
thinkplot.subplot(2)
thinkplot.Cdfs(cdfs[:-1], color='gray', alpha=0.3)
thinkplot.Cdf(cdfs[-1])
thinkplot.config(xlabel='Wealth', ylabel='CDF', xscale='log')
thinkplot.bigger_text()

thinkplot.save('chap09-4')
Example #24
0
# Show the current figure with axis labels for a CDF over weeks.
thinkplot.show(xlabel='weeks', ylabel='CDF')

#%%

# Report cdf.Value at 0.1 and 0.9, labelled as the 10% and 90% points.
# NOTE(review): `cdf` is defined before this excerpt.
print("10% {0} weeks".format(cdf.Value(0.1)))
print("90% {0} weeks".format(cdf.Value(0.9)))

#%% [markdown]

# ## 4.5 Comparing CDFs

#%%
# CDFs of totalwgt_lb for the `firsts` and `others` groups, on shared axes.
first_cdf = thinkstats2.Cdf(firsts.totalwgt_lb, label='first')
other_cdf = thinkstats2.Cdf(others.totalwgt_lb, label='other')
thinkplot.PrePlot(2)
thinkplot.Cdfs([first_cdf, other_cdf])
thinkplot.Show(xlabel='weight (pounds)', ylabel='CDF') 

#%% [markdown]

# ## 4.6 Percentile-based statistics

# - Median: the 50th percentile
# - Interquartile range (IQR): the 75th percentile minus the 25th percentile
# - Quantiles: statistics taken at equally spaced points of the CDF
#%% [markdown]
# ## 4.7 Random numbers

#%%
import numpy as np
# Weight column reused by the cells that follow.
weights = live.totalwgt_lb
Example #25
0
# Plot the two PMFs on the fixed axis range below.
# NOTE(review): clicked_pmf and nonclicked_pmf are defined before this
# excerpt.
axis = [10000, 70000, 0, 0.01]
thinkplot.PrePlot(2)
#thinkplot.SubPlot(2)
thinkplot.Pmfs([clicked_pmf, nonclicked_pmf])
thinkplot.Config(xlabel='Area Income', axis=axis)
thinkplot.show()

############################################################################
#############################Section 3 -CDF#################################
############################################################################

# CDFs of Daily_Time_Spent for the two age-group datasets.
age_grp_30_to_39_cdf = thinkstats2.Cdf(age_grp_30_to_39_ds.Daily_Time_Spent, label='30-39')
age_grp_18_to_29_cdf = thinkstats2.Cdf(age_grp_18_to_29_ds.Daily_Time_Spent, label='18-29')

thinkplot.PrePlot(2)
thinkplot.Cdfs([age_grp_30_to_39_cdf, age_grp_18_to_29_cdf])
thinkplot.Config(xlabel='Daily Time Spent in minutes', ylabel='CDF')
thinkplot.show()

# CDFs of Daily_Time_Spent for the male and female datasets.
male_cdf = thinkstats2.Cdf(male_ds.Daily_Time_Spent, label='male')
female_cdf = thinkstats2.Cdf(female_ds.Daily_Time_Spent, label='female')

thinkplot.PrePlot(2)
thinkplot.Cdfs([male_cdf, female_cdf])
thinkplot.Config(xlabel='Daily Time Spent in minutes', ylabel='CDF')
thinkplot.show()



##################################################################################################
############################# Section 4 -Analytical Distribution #################################
Example #26
0
from code import *
import thinkplot

# NOTE(review): y1, y, X3 and thinkstats2 are not defined or imported in
# this excerpt; presumably they come from `from code import *` -- confirm.
y2 = y1.flatten()

# Histogram of the PMF of the flattened values.
pmf_scores = thinkstats2.Pmf(y2)
thinkplot.Hist(pmf_scores)
thinkplot.Config(xlabel='Runs Scored', ylabel='probability', axis=[0, 20, 0, 0.3])

# CDFs of the flattened values and of three X3 percentage columns.
cdf_scores = thinkstats2.Cdf(y2, label='Runs Scored')
cdf_ld = thinkstats2.Cdf(X3['bat_LD%'], label='Line Drives')
cdf_pop = thinkstats2.Cdf(X3['bat_POP%'], label='Pop Ups')
cdf_gb = thinkstats2.Cdf(X3['bat_GB%'], label='Ground Balls')

# NOTE(review): cdf_scores is labelled 'Runs Scored' but is drawn on the
# same 'balls in play (%)' axis as the percentage columns -- confirm this
# is intended.
thinkplot.PrePlot(4)
thinkplot.Cdfs([cdf_scores, cdf_ld, cdf_pop, cdf_gb])
thinkplot.Show(xlabel='balls in play (%)', ylabel='CDF')

# Visualizing data in One Dimension (1-D)
import matplotlib.pyplot as plt
y.hist(bins=15, color='steelblue', edgecolor='black', linewidth=1.0,
           xlabelsize=8, ylabelsize=8, grid=False)    
# NOTE(review): matplotlib documents `rect` as (left, bottom, right, top)
# in normalized figure coordinates; (0, 15, 0, 15) has left == right and
# lies outside 0..1 -- confirm the intended rectangle.
plt.tight_layout(rect=(0, 15, 0, 15)) 

# visualizing one of the continuous, numeric attributes
# Histogram
fig = plt.figure(figsize = (10,4))
title = fig.suptitle("Runs", fontsize=14)
fig.subplots_adjust(top=0.85, wspace=0.1)

# Single axes covering the whole 1x1 grid.
ax = fig.add_subplot(1,1, 1)
Example #27
0
def WeightDiffInFirstOther(first, other):
    """Shows the CDFs of totalwgt_lb for the two groups on shared axes.

    first, other: objects exposing a `totalwgt_lb` attribute
    """
    labelled_groups = [(first, 'First'), (other, 'Other')]
    weight_cdfs = [thinkstats2.Cdf(group.totalwgt_lb, label=tag)
                   for group, tag in labelled_groups]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(weight_cdfs)
    thinkplot.Show(ylabel='CDF', xlabel='Weight (pounds)')
Example #28
0
# Label the axes of the figure produced before this excerpt.
thinkplot.Config(xlabel='trip duration (minutes)', ylabel='CDF', loc='upper left')


# In[33]:


# CDF of trip duration for the chilly_df rows.
chilly_tripduration_cdf = thinkstats2.Cdf(chilly_df.tripduration, label='chilly trip duration')
thinkplot.Cdf(chilly_tripduration_cdf)
thinkplot.Config(xlabel='trip duration (minutes)', ylabel='CDF', loc='upper left')


# In[34]:


# Comparison of the two CDFs on shared axes.
# NOTE(review): warm_tripduration_cdf is defined before this excerpt.
thinkplot.Cdfs([chilly_tripduration_cdf,warm_tripduration_cdf])
thinkplot.Show(xlabel='trip duration (minutes)',ylabel='CDF')


# By comparing colder and warmer temperatures with their duration, we can see that chilly bike rides are slightly shorter than warmer bike rides

# Analytical Distribution

# In[35]:


# Normal CDFs, for visualization
thinkplot.PrePlot(3)

# Means and standard deviations, paired by position.
mus = [1.0, 2.0, 3.0]
sigmas = [0.5, 0.4, 0.3]
Example #29
0
# Split moving_time at 10800 and build one PMF per side.
# NOTE(review): 10800 equals 3 * 3600, so moving_time is presumably in
# seconds, while the labels below say 'Min' -- confirm the units.
over_three_hr = moving_time[moving_time > 10800]
less_three_hr = moving_time[moving_time <= 10800]
pmf_more = thinkstats2.Pmf(over_three_hr, label="More Than Three HR")
pmf_less = thinkstats2.Pmf(less_three_hr, label='Less Than Three HR')
# NOTE(review): pmf_stuff is defined outside this excerpt; the meaning of
# its positional arguments cannot be checked from here.
pmf_stuff(1, 8000, 22000, 0, pmf_more, pmf_less, 'Ride Length (Min)', 0.02)

########### PART SIX ############
# CDF of all moving times.
cdf = thinkstats2.Cdf(moving_time, label='Moving Time')
thinkplot.Cdf(cdf)
thinkplot.Show(xlabel='Moving Time in Min', ylabel='CDF')

# CDFs of the two one-hour groups on shared axes.
# NOTE(review): over_one_hr and less_one_hr are defined before this excerpt.
more_cdf = thinkstats2.Cdf(over_one_hr, label='Over Than One Hr')
less_cdf = thinkstats2.Cdf(less_one_hr, label='Less Than One Hr')
thinkplot.PrePlot(2)
thinkplot.Cdfs([more_cdf, less_cdf])
thinkplot.Show(xlabel='Moving Time (Min)', ylabel='CDF')

########### PART SEVEN ############
# Drop missing values from average_watts.
avg_watts = average_watts.dropna()


def MakeNormalModel(data, label):
    cdf = thinkstats2.Cdf(data, label=label)

    mean, var = thinkstats2.TrimmedMeanVar(data)
    std = np.sqrt(var)
    print('n, mean, std', len(data), mean, std)

    xmin = mean - 4 * std
    xmax = mean + 4 * std