Ejemplo n.º 1
0
def PlotHazard(complete, ongoing):
    """Draw the estimated hazard curve together with two survival curves.

    The first survival curve is built from the complete lifetimes alone;
    the second is derived from the hazard function estimated from both
    the complete and the ongoing lifetimes.

    complete: list of complete lifetimes
    ongoing: list of ongoing lifetimes
    """
    # survival curve that uses only the complete lifetimes
    complete_only_sf = SurvivalFunction(thinkstats2.Cdf(complete))
    thinkplot.Plot(complete_only_sf, label='old S(t)', alpha=0.1)

    thinkplot.PrePlot(2)

    # hazard function estimated from complete and ongoing lifetimes
    hazard = EstimateHazardFunction(complete, ongoing)
    thinkplot.Plot(hazard, label='lams(t)', alpha=0.5)

    # survival curve derived from that hazard function
    hazard_sf = hazard.MakeSurvival()
    thinkplot.Plot(hazard_sf, label='S(t)')

    thinkplot.Show(xlabel='t (weeks)')
Ejemplo n.º 2
0
    def testNormalPdf(self):
        """Check NormalPdf densities and its conversions to Pmf and Cdf."""
        normal = thinkstats2.NormalPdf(mu=1, sigma=2)
        self.assertEqual(len(str(normal)), 29)
        self.assertAlmostEqual(normal.Density(3), 0.12098536226)

        # Pmf obtained through MakePmf; Render is called on the Pdf itself
        made_pmf = normal.MakePmf()
        self.assertAlmostEqual(made_pmf[1.0], 0.0239951295619)
        pdf_xs, pdf_ps = normal.Render()
        self.assertEqual(pdf_xs[0], -5.0)
        self.assertAlmostEqual(pdf_ps[0], 0.0022159242059690038)

        # Pmf constructed directly from the Pdf
        built_pmf = thinkstats2.Pmf(normal)
        self.assertAlmostEqual(built_pmf[1.0], 0.0239951295619)
        pmf_xs, pmf_ps = built_pmf.Render()
        self.assertEqual(pmf_xs[0], -5.0)
        self.assertAlmostEqual(pmf_ps[0], 0.00026656181123)

        # Cdf constructed directly from the Pdf
        built_cdf = thinkstats2.Cdf(normal)
        self.assertAlmostEqual(built_cdf[1.0], 0.51199756478094904)
        cdf_xs, cdf_ps = built_cdf.Render()
        self.assertEqual(cdf_xs[0], -5.0)
        self.assertAlmostEqual(cdf_ps[0], 0.0)
Ejemplo n.º 3
0
def BinnedPercentiles(df):
    """Plot percentiles of birth weight against mother's age, by age bin.

    Rows are grouped by the bin index of agepreg (bin edges 10, 13, ...,
    46).  The first and last groups are discarded; for the rest, the
    75th, 50th and 25th percentiles of totalwgt_lb are plotted against
    the group's mean agepreg.  The figure is saved under the root name
    'chap07scatter3' in jpg format.

    df: DataFrame
    """
    edges = np.arange(10, 48, 3)
    bin_ids = np.digitize(df.agepreg, edges)

    mean_ages = []
    weight_cdfs = []
    for _, frame in df.groupby(bin_ids):
        mean_ages.append(frame.agepreg.mean())
        weight_cdfs.append(thinkstats2.Cdf(frame.totalwgt_lb))

    # drop the two end groups
    mean_ages = mean_ages[1:-1]
    weight_cdfs = weight_cdfs[1:-1]

    thinkplot.PrePlot(3)
    for percent in (75, 50, 25):
        curve = [cdf.Percentile(percent) for cdf in weight_cdfs]
        thinkplot.Plot(mean_ages, curve, label='%dth' % percent)

    save_options = dict(root='chap07scatter3',
                        formats=['jpg'],
                        xlabel="mother's age (years)",
                        ylabel='birth weight (lbs)')
    thinkplot.Save(**save_options)
Ejemplo n.º 4
0
def BinPerc(df):
    """Plot the 25th, 50th and 75th weight percentiles per age bin.

    Rows are grouped by the bin index of agepreg (bin edges 10, 13, ...,
    46); the first and last groups are discarded.  Each percentile of
    totalwgt_lb is drawn with matplotlib against the mean agepreg of the
    remaining groups.

    param: df (data frame) - contains ages and weights
    """
    edges = np.arange(10, 48, 3)
    bin_ids = np.digitize(df.agepreg, edges)

    mean_ages = []
    weight_cdfs = []
    for _, frame in df.groupby(bin_ids):
        mean_ages.append(frame.agepreg.mean())
        weight_cdfs.append(thinkstats2.Cdf(frame.totalwgt_lb))

    # drop the two end groups
    mean_ages = mean_ages[1:-1]
    weight_cdfs = weight_cdfs[1:-1]

    plt.style.use('ggplot')

    for percent in (25, 50, 75):
        curve = [cdf.Percentile(percent) for cdf in weight_cdfs]
        plt.plot(mean_ages, curve, label=str(percent))

    plt.title("Percentiles of Birth weight vs Mother's Age")
    plt.xlabel("Age (years)")
    plt.ylabel("Birth Weight (lbs)")
    plt.legend()
    plt.xlim(14, 45)
def main():
    """Count the distinct values produced by repeated ParetoSample calls.

    Calls ParetoSample(1.7, 0.001, 10000) 10000 times, accumulates every
    returned value in a Counter, prints the number of distinct values and
    returns.  All statements after the first ``return`` are unreachable
    as written.
    """

    counter = Counter()
    for i in range(10000):
        sample = ParetoSample(1.7, 0.001, 10000)
        # fold this sample's value counts into the running tally
        counter.update(Counter(sample))

    print(len(counter))
    return

    # NOTE(review): unreachable -- the `return` above always exits first.
    # Presumably kept as a manually toggled stage; confirm before removing.
    pmf = thinkstats2.Pmf(counter)
    print('mean', pmf.Mean())
    for x, prob in pmf.Largest(10):
        print(x)

    cdf = thinkstats2.Cdf(pmf)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Show(xscale='log', yscale='log')
    return

    # NOTE(review): also unreachable (behind both early returns above).
    MakeFigure()
    MakeParetoCdf()
    print(TallestPareto(iters=2))
Ejemplo n.º 6
0
def MakeBabyBoom():
    """Save a two-subplot figure of the interarrival-time distribution.

    The first subplot shows the CDF of the differences between
    consecutive `minutes` values; the second shows the complementary CDF
    with a log-scaled y axis.  The figure is saved under the root name
    'analytic_interarrivals'.
    """
    # interarrival times
    births = ReadBabyBoom()
    gaps = births.minutes.diff()
    gap_cdf = thinkstats2.Cdf(gaps, label='actual')

    # options shared by both subplots
    common = dict(xlabel='minutes', legend=False)

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(gap_cdf)
    thinkplot.Config(ylabel='CDF', **common)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(gap_cdf, complement=True)
    thinkplot.Config(ylabel='CCDF', yscale='log', **common)

    thinkplot.Save(root='analytic_interarrivals', legend=False)
def PlotResidualPercentiles(model, results, index=1, num_bins=20):
    """Plot the 75th, 50th and 25th percentiles of the residuals by bin.

    The chosen exogenous column is split at num_bins evenly spaced edges
    between its minimum and maximum.  The first and last groups are
    discarded; for the rest, each percentile of the residuals is plotted
    against the group's mean exogenous value.

    model: StatsModel model object
    results: StatsModel results object
    index: which exogenous variable to use
    num_bins: how many bins to divide the x-axis into
    """
    xs = model.exog[:, index]
    frame = pandas.DataFrame({'exog': xs, 'resid': results.resid.values})

    edges = np.linspace(np.min(xs), np.max(xs), num_bins)
    bin_ids = np.digitize(xs, edges)

    bin_means = []
    resid_cdfs = []
    for _, part in frame.groupby(bin_ids):
        bin_means.append(part.exog.mean())
        resid_cdfs.append(thinkstats2.Cdf(part.resid))

    # drop the two end groups
    bin_means = bin_means[1:-1]
    resid_cdfs = resid_cdfs[1:-1]

    thinkplot.PrePlot(3)
    for percent in (75, 50, 25):
        curve = [cdf.Percentile(percent) for cdf in resid_cdfs]
        thinkplot.Plot(bin_means, curve, label='%dth' % percent)
Ejemplo n.º 8
0
def EstimateGoals(lam, m):
    """Simulate m games and plot the distribution of the simulated goals.

    Prints the RMSE and mean error of the simulated values relative to
    lam, marks the 5th and 95th percentiles with vertical lines, draws
    the CDF and saves the figure as a png under the root name
    'Q9_sampling_dist'.

    lam: value passed to SimulateGame and used as the reference value
    m: number of games to simulate
    """
    def draw_vertical(x, height=1):
        # light-gray vertical segment from 0 up to `height` at position x
        thinkplot.Plot([x, x], [0, height], color='0.8', linewidth=3)

    simulated = [SimulateGame(lam) for _ in range(m)]

    print('RMSE of Goals: ', estimation.RMSE(simulated, lam))
    print('Mean Error of Goals: ', estimation.MeanError(simulated, lam))

    goal_cdf = thinkstats2.Cdf(simulated)

    # compute both bounds first, then draw them
    bounds = [goal_cdf.Percentile(q) for q in (5, 95)]
    for bound in bounds:
        draw_vertical(bound)

    thinkplot.Cdf(goal_cdf)
    thinkplot.SaveFormat(
        root='Q9_sampling_dist',
        fmt='png',
        xlabel='Goals',
        ylabel='CumProb',
        title='Sampling Distribution, lam = ' + str(lam),
    )
Ejemplo n.º 9
0
    xs, ps = thinkstats2.RenderExpoCdf(lam, 0, 3.0, 50)
    label = r'$\lambda=%g$' % lam
    thinkplot.Plot(xs, ps, label=label)

# Title, axis labels and `loc` setting for the exponential CDF curves
# plotted just above.
thinkplot.Config(title='Exponential CDF',
                 xlabel='x',
                 ylabel='CDF',
                 loc='lower right')

#%% [markdown]
# Here's the distribution of interarrival times from a dataset of birth times.

#%%
# Differences between consecutive values of the `minutes` column.
# NOTE(review): if this is a pandas Series, diff() leaves NaN in the first
# slot -- confirm how thinkstats2.Cdf treats it.
df = analytic.ReadBabyBoom()
diffs = df.minutes.diff()
cdf = thinkstats2.Cdf(diffs, label='actual')

thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Time between births (minutes)', ylabel='CDF')

#%% [markdown]
# Here's what the CCDF looks like on a log-y scale.  A straight line is consistent with an exponential distribution.

#%%
# Same Cdf, drawn as its complement with a log-scaled y axis.
thinkplot.Cdf(cdf, complement=True)
thinkplot.Config(xlabel='Time between births (minutes)',
                 ylabel='CCDF',
                 yscale='log',
                 loc='upper right')

#%% [markdown]
    # each range
    arrays = []
    for _, row in df.iterrows():
        vals = np.linspace(row.log_lower, row.log_upper, row.freq)
        arrays.append(vals)

    # collect the arrays into a single sample
    log_sample = np.concatenate(arrays)
    return log_sample

#%%
# Build the log-scale sample from df with InterpolateSample
# (presumably the modified version whose body appears just above -- confirm).
log_sample = InterpolateSample(df)

#%% get the cdf and plot it
log_cdf = thinkstats2.Cdf(log_sample)
thinkplot.Cdf(log_cdf)

# Raise 10 to each log value to get the sample on the original scale;
# the mean and median below are computed on that scale.
sample = np.power(10, log_sample)

# NOTE(review): density.Summarize is unpacked as (mean, median); confirm
# that this matches the order it returns.
mean, median = density.Summarize(sample)

#print("The mean is: {}".format(mean))
#print("The median is: {}".format(median))

#%%
# Fraction of households below the mean: the Cdf of the sample evaluated
# at the mean (presumably P(x <= mean) -- confirm Cdf indexing semantics).
cdf = thinkstats2.Cdf(sample)
print('The fraction of households below the mean: {:.2f}'.format(cdf[mean]))
Ejemplo n.º 11
0
def Median(xs):
    """Return the value at cumulative probability 0.5 of the Cdf of xs."""
    return thinkstats2.Cdf(xs).Value(0.5)
Ejemplo n.º 12
0
    ## make pdf of birth weight and calculate statistics
    pdf = thinkstats2.EstimatedPdf(birth_weights)
    thinkplot.Pdf(pdf, label='birth weight')
    thinkplot.Show(xlabel='PDF', ylabel='lbs')

    ## make adult weight data frames
    adult_weights = df.wtkg2.dropna()

    ## evaluate skewness of adult weights
    pdf = thinkstats2.EstimatedPdf(adult_weights)
    thinkplot.Pdf(pdf, label='Adult weight')
    thinkplot.Show(xlabel='Adult weight (kg)', ylabel='PDF')

    ## weight kurtosis
    print('Kurtosis(adult_weights):\n', Kurtosis(adult_weights))
    print('SampleExcessKertosis(adult_weights):\n',
          SampleExcessKertosis(adult_weights))

    ## compute statistics of income data
    df = hinc.ReadData()
    log_sample = hinc2.InterpolateSample(df, log_upper=6.0)

    ## Convert sample from log $ to $
    sample = np.power(10, log_sample)
    cdf = thinkstats2.Cdf(sample, label='interp. data')
    thinkplot.Cdf(cdf)
    thinkplot.Show(xlabel='Income ($)', ylabel='CDF')

    ## Compute statistics
    SampleStatistics(sample)
Ejemplo n.º 13
0
                   axis=[140, 210, 20, 200],
                   legend=False)

    ## bin data
    cleaned = df.dropna(subset=['htm3', 'wtkg2'])
    bins = np.arange(135, 210, 5)
    indices = np.digitize(cleaned.htm3, bins)
    groups = cleaned.groupby(indices)

    ## print binned data
    for i, group in groups:
        print(i, len(group))

    ## compute cdf for each group
    mean_heights = [group.htm3.mean() for i, group in groups]
    cdfs = [thinkstats2.Cdf(group.wtkg2) for i, group in groups]

    ## extract 25th, 50th, 75th percentiles
    for percent in [75, 50, 25]:
        weight_percentiles = [cdf.Percentile(percent) for cdf in cdfs]
        label = '%dth' % percent
        thinkplot.Plot(mean_heights, weight_percentiles, label=label)

    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    ## re-bin data and make new cdfs
    bins = np.arange(135, 210, 15)
    indices = np.digitize(cleaned.htm3, bins)
Ejemplo n.º 14
0
    def MakeCdf(self):
        """Build a Cdf from self.ts and the complement of self.ss.

        returns: Cdf
        """
        cumulative = 1 - self.ss
        return thinkstats2.Cdf(self.ts, cumulative)
Ejemplo n.º 15
0
#PMF
# Build PMFs of the NO2 and SO2 AQI columns.
no2_pmf = thinkstats2.Pmf(grp_pollution_df['NO2AQI'])
so2_pmf = thinkstats2.Pmf(grp_pollution_df['SO2AQI'])

# Draw both PMFs as bar charts with opposite alignments.
thinkplot.PrePlot(2, cols=2)
thinkplot.Hist(no2_pmf, label='NO2', align='right', width=0.75)
thinkplot.Hist(so2_pmf, label='SO2', align='left', width=0.75)
thinkplot.Show(xlabel='Parts per Billion',
               ylabel='Probability',
               axis=[0, 80, 0, 0.10])

# CDF of the O3 AQI column.
t = grp_pollution_df['O3AQI']
cdf = thinkstats2.Cdf(t, label='O3')
thinkplot.Clf()
thinkplot.Cdf(cdf)
thinkplot.Show(xlabel='Parts per Million', ylabel='CDF')

# Complementary CDF (CCDF) of O3 with a log-scaled y axis.  The x axis is
# the same O3 variable as in the CDF plot, so it carries the same label
# (it previously read 'minutes', left over from another example).
thinkplot.Cdf(cdf, complement=True)
thinkplot.Show(xlabel='Parts per Million', ylabel='CCDF', yscale='log')

# Normal CDF with a range of parameters.
thinkplot.PrePlot(3)

# TODO: replace these placeholder parameters with values chosen for this data
mus = [1.0, 2.0, 3.0]
sigmas = [0.5, 0.4, 0.3]

for mu, sigma in zip(mus, sigmas):
Ejemplo n.º 16
0
    def testCdf(self):
        """Exercise Cdf lookup, inversion, sampling, construction and copy."""
        data = [1, 2, 2, 3, 5]
        pmf = thinkstats2.Pmf(data)
        hist = thinkstats2.Hist(data)

        cdf = thinkstats2.Cdf(pmf)
        self.assertEqual(len(str(cdf)), 37)

        # cumulative probabilities at x = 0, 1, ..., 6
        expected_probs = [0, 0.2, 0.6, 0.8, 0.8, 1, 1]
        self.assertEqual(cdf[0], 0)
        for x in range(1, 7):
            self.assertAlmostEqual(cdf[x], expected_probs[x])

        for actual, expected in zip(cdf.Probs(range(7)), expected_probs):
            self.assertAlmostEqual(actual, expected)

        # inverse lookups at p = 0, 0.1, ..., 1
        quantiles = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
        expected_values = [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]
        for p, value in zip(quantiles, expected_values):
            self.assertEqual(cdf.Value(p), value)

        values = cdf.ValueArray(np.linspace(0, 1, 11))
        self.assertTrue((values == expected_values).all())

        np.random.seed(17)
        drawn = cdf.Sample(7)
        self.assertListEqual(drawn.tolist(), [2, 2, 1, 1, 3, 3, 3])

        # when you make a Cdf from a Pmf, you might get some floating
        # point representation error
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf[2], 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        # Constructors are wrapped in callables so each Cdf is built right
        # before its own assertions, as in a straight-line version.
        pmf_based = [
            lambda: thinkstats2.MakeCdfFromPmf(pmf),
            lambda: thinkstats2.MakeCdfFromItems(pmf.Items()),
            lambda: thinkstats2.Cdf(pmf.d),
            lambda: thinkstats2.MakeCdfFromDict(pmf.d),
        ]
        for build in pmf_based:
            cdf = build()
            self.assertEqual(len(cdf), 4)
            self.assertAlmostEqual(cdf.Prob(2), 0.6)
            self.assertEqual(cdf.Value(0.6), 2)

        # these sources are checked with exact equality on Prob
        count_based = [
            lambda: thinkstats2.Cdf(hist),
            lambda: thinkstats2.MakeCdfFromHist(hist),
            lambda: thinkstats2.Cdf(data),
            lambda: thinkstats2.MakeCdfFromList(data),
            lambda: thinkstats2.Cdf(Counter(data)),
        ]
        for build in count_based:
            cdf = build()
            self.assertEqual(len(cdf), 4)
            self.assertEqual(cdf.Prob(2), 0.6)
            self.assertEqual(cdf.Value(0.6), 2)

        # copy of the last Cdf built above (from a Counter)
        duplicate = cdf.Copy()
        self.assertEqual(duplicate.Prob(2), 0.6)
        self.assertEqual(duplicate.Value(0.6), 2)
Ejemplo n.º 17
0
#%%
import random
# Draw 500 Gaussian values using `mean` and `std` from an earlier cell, then
# plot a kernel-density estimate of the sample next to `pdf` (also defined
# in an earlier cell).
sample = [random.gauss(mean, std) for _ in range(500)]
sample_pdf = thinkstats2.EstimatedPdf(sample)
thinkplot.Pdf(sample_pdf, label='sample KDE')
thinkplot.Pdf(pdf, label='normal')
# y-axis label spelling corrected (was 'dencity')
thinkplot.Show(xlabel='height (cm)', ylabel='density')

#%%
import numpy as np
# Histogram of the sample after flooring each value.
hist = thinkstats2.Hist(np.floor(sample))
thinkplot.Hist(hist)

#%%
# CDF of the same floored sample.
cdf = thinkstats2.Cdf(np.floor(sample))
thinkplot.Cdf(cdf)

#%% [markdown]
#
# ### Raw moment
# $ m_k = \frac{1}{n} \sum_{i}{{x_i}^k} $


#%%
def RawMoment(xs, k):
    """Return the k-th raw moment of xs: the mean of x**k over all x.

    xs: sized iterable of numbers
    k: exponent applied to each element
    """
    total = 0
    for x in xs:
        total += x**k
    return total / len(xs)


#%% [markdown]
#
Ejemplo n.º 18
0
 def testCdfProbs(self):
     """Print the cumulative probabilities of a small sample's own values."""
     values = [-1, 1, 2, 2, 3, 5]
     probs = thinkstats2.Cdf(values).Probs(values)
     print(probs)
Ejemplo n.º 19
0
def PlotSamplingDistributions(live):
    """Fit weight against age, print fit statistics and save three figures.

    Saves 'linear3' (90% and 50% confidence bands for the fitted line),
    'linear5' (scatter plot with two confidence-interval overlays) and
    'linear4' (CDF of the slope under the null hypothesis next to the
    sampling distribution of the slope).

    live: DataFrame
    """
    ages, weights = live.agepreg, live.totalwgt_lb
    inter, slope = thinkstats2.LeastSquares(ages, weights)
    residuals = thinkstats2.Residuals(ages, weights, inter, slope)
    r_squared = thinkstats2.CoefDetermination(weights, residuals)

    print('rho', thinkstats2.Corr(ages, weights))
    print('R2', r_squared)
    print('R', math.sqrt(r_squared))
    print('Std(ys)', thinkstats2.Std(weights))
    print('Std(res)', thinkstats2.Std(residuals))

    # figure 'linear3': confidence bands, each with a text annotation
    inters, slopes = SamplingDistributions(live, iters=1001)
    bands = [
        (90, 0.3, '90% CI', 7.53, '90%'),
        (50, 0.5, '50% CI', 7.59, '50%'),
    ]
    for percent, alpha, label, text_y, text in bands:
        PlotConfidenceIntervals(ages, inters, slopes,
                                percent=percent, alpha=alpha, label=label)
        thinkplot.Text(42, text_y, text)

    thinkplot.Save(root='linear3',
                   xlabel='age (years)',
                   ylabel='birth weight (lbs)',
                   legend=False)

    # figure 'linear5': raw data with two confidence-interval overlays,
    # the first of which also receives the residuals
    thinkplot.PrePlot(2)
    thinkplot.Scatter(ages, weights, color='gray', alpha=0.1)
    PlotConfidenceIntervals(ages, inters, slopes, res=residuals, alpha=0.2)
    PlotConfidenceIntervals(ages, inters, slopes)
    thinkplot.Save(root='linear5',
                   xlabel='age (years)',
                   ylabel='birth weight (lbs)',
                   title='90% CI',
                   axis=[10, 45, 0, 15],
                   legend=False)

    # sampling distribution of the slope, evaluated at zero
    slope_cdf = thinkstats2.Cdf(slopes)
    print('p-value, sampling distribution', slope_cdf[0])

    # hypothesis test on the slope
    slope_test = SlopeTest((ages, weights))
    print('p-value, slope test', slope_test.PValue())

    print('inter', inter, thinkstats2.Mean(inters))
    Summarize(inters, inter)
    print('slope', slope, thinkstats2.Mean(slopes))
    Summarize(slopes, slope)

    # figure 'linear4': null-hypothesis CDF next to the sampling distribution
    thinkplot.PrePlot(2)
    thinkplot.Plot([0, 0], [0, 1], color='0.8')
    slope_test.PlotCdf(label='null hypothesis')
    thinkplot.Cdf(slope_cdf, label='sampling distribution')
    thinkplot.Save(root='linear4',
                   xlabel='slope (lbs / year)',
                   ylabel='CDF',
                   xlim=[-0.03, 0.03],
                   loc='upper left')
Ejemplo n.º 20
0
# Report the Cohen's d value computed earlier in the script (plen_cohend).
print('Cohen\'s d for pregnancy length in weeks:', plen_cohend)

#--- Chapter3 Ex1
# PMF of resp.numkdhh and a second PMF produced from it by BiasPmf; both
# are plotted and their means printed.
actual_pmf = thinkstats2.Pmf(resp.numkdhh, label='actual')
biased_pmf = BiasPmf(actual_pmf, label='biased')
thinkplot.PrePlot(2)
# NOTE(review): actual_hist / biased_hist are not used in the visible code.
actual_hist = thinkplot.Pmf(actual_pmf)
biased_hist = thinkplot.Pmf(biased_pmf)
thinkplot.Show(xlabel='#kids in household', ylabel='PMF')
print('Actual Mean:', actual_pmf.Mean())
print('Biased Mean:', biased_pmf.Mean())

#--- Chapter4 Ex2
# PMF and CDF of 1000 values from np.random.random, shown one after the other.
my_seq = np.random.random(1000)
my_pmf = thinkstats2.Pmf(my_seq)
my_cdf = thinkstats2.Cdf(my_seq)
thinkplot.Pmf(my_pmf, linewidth=0.1)
thinkplot.Show(xlabel='Random variable', ylabel='PMF')
thinkplot.Cdf(my_cdf)
thinkplot.Show(xlabel='Random variable', ylabel='CDF')

#--- Chapter5 Ex1
# Normal distribution with loc=mu and scale=sigma; the printed percentage is
# the difference of its CDF at m2 and m1, times 100.
mu = 178
sigma = 7.7
mhgt_dist = scipy.stats.norm(loc=mu, scale=sigma)
m1 = 177.8  # 5'10" in cm
m2 = 185.42  # 6'1" in cm
print('Percent Male population between 5\'10" and 6\'1" is %.2f' %
      (100 * (mhgt_dist.cdf(m2) - mhgt_dist.cdf(m1))))

#--- Chapter7 Ex1
Ejemplo n.º 21
0

#%%
# Evaluate EvalCdf on a small sample for x = 0..5 and print each result.
t = [1, 2, 2, 3, 5]
for x in range(6): 
    print("CDF({0}) = {1}".format(x, EvalCdf(t, x)))
#%% [markdown]
# ## 4.4 Representing CDFs

#%%
import thinkstats2
import first
import thinkplot

# Three frames returned by first.MakeFrames; the CDF below uses the
# prglngth column of the first one.
live , firsts,others = first.MakeFrames() 
cdf = thinkstats2.Cdf(live.prglngth, label='prglngth')
thinkplot.Cdf(cdf)
thinkplot.show(xlabel='weeks', ylabel='CDF')

#%%

# Values of the CDF at cumulative probabilities 0.1 and 0.9.
print("10% {0} weeks".format(cdf.Value(0.1)))
print("90% {0} weeks".format(cdf.Value(0.9)))

#%% [markdown]

# ## 4.5 Comparing CDFs

#%%
# One Cdf of totalwgt_lb per frame, labelled 'first' and 'other'.
first_cdf = thinkstats2.Cdf(firsts.totalwgt_lb, label='first')
other_cdf = thinkstats2.Cdf(others.totalwgt_lb, label='other')
Ejemplo n.º 22
0
def ComputeProbSurvival(ts, ss, t):
    """Return one minus the cumulative probability at t.

    Each survival value s in ss is converted to 1 - s, the results are
    paired with ts in a Cdf, and the Cdf is evaluated at t.
    """
    cdf = thinkstats2.Cdf(ts, [1 - surv for surv in ss])
    return 1 - cdf.Prob(t)
Ejemplo n.º 23
0
 def testShift(self):
     """Check that Shift(1) moves the probability at 1 to 2."""
     original = thinkstats2.Cdf([1, 2, 2, 3, 5])
     shifted = original.Shift(1)
     self.assertEqual(original[1], shifted[2])
Ejemplo n.º 24
0
# Plot the PMFs of the clicked and non-clicked groups on shared axes.
# NOTE(review): the original comment said "age range", but the x axis is
# labelled 'Area Income' -- confirm which variable these PMFs hold.

# NOTE(review): `width` is not used anywhere in the visible code.
width=1000
axis = [10000, 70000, 0, 0.01]
thinkplot.PrePlot(2)
#thinkplot.SubPlot(2)
thinkplot.Pmfs([clicked_pmf, nonclicked_pmf])
thinkplot.Config(xlabel='Area Income', axis=axis)
thinkplot.show()

############################################################################
#############################Section 3 -CDF#################################
############################################################################

# CDFs of Daily_Time_Spent for the two age-group data sets, drawn together.
age_grp_30_to_39_cdf = thinkstats2.Cdf(age_grp_30_to_39_ds.Daily_Time_Spent, label='30-39')
age_grp_18_to_29_cdf = thinkstats2.Cdf(age_grp_18_to_29_ds.Daily_Time_Spent, label='18-29')

thinkplot.PrePlot(2)
thinkplot.Cdfs([age_grp_30_to_39_cdf, age_grp_18_to_29_cdf])
thinkplot.Config(xlabel='Daily Time Spent in minutes', ylabel='CDF')
thinkplot.show()

# CDFs of Daily_Time_Spent for the male and female data sets, drawn together.
male_cdf = thinkstats2.Cdf(male_ds.Daily_Time_Spent, label='male')
female_cdf = thinkstats2.Cdf(female_ds.Daily_Time_Spent, label='female')

thinkplot.PrePlot(2)
thinkplot.Cdfs([male_cdf, female_cdf])
thinkplot.Config(xlabel='Daily Time Spent in minutes', ylabel='CDF')
thinkplot.show()