Esempio n. 1
0
    def testPmf(self):
        """Exercise Pmf construction from several inputs plus basic queries.

        A Pmf is built from a string, a Counter, another Pmf and the items
        of a Pmf's underlying dict ``d``; Copy and Render are checked last.
        """
        pmf = thinkstats2.Pmf('allen')
        # NOTE: this depends on the exact string representation of a Pmf,
        # so it might not be a robust test.
        self.assertEqual(len(str(pmf)), 45)

        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)
        self.assertEqual(pmf['l'], 0.4)
        self.assertEqual(pmf.Percentile(50), 'l')

        pmf = thinkstats2.Pmf(Counter('allen'))
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf = thinkstats2.Pmf(pmf)
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf = thinkstats2.Pmf(pmf.d.items())
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf2 = pmf.Copy()
        self.assertEqual(pmf, pmf2)

        # Render is expected to return the values in sorted order.
        xs, ys = pmf.Render()
        self.assertEqual(tuple(xs), tuple(sorted(pmf.Values())))
Esempio n. 2
0
 def testPmfFromCdf(self):
     """Check that converting Pmf -> Cdf -> Pmf keeps every probability."""
     t = [1, 2, 2, 3, 5]
     pmf = thinkstats2.Pmf(t)
     cdf = thinkstats2.Cdf(pmf)
     pmf2 = thinkstats2.Pmf(cdf)
     for x in pmf.Values():
         # assertAlmostEqual: the round trip may introduce float rounding
         self.assertAlmostEqual(pmf[x], pmf2[x])
Esempio n. 3
0
def MakeStep(greq, less):
    """Show the pregnancy-length PMFs of both groups with thinkplot.Pmfs.

    greq: object whose prglngth attribute holds the 'greater/equal to 30' data
    less: object whose prglngth attribute holds the 'less than 30' data
    """
    group_pmfs = [
        thinkstats2.Pmf(greq.prglngth, label='greater/equal to 30'),
        thinkstats2.Pmf(less.prglngth, label='less than 30'),
    ]
    thinkplot.Pmfs(group_pmfs)

    # axis is passed as [xmin, xmax, ymin, ymax] -- presumably; confirm
    # against thinkplot.Config
    thinkplot.Config(xlabel='Pregnancy length(weeks)', axis=[0, 50, 0, 0.6])
    thinkplot.Show()
Esempio n. 4
0
    def testSortedItems(self):
        """Check SortedItems on plain letters and on mixed-type values."""
        letters = thinkstats2.Pmf('allen')
        self.assertEqual(len(letters.SortedItems()), 4)

        # The values mix a str, a NaN, an int and a Pmf; sorting them
        # should generate a warning, yet all four items must come back.
        mixed = thinkstats2.Pmf(['a', float('nan'), 1, letters])
        self.assertEqual(len(mixed.SortedItems()), 4)
Esempio n. 5
0
def MakePmfs(greq, less):
    """Show the pregnancy-length PMFs of both groups as paired histograms.

    greq: object whose prglngth attribute holds the 'greater/equal to 30' data
    less: object whose prglngth attribute holds the 'less than 30' data
    """
    bar_width = 0.45

    greqpmf = thinkstats2.Pmf(greq.prglngth, label='greater/equal to 30')
    lesspmf = thinkstats2.Pmf(less.prglngth, label='less than 30')

    # opposite alignments keep the two sets of bars from overlapping
    for group_pmf, side in ((lesspmf, 'left'), (greqpmf, 'right')):
        thinkplot.Hist(group_pmf, align=side, width=bar_width)

    thinkplot.Config(axis=[0, 50, 0, 0.6])
    thinkplot.Show()
Esempio n. 6
0
def MakePmfs(greq, less):
    """Show the birth-weight PMFs (totalwgt_lb) of both groups.

    greq: object whose totalwgt_lb attribute holds the 'greater/equal to 30' data
    less: object whose totalwgt_lb attribute holds the 'less than 30' data
    """
    bar_width = .4 / 16

    greqpmf = thinkstats2.Pmf(greq.totalwgt_lb, label='greater/equal to 30')
    lesspmf = thinkstats2.Pmf(less.totalwgt_lb, label='less than 30')

    # the 'less' group is drawn first, aligned left; 'greq' follows on the right
    for group_pmf, side in ((lesspmf, 'left'), (greqpmf, 'right')):
        thinkplot.Pmf(group_pmf, align=side, width=bar_width)

    thinkplot.Config(axis=[0, 15, 0, 0.04])
    thinkplot.Show()
Esempio n. 7
0
def MakeStep(male, female):
    """Show the weekend alcohol-consumption PMFs of both groups with Pmfs.

    male: object whose alcwknd attribute holds the data labelled 'Male'
    female: object whose alcwknd attribute holds the data labelled 'Female'
    """
    group_pmfs = [
        thinkstats2.Pmf(male.alcwknd, label='Male'),
        thinkstats2.Pmf(female.alcwknd, label='Female'),
    ]
    thinkplot.Pmfs(group_pmfs)

    plot_options = dict(
        xlabel='Alcohol Consumption (grams)',
        ylabel='PMF',
        axis=[0, 800, 0, 0.1],
        title='Weekend Alcohol Consumption',
    )
    thinkplot.Config(**plot_options)
    thinkplot.Show()
Esempio n. 8
0
def MakePmfs(male, female):
    """Show the weekend alcohol-consumption PMFs as paired histograms.

    male: object whose alcwknd attribute holds the data labelled 'Male'
    female: object whose alcwknd attribute holds the data labelled 'Female'
    """
    bar_width = 0.45

    malepmf = thinkstats2.Pmf(male.alcwknd, label='Male')
    femalepmf = thinkstats2.Pmf(female.alcwknd, label='Female')

    # opposite alignments keep the two sets of bars from overlapping
    for group_pmf, side in ((malepmf, 'left'), (femalepmf, 'right')):
        thinkplot.Hist(group_pmf, align=side, width=bar_width)

    plot_options = dict(
        xlabel='Alcohol Consumption (grams)',
        ylabel='PMF',
        axis=[0, 800, 0, 0.1],
        title='Weekend Alcohol Consumption',
    )
    thinkplot.Config(**plot_options)
    thinkplot.Show()
 def testPmfMax(self):
     """Check Pmf.Max(6) on d6 + d6 + d6 and plot the resulting Cdf."""
     die = thinkstats2.Pmf(range(1, 7))
     three_dice = (die + die) + die

     max_cdf = three_dice.Max(6)
     thinkplot.Cdf(max_cdf)
     self.assertAlmostEqual(max_cdf[14], 0.558230962626)
Esempio n. 10
0
def main(script):
    """Check PmfMean and PmfVar against Pmf.Mean and Pmf.Var.

    The checks run on the PMF of live.prglngth; each computed value is
    printed before it is compared for exact equality.

    script: string script name
    """
    live, _firsts, _others = first.MakeFrames()
    pmf = thinkstats2.Pmf(live.prglngth)

    # (printed title, function under test, reference Pmf method)
    checks = (
        ('Mean of preg length', PmfMean, pmf.Mean),
        ('Variance of preg length', PmfVar, pmf.Var),
    )
    for title, candidate, reference in checks:
        result = candidate(pmf)
        print(title, result)
        assert result == reference(), result

    # Disabled check of AllModes, kept from the original for reference:
    # modes = AllModes(hist)
    # assert modes[0][1] == 4693, modes[0][1]

    # for value, freq in modes[:5]:
    # print(value, freq)

    print('%s: All tests passed.' % script)
Esempio n. 11
0
def main():
    """Save PMF and CDF figures comparing actual and observed running speeds.

    Writes two figures through thinkplot.Save, with roots 'observed_speeds'
    and 'observed_speeds_cdf'.
    """
    binned_speeds = relay.BinData(
        relay.GetSpeeds(relay.ReadResults()), 3, 12, 100)

    # distribution of the actual speeds
    actual = thinkstats2.Pmf(binned_speeds, 'actual speeds')

    # distribution as seen by the observer (7.5 is passed to ObservedPmf)
    observed = ObservedPmf(actual, 7.5, label='observed speeds')

    thinkplot.Pmf(observed)
    thinkplot.Save(root='observed_speeds',
                   title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='PMF')

    both_cdfs = [thinkstats2.Cdf(actual), thinkstats2.Cdf(observed)]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(both_cdfs)
    thinkplot.Save(root='observed_speeds_cdf',
                   title='CDF of running speed',
                   xlabel='speed (mph)',
                   ylabel='CDF')
Esempio n. 12
0
    def testCdf(self):
        """Check Cdf construction from a Pmf, its items, a list and a Counter.

        Every variant must report four distinct values, Prob(2) close to 0.6
        and Value(0.6) close to 2; a copy must answer the same queries.
        """
        t = [1, 2, 2, 3, 5]
        pmf = thinkstats2.Pmf(t)

        cdf = thinkstats2.Cdf(pmf)
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(pmf.Items())
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(t)
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(Counter(t))
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf.Value(0.6), 2)

        cdf2 = cdf.Copy()
        self.assertAlmostEqual(cdf2.Prob(2), 0.6)
        self.assertAlmostEqual(cdf2.Value(0.6), 2)
Esempio n. 13
0
def ClassSizes():
    """Generate PMFs of observed and actual class size.

    Prints the mean and variance of the actual, biased and unbiased
    distributions, then saves a plot of the actual and biased PMFs under
    the root 'class_size1'.
    """
    def report(dist):
        # print the two summary statistics of one distribution
        print('mean', dist.Mean())
        print('var', dist.Var())

    # actual distribution of class sizes from the book, passed to Pmf as a dict
    size_counts = {
        7: 8, 12: 8, 17: 14, 22: 4, 27: 6, 32: 12, 37: 8, 42: 3, 47: 2,
    }

    actual = thinkstats2.Pmf(size_counts, label='actual')
    report(actual)

    # distribution after BiasPmf is applied
    observed = BiasPmf(actual, label='observed')
    report(observed)

    # unbiasing the biased distribution
    recovered = UnbiasPmf(observed, label='unbiased')
    report(recovered)

    # only the actual and the biased PMFs are plotted
    thinkplot.PrePlot(2)
    thinkplot.Pmfs([actual, observed])
    thinkplot.Save(root='class_size1',
                   xlabel='class size',
                   ylabel='PMF',
                   axis=[0, 52, 0, 0.27])
    def MakeModel(self):
        """Pool both samples in self.data and store the expected probabilities.

        Sets self.n (length of the first sample), self.pool (both samples
        stacked with np.hstack), self.values (the first sample itself) and
        self.expected_probs (pooled-Pmf probabilities of self.values).
        """
        observado, esperado = self.data

        self.n = len(observado)
        self.values = observado
        self.pool = np.hstack((observado, esperado))

        pooled_pmf = thinkstats2.Pmf(self.pool)
        self.expected_probs = np.array(pooled_pmf.Probs(self.values))
Esempio n. 15
0
    def MakeModel(self):
        """Pool both samples in self.data and store the expected probabilities.

        Sets self.n (length of the first sample), self.pool (both samples
        stacked with np.hstack), self.values (the integers 35 through 43)
        and self.expected_probs (pooled-Pmf probabilities of self.values).
        """
        firsts, others = self.data

        self.n = len(firsts)
        self.values = range(35, 44)
        self.pool = np.hstack((firsts, others))

        pooled_pmf = thinkstats2.Pmf(self.pool)
        self.expected_probs = np.array(pooled_pmf.Probs(self.values))
Esempio n. 16
0
def main():
    """Print the mean of the pairwise differences and show their PMF."""
    live, _firsts, _others = first.MakeFrames()
    differences = PairwiseDiff(live)

    print('Mean: ', thinkstats2.Mean(differences))

    thinkplot.Hist(thinkstats2.Pmf(differences))
    thinkplot.Show(xlabel='Diff in wks', ylabel='PMF')
Esempio n. 17
0
def main():
    """Check PmfMean and PmfVar against Pmf.Mean and Pmf.Var.

    The PMF is built from prglngth of the rows whose outcome equals 1.
    """
    pregnancies = nsfg.ReadFemPreg()
    live_rows = pregnancies[pregnancies.outcome == 1]
    length_pmf = thinkstats2.Pmf(live_rows.prglngth)

    # both helpers must agree exactly with the built-in methods
    assert length_pmf.Mean() == PmfMean(length_pmf)
    assert length_pmf.Var() == PmfVar(length_pmf)
    print('All test pased')
Esempio n. 18
0
 def testPmfProbLess(self):
     """Check ProbLess and ProbGreater against scalars and against a Pmf."""
     die = thinkstats2.Pmf(range(1, 7))
     self.assertEqual(die.ProbLess(4), 0.5)
     self.assertEqual(die.ProbGreater(3), 0.5)

     pair = die + die
     triple = pair + die
     # Only the method form is checked: Pmf no longer supports the
     # magic comparators (< and >).
     self.assertAlmostEqual(pair.ProbGreater(triple), 0.15200617284)
     self.assertAlmostEqual(pair.ProbLess(triple), 0.778549382716049)
Esempio n. 19
0
def MakeUniformPmf(low, high):
    """Build a Pmf from the values MakeRange(low, high) produces.

    low: lowest value (inclusive)
    high: highest value (inclusive)

    returns: thinkstats2.Pmf
    """
    return thinkstats2.Pmf(MakeRange(low, high))
Esempio n. 20
0
def MakeFigures(live, firsts, others):
    """Save PMF and CDF figures of birth weight for first babies and others.

    Prints, for each group, the number of weights before and after dropping
    missing values, then saves 'cumulative_birthwgt_pmf' and
    'cumulative_birthwgt_cdf' through thinkplot.Save.

    live: DataFrame (not used by this function)
    firsts: DataFrame
    others: DataFrame
    """
    # Non-null weights per group.  The original carried disabled asserts
    # expecting 4381 values for firsts and 4706 for others.
    cleaned = {}
    for title, frame in (('Firsts', firsts), ('Others', others)):
        raw_weights = frame.totalwgt_lb
        kept_weights = raw_weights.dropna()
        print(title, len(raw_weights), len(kept_weights))
        cleaned[title] = kept_weights

    first_pmf = thinkstats2.Pmf(cleaned['Firsts'], label='first')
    other_pmf = thinkstats2.Pmf(cleaned['Others'], label='other')

    bar_width = 0.4 / 16

    # PMFs of birth weight, drawn with opposite alignments
    thinkplot.PrePlot(2)
    thinkplot.Hist(first_pmf, align='right', width=bar_width)
    thinkplot.Hist(other_pmf, align='left', width=bar_width)
    thinkplot.Save(root='cumulative_birthwgt_pmf',
                   title='Birth weight',
                   xlabel='weight (pounds)',
                   ylabel='PMF')

    # CDFs of birth weight, built from the unfiltered weight columns
    group_cdfs = [
        thinkstats2.Cdf(firsts.totalwgt_lb, label='first'),
        thinkstats2.Cdf(others.totalwgt_lb, label='other'),
    ]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(group_cdfs)
    thinkplot.Save(root='cumulative_birthwgt_cdf',
                   title='Birth weight',
                   xlabel='weight (pounds)',
                   ylabel='CDF',
                   axis=[0, 12.5, 0, 1])
Esempio n. 21
0
def ProbilityMassFunction(group):
    """Return the probability mass function of the values in group.

    group: collection of values accepted by thinkstats2.Pmf

    returns: thinkstats2.Pmf
    """
    # Equivalent by hand: build thinkstats2.Hist(group) and divide every
    # frequency by hist.Total().  That manual map was computed here but
    # never used, so only the Pmf is built.
    return thinkstats2.Pmf(group)
 def testPmfProbLess(self):
     """Check ProbLess/ProbGreater and the matching < and > operators."""
     die = thinkstats2.Pmf(range(1, 7))
     self.assertEqual(die.ProbLess(4), 0.5)
     self.assertEqual(die.ProbGreater(3), 0.5)

     pair = die + die
     triple = pair + die

     # operator form
     self.assertAlmostEqual(pair > triple, 0.15200617284)
     self.assertAlmostEqual(pair < triple, 0.778549382716049)

     # method form, expected to give the same probabilities
     self.assertAlmostEqual(pair.ProbGreater(triple), 0.15200617284)
     self.assertAlmostEqual(pair.ProbLess(triple), 0.778549382716049)
Esempio n. 23
0
    def testPmf(self):
        """Check Pmf construction from several input types, then Copy.

        A Pmf is built from a string, a Counter, another Pmf and the items
        of a Pmf's underlying dict ``d``; each must hold four values with
        Prob('l') equal to 0.4.
        """
        pmf = thinkstats2.Pmf('allen')
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf = thinkstats2.Pmf(Counter('allen'))
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf = thinkstats2.Pmf(pmf)
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf = thinkstats2.Pmf(pmf.d.items())
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf2 = pmf.Copy()
        self.assertEqual(pmf, pmf2)
def summ_stats(x):
    """Print mean, mode, variance and kurtosis for each listed column.

    x: iterable of column labels, each looked up in the global name ``df``
    """
    template = (
        "{} Crime Stats: mean = {:.2f}, mode = {:.2f}, spread = {:.2f}, tails = {:.2f}."
    )
    for column in x:
        dist = thinkstats2.Pmf(df[column])
        mean = dist.Mean()
        mode = dist.Mode()
        spread = dist.Var()
        # kurtosis comes from the column itself rather than from the Pmf
        tails = df[column].kurtosis()
        print(template.format(column, mean, mode, spread, tails))
Esempio n. 25
0
def Experiment5(lam=2.5, m=100):
    """Show the distribution of m results of SimulateGame(lam).

    lam: value passed to SimulateGame on every call
    m: number of simulated games
    """
    outcomes = thinkstats2.Pmf()
    for _ in range(m):
        outcomes.Incr(SimulateGame(lam))

    # Incr accumulated raw increments, so normalize before plotting
    outcomes.Normalize()

    thinkplot.Hist(outcomes)
    thinkplot.Show()
Esempio n. 26
0
def RunLoop(gap_times, nums, lam=0.0333):
    """Runs the basic analysis for a range of num_passengers.

    For every entry of nums the probability of waiting more than 15
    minutes is computed; the probabilities are plotted against nums and
    saved under the root 'redline5'.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lam: arrival rate in passengers per second

    Side effects: sets the global UPPER_BOUND to 4000, calls
    thinkplot.Clf() and RandomSeed(18).

    Returns: None (the function has no return statement)
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.Clf()

    RandomSeed(18)

    # resample gap_times
    sample_size = 220
    resampled_gaps = thinkstats2.Cdf(gap_times).Sample(sample_size)
    gap_pmf = thinkstats2.Pmf(resampled_gaps)

    # compute the biased pmf and add some long delays
    biased_cdf = BiasPmf(gap_pmf).MakeCdf()
    long_delays = [1800, 2400, 3000]
    biased_sample = numpy.append(biased_cdf.Sample(sample_size), long_delays)

    # smooth the biased distribution
    smoothed_pdf = thinkstats2.EstimatedPdf(biased_sample)
    smoothed_pmf = smoothed_pdf.MakePmf(xs=MakeRange(low=60))

    # unbias the smoothed distribution and build the wait-time calculator
    wtc = WaitTimeCalculator(UnbiasPmf(smoothed_pmf))

    # posterior prob of waiting more than 15 minutes (900 s), one per entry
    probs = [
        1 - ElapsedTimeEstimator(
            wtc, lam, num_passengers).pmf_y.MakeCdf().Prob(900)
        for num_passengers in nums
    ]

    thinkplot.Plot(nums, probs)
    thinkplot.Save(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
Esempio n. 27
0
def main():
    """Show the PMF of the binned running speeds."""
    binned_speeds = BinData(GetSpeeds(ReadResults()), 3, 12, 100)

    speed_pmf = thinkstats2.Pmf(binned_speeds, 'speeds')

    thinkplot.Pmf(speed_pmf)
    thinkplot.Show(title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='probability')
Esempio n. 28
0
def MakeFigures(firsts, others):
    """Plot Pmfs of pregnancy length.

    Saves 'probability_nsfg_pmf' (the two PMFs, drawn as histograms and
    with thinkplot.Pmfs) and 'probability_nsfg_diffs' (their differences
    in percentage points for weeks 35 through 45).

    firsts: DataFrame
    others: DataFrame
    """
    axis_limits = (27, 46, 0, 0.6)
    bar_width = 0.45

    first_pmf = thinkstats2.Pmf(firsts.prglngth, label='first')
    other_pmf = thinkstats2.Pmf(others.prglngth, label='other')

    # first subplot: bars with opposite alignments
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(first_pmf, align='right', width=bar_width)
    thinkplot.Hist(other_pmf, align='left', width=bar_width)
    thinkplot.Config(xlabel='weeks',
                     ylabel='probability',
                     axis=list(axis_limits))

    # second subplot: the same two PMFs drawn with thinkplot.Pmfs
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([first_pmf, other_pmf])
    thinkplot.Save(root='probability_nsfg_pmf',
                   xlabel='weeks',
                   axis=list(axis_limits))

    # differences between the PMFs, in percentage points
    weeks = range(35, 46)
    diffs = [
        100 * (first_pmf.Prob(week) - other_pmf.Prob(week))
        for week in weeks
    ]

    thinkplot.Bar(weeks, diffs)
    thinkplot.Save(root='probability_nsfg_diffs',
                   title='Difference in PMFs',
                   xlabel='weeks',
                   ylabel='percentage points',
                   legend=False)
Esempio n. 29
0
def ResampleRowsWeighted(df, attr='finalwgt'):
    """Resamples a DataFrame using probabilities proportional to a weight column.

    df: DataFrame
    attr: string column name to use as weights

    returns: DataFrame
    """
    weights = df[attr]
    # Map each row label to its weight.  Passing the Series itself would
    # make the weights the values of the distribution, so the sample would
    # hold weights instead of row labels usable with df.loc.
    cdf = thinkstats2.Pmf(dict(weights)).MakeCdf()
    indices = cdf.Sample(len(weights))
    sample = df.loc[indices]
    return sample
Esempio n. 30
0
def SimulateManyGames(lam, iters=1000000):
    """Simulate many games and summarize the resulting estimates of lam.

    Prints the mean error and RMSE of the estimates relative to lam, then
    plots their CDF with vertical lines at the 5th and 95th percentiles.

    lam: value passed to SimulateGame and used as the reference value
    iters: number of simulated games
    """
    lam_est = [SimulateGame(lam) for _ in np.arange(iters)]

    print('Mean Error =', MeanError(lam_est, lam))
    print('RMSE =', RMSE(lam_est, lam))

    lam_cdf = thinkstats2.Cdf(lam_est)
    ci = lam_cdf.Percentile(5), lam_cdf.Percentile(95)

    thinkplot.Cdf(lam_cdf)
    # mark both percentile bounds with a vertical line over the full CDF height
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], linewidth=2, color='0.8')
    thinkplot.Config(xlabel='Goals per game', ylabel='CDF', legend=False)