Exemple #1
0
 def _test2(show):
     """Build the binomial PMF of the event count k for fixed n and f.

     show: if truthy, plot the PMF with thinkplot before returning

     returns: the Pmf produced by thinkbayes2.MakeBinomialPmf(n, f)
     """
     # Given n and f (the probability that an event is recorded), find the
     # distribution of the count k.
     n = 150
     f = 0.1
     # The binomial PMF enumerates every outcome from 0 to n, so it needs no
     # extra normalization; the total printed below lets the reader check.
     pmf = thinkbayes2.MakeBinomialPmf(n, f)
     if show:
         thinkplot.Clf()
         thinkplot.Pmf(pmf)
         thinkplot.Show(title="test2",
                        xlabel='Event Count',
                        ylabel='Probability')
     print("Total: ", pmf.Total())
     return pmf
Exemple #2
0
def main():
    """Update a Train suite over 100..1000 with train number 321.

    Prints the posterior mean, the maximum-likelihood hypothesis and the
    90% credible interval, then shows the posterior PMF.
    """
    posterior = Train(range(100, 1001))
    posterior.Update(321)

    # Each summary is computed lazily, right before it is printed.
    summaries = (
        ('Posterior mean', posterior.Mean),
        ('Posterior MLE', posterior.MaximumLikelihood),
        ('Posterior CI 90', lambda: posterior.CredibleInterval(90)),
    )
    for caption, compute in summaries:
        print(caption, compute())

    show_options = dict(xlabel='Number of trains',
                        ylabel='Probability',
                        legend=False)
    thinkplot.PrePlot(1)
    thinkplot.Pmf(posterior)
    thinkplot.Show(**show_options)
Exemple #3
0
def main():
    """Update a Train suite over 1..1000 with train number 60.

    Prints the posterior mean and saves the posterior PMF plot under the
    root name 'train1' in PDF and EPS formats.
    """
    # range() exists on both Python 2 and 3; xrange() is Python 2 only.
    hypos = range(1, 1001)
    suite = Train(hypos)
    suite.label = 'train label'

    suite.Update(60)
    # Call form of print: valid on Python 2, required on Python 3.
    print(suite.Mean())

    thinkplot.PrePlot(1)
    thinkplot.Pmf(suite)
    thinkplot.Save(root='train1',
                   xlabel='Number of trains',
                   ylabel='Probability',
                   formats=['pdf', 'eps'])
Exemple #4
0
def main():
    """Plot a Hockey prior, update it with five observations and plot again.

    Saves the prior under the root 'hockey_self2_prior' and the posterior
    under 'hockey_self2_posterior' (PDF format).  Along the way it prints
    the hypothesis values and the likelihood of the datum 2 under each one.
    """
    # Call form of print: valid on Python 2, required on Python 3.
    print("1")
    hockey1 = Hockey()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_prior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print(hockey1.Values())
    for hypo in hockey1.Values():
        print(hockey1.Likelihood(2, hypo))

    hockey1.UpdateSet([0, 2, 4, 3, 8])
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_posterior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print("No error, everything worked fine")
Exemple #5
0
def main():
    """Show how a Train posterior over 100..1000 changes with more sightings.

    The suite is updated with train 50 and plotted, then updated with six
    further train numbers and plotted again.  Finally the posterior mean,
    maximum-likelihood hypothesis and 90% credible interval are printed.
    """
    posterior = Train(range(100, 1001))

    def show_posterior():
        # Same single-series plot is used after each round of updates.
        thinkplot.PrePlot(1)
        thinkplot.Pmf(posterior)
        thinkplot.Show(xlabel='Number of trains',
                       ylabel='Probability',
                       legend=False)

    posterior.Update(50)
    show_posterior()

    for sighting in (13, 45, 89, 22, 33, 35):
        posterior.Update(sighting)
    show_posterior()

    print(posterior.Mean())
    print(posterior.MaximumLikelihood())
    print(posterior.CredibleInterval(90))
Exemple #6
0
def CH7_5():
    """
    Odds of winning.

    Takes the two distributions returned by CH7_4(0), plots their
    difference and prints the probability that the difference is greater
    than, less than, and equal to zero.
    """
    first, second = CH7_4(0)
    goal_diff = first - second

    thinkplot.Clf()
    thinkplot.Pmf(goal_diff)
    thinkplot.Show(title='diff', xlabel='Goals per game', ylabel='Probability')

    # (win, miss, tie) in the order the format string expects
    outcome_probs = (
        goal_diff.ProbGreater(0),
        goal_diff.ProbLess(0),
        goal_diff.Prob(0, default=0),
    )
    print("pwin = %.3f pmiss = %.3f ptie = %.3f" % outcome_probs)
Exemple #7
0
def CH5_6():
    """
    Mixture distribution: pool the contributions of several distributions.

    Number of dice    Sides per die
          5             4-sided
          4             6-sided
          3             8-sided
          2            12-sided
          1            20-sided

    The mixture is computed twice (by hand and with MakeMixture) and both
    results are plotted together.
    """
    thinkplot.PrePlot(num=2)

    # Method 1: accumulate weight * probability for every outcome by hand.
    weighted_dice = [(weight, Die(sides))
                     for weight, sides in ((5, 4), (4, 6), (3, 8), (2, 12), (1, 20))]
    manual_mix = thinkbayes.Pmf()
    for weight, die in weighted_dice:
        for outcome, prob in die.Items():
            manual_mix.Incr(outcome, weight * prob)
    manual_mix.Normalize()
    manual_mix.name = 'mix-1'
    thinkplot.Pmf(manual_mix)

    # Method 2: build a Pmf over dice and let MakeMixture combine them.
    die_weights = thinkbayes.Pmf()
    for sides, weight in ((4, 5), (6, 4), (8, 3), (12, 2), (20, 1)):
        die_weights.Set(Die(sides), y=weight)
    die_weights.Normalize()
    library_mix = thinkbayes.MakeMixture(die_weights, name='mix-2')
    library_mix.name = 'mix-2'
    thinkplot.Pmf(library_mix)

    thinkplot.Show()
Exemple #8
0
def main():
    """Plot the mixture of a weighted collection of dice.

    The dice (6, 8, 12 and 20 sides) carry weights 2, 3, 1 and 1.  Each
    outcome accumulates weight * probability, the result is normalized and
    the plot is saved under the root 'dice_Mix_self3' (PDF format).
    """
    die_weights = Pmf()
    for sides, weight in ((6, 2), (8, 3), (12, 1), (20, 1)):
        die_weights.Set(Die(sides), weight)

    mixture = Pmf()
    for die, weight in die_weights.Items():
        for face, face_prob in die.Items():
            mixture.Incr(face, weight * face_prob)
    mixture.Normalize()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(mixture)
    thinkplot.Save(root='dice_Mix_self3',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
Exemple #9
0
def MakePmfPlot(alpha=10):
    """Plot the location Pmf for several values of beta and save the figure.

    alpha: first argument forwarded to MakeLocationPmf

    One curve is drawn for each beta in (10, 20, 40) over locations 0..30;
    the figure is saved under the root 'paintball1' using FORMATS.
    """
    beta_values = [10, 20, 40]
    positions = range(31)
    thinkplot.PrePlot(num=len(beta_values))

    for b in beta_values:
        location_pmf = MakeLocationPmf(alpha, b, positions)
        location_pmf.name = 'beta = %d' % b
        thinkplot.Pmf(location_pmf)

    save_options = dict(xlabel='Distance', ylabel='Prob', formats=FORMATS)
    thinkplot.Save('paintball1', **save_options)
def main():
    """Estimate a decay parameter from six observations and plot the posterior.

    The prior is a Decay suite over 1001 evenly spaced hypotheses from
    0.001 to 1.5 (inclusive).  After updating with the data, the posterior
    mean is printed and the posterior distribution is shown.
    """
    low = 0.001
    high = 1.5
    steps = 1001
    # Dividing by (steps - 1.0) makes the last hypothesis land on `high`
    # and forces float division.
    hypos = [low + (high - low) * i / (steps - 1.0) for i in range(steps)]

    suite = Decay(hypos)
    data = [1.5, 2, 3, 4, 5, 12]

    suite.UpdateSet(data)
    # Format first, then print one string: same text on Python 2 and 3.
    print('Mean of the posterior distribution: {}'.format(suite.Mean()))

    # plot the posterior distribution
    thinkplot.Pmf(suite)
    thinkplot.Show(title='Decay parameter',
                   xlabel='Parameter (inverse cm)',
                   ylabel='Posterior probability')
Exemple #11
0
def PlotSurvivalCurve(ts, lams, ss):
    """Plot the scaled hazard function together with the survival curve.

    ts: times in years
    lams: Pmf representing the hazard function
    ss: list of values for the survival curve

    Calls lams.MultAll with 1 / (largest probability) before plotting, so
    the caller's object is presumably rescaled in place (confirm against
    MultAll).  The figure is saved under the root 'seer1'.
    """
    # rescale the hazard values relative to their largest probability
    peak = max(lams.Probs())
    lams.MultAll(1 / peak)

    hazard_style = dict(linewidth=2, linestyle='dashed', color='0.7')
    thinkplot.Pmf(lams, **hazard_style)

    survival_style = dict(linewidth=2, color='blue', label='survival')
    thinkplot.Plot(ts, ss, **survival_style)

    thinkplot.Save(root='seer1',
                   title='',
                   xlabel='Survival time (years)',
                   ylabel='Probability')
Exemple #12
0
    def PredRemaining(self, rem_time, score):
        """Plots the predictive distribution for final number of goals.

        The scoring rate over the remaining time is summarized by its
        probability-weighted mean across this suite's hypotheses, and a
        single Poisson PMF with that rate (values 0..12 requested from
        MakePoissonPmf) is shifted by the current score and shown.

        rem_time: remaining time in the game in minutes
        score: number of goals already scored
        """
        # lam is in goals per game; a game is taken to be 90 minutes, so
        # scale each hypothesis to the remaining time and accumulate the
        # weighted mean.  90.0 keeps the division in floating point.
        # Assumes the suite's probabilities are normalized -- TODO confirm.
        lam_total = 0
        for lam, prob in self.Items():
            goals_in_remaining_time = lam * rem_time / 90.0
            lam_total += goals_in_remaining_time * prob

        # NOTE(review): a Poisson at the mean rate is a point-estimate
        # approximation; mixing one Poisson per hypothesis would give the
        # exact predictive distribution.
        pmf = thinkbayes2.MakePoissonPmf(lam_total, 12)
        pmf += score
        thinkplot.Pmf(pmf)
        thinkplot.Show()
Exemple #13
0
def PlotSurvival(durations):
    """Show the CDF, survival curve and hazard function of a sample.

    durations: list of durations

    The CDF is drawn faintly first; the survival curve and the hazard
    function derived from it are drawn on top.
    """
    duration_cdf = thinkstats2.MakeCdfFromList(durations)
    thinkplot.Cdf(duration_cdf, alpha=0.1)
    thinkplot.PrePlot(2)

    times, survival = SurvivalFunction(duration_cdf)
    thinkplot.Plot(times, survival, label="S(t)")

    hazard = HazardFunction(times, survival)
    thinkplot.Pmf(hazard, label='lam(t)')

    thinkplot.Show(xlabel='t (weeks)')
Exemple #14
0
def MakeConditionalPlot(suite):
    """Plot conditional Pmfs taken from the joint posterior and save them.

    suite: posterior joint distribution of location

    For each beta in (10, 20, 40) the distribution returned by
    suite.Conditional(0, 1, beta) is plotted as a Pmf; the figure is saved
    under the root 'paintball3' using FORMATS.
    """
    beta_values = [10, 20, 40]
    thinkplot.PrePlot(num=len(beta_values))

    for b in beta_values:
        conditional = suite.Conditional(0, 1, b)
        conditional.name = 'beta = %d' % b
        thinkplot.Pmf(conditional)

    save_options = dict(xlabel='Distance', ylabel='Prob', formats=FORMATS)
    thinkplot.Save('paintball3', **save_options)
Exemple #15
0
def main():
    """Estimate the density of the first price column and save its Pmf plot.

    ReadData() must yield rows of exactly six fields; only the first column
    is used.  The estimated density is discretized on 101 evenly spaced
    points from 0 to 75000 and saved under the root 'price_self2' (PDF).
    """
    # Transpose rows into columns; the six-way unpack also checks the width.
    price1, _price2, _bid1, _bid2, _diff1, _diff2 = zip(*ReadData())

    density = thinkbayes.EstimatedPdf(price1)
    grid = numpy.linspace(0, 75000, 101)
    price_pmf = density.MakePmf(grid)

    thinkplot.PrePlot(1)
    thinkplot.Pmf(price_pmf)
    thinkplot.Save(root='price_self2',
                   xlabel='',
                   ylabel='Probability_density',
                   formats=['pdf'])
Exemple #16
0
def ComparePriors():
    """Plot the posteriors obtained from two different prior constructors.

    Train is labelled 'uniform' and Train2 'power law'.  Both posteriors
    are built by MakePosterior with an upper bound of 1000 and the single
    observation 60; the figure is saved under the root 'train4'.
    """
    observations = [60]
    upper_bound = 1000

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    labelled_priors = (('uniform', Train), ('power law', Train2))
    for caption, make_suite in labelled_priors:
        posterior = MakePosterior(upper_bound, observations, make_suite)
        posterior.name = caption
        thinkplot.Pmf(posterior)

    thinkplot.Save(root='train4',
                   xlabel='Number of trains',
                   ylabel='Probability')
def main():
    """Plot the equal-weight mixture of five dice.

    The outcome probabilities of dice with 6, 8, 12, 16 and 20 sides are
    accumulated into one Pmf, normalized, and saved under the root
    'dice_Mix_self1' (PDF format).
    """
    dice = [Die(sides) for sides in (6, 8, 12, 16, 20)]

    mixture = Pmf()
    for die in dice:
        for face, face_prob in die.Items():
            mixture.Incr(face, face_prob)
    mixture.Normalize()

    save_options = dict(xlabel='sum of dice',
                        ylabel='Probability',
                        formats=['pdf'])
    thinkplot.PrePlot(1)
    thinkplot.Pmf(mixture)
    thinkplot.Save(root='dice_Mix_self1', **save_options)
Exemple #18
0
def MakeHists(live):
    """Plot the floored agepreg values as a bar histogram and as a Pmf line.

    live: DataFrame with an agepreg column

    The two views are placed in a two-column figure, which is saved under
    the root 'probability_agepreg_hist'.
    """
    floored_ages = np.floor(live.agepreg)
    age_hist = thinkstats2.Hist(floored_ages, label='agepreg')
    thinkplot.PrePlot(2, cols=2)

    # left panel: bars
    thinkplot.SubPlot(1)
    thinkplot.Hist(age_hist)
    thinkplot.Config(xlabel='years',
                     ylabel='frequency',
                     axis=[0, 45, 0, 700])

    # right panel: the same Hist drawn with thinkplot.Pmf
    thinkplot.SubPlot(2)
    thinkplot.Pmf(age_hist)

    thinkplot.Save(root='probability_agepreg_hist',
                   xlabel='years',
                   axis=[0, 45, 0, 700])
Exemple #19
0
def main():
    """Sample the sum of three six-sided dice and save the resulting Pmf.

    Prints the type and items of the die and of each entry in the list of
    dice, then draws 50 samples with SampleSum and saves the plot under
    the root 'dice_self2' (PDF format).
    """
    d6 = Die(6)
    # The list holds three references to the same Die object.
    dice = [d6] * 3

    # Call form of print: valid on Python 2, required on Python 3.
    print(type(d6))
    print(d6.Items())
    print(type(dice))
    for die in dice:
        print(die.Items())

    test = SampleSum(dice, 50)

    thinkplot.PrePlot(1)
    thinkplot.Pmf(test)
    thinkplot.Save(root='dice_self2',
                   xlabel='sum of dice',
                   ylabel='Probability',
                   formats=['pdf'])
Exemple #20
0
def main():
    """Build a Pmf over family sizes 1..7 with equal weights and plot it.

    Prints the mean and variance of the Pmf before showing the plot.
    """
    # Every family size from 1 to 7 gets the same weight.
    d = {size: 1 for size in range(1, 8)}

    # form the pmf
    pmf = thinkstats2.MakePmfFromDict(d, 'family size')
    # Format first, then print one string: same text on Python 2 and 3.
    print('mean {}'.format(pmf.Mean()))
    print('var {}'.format(pmf.Var()))

    # plot the Pmfs
    thinkplot.Pmf(pmf)
    thinkplot.Show(xlabel='Family size', ylabel='PMF')
Exemple #21
0
def main():
    """Update a Euro suite over 0..100 with 140 heads and 110 tails.

    Prints the posterior probability of hypothesis 80 and saves the
    posterior plot under the root 'coin_self1' (PDF format).
    """
    # range() exists on both Python 2 and 3; xrange() is Python 2 only.
    pmf = Euro(range(0, 101))
    dataset = 'H' * 140 + "T" * 110
    # One update per character, i.e. per coin toss.
    for data in dataset:
        pmf.Update(data)

    # Call form of print: valid on Python 2, required on Python 3.
    print(pmf.Prob(80))

    thinkplot.PrePlot(1)
    thinkplot.Pmf(pmf)
    thinkplot.Save(root='coin_self1',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
Exemple #22
0
def main():
    """Update a Train suite over 1..1000 with train #60 and save the plot.

    Prints the posterior mean and saves the PMF under the root 'trains'
    (PDF format).
    """
    # Hypotheses run from 1 (the company has one train) up to 1000.
    posterior = Train(range(1, 1001))
    posterior.label = "Posterior Probability"

    # Condition on having seen train number 60.
    posterior.Update(60)

    mean_hypothesis = posterior.Mean()
    print("Mean hypothesis: {}".format(mean_hypothesis))

    # Draw the posterior with the thinkplot module.
    save_options = dict(xlabel='Number of trains',
                        ylabel='Probability',
                        formats=['pdf'])
    thinkplot.PrePlot(1)
    thinkplot.Pmf(posterior)
    thinkplot.Save(root='trains', **save_options)
Exemple #23
0
def main():
    """Inspect the likelihood of the sensing data under a StyleDist.

    Builds a StyleDist over 0..100, prints the likelihood of the data
    (2, 0) under the hypothesis 50, and draws the distribution both as a
    histogram and as a Pmf.  A second StyleDist with the value 65 removed
    is built and renormalized, but nothing uses it afterwards.

    returns: the first StyleDist
    """
    observed = (2, 0)
    hypothesis = 50
    excluded_ratio = 65

    style_dist = StyleDist(range(0, 101))
    print('p(D|50%)', style_dist.Likelihood(observed, hypothesis))
    thinkplot.Hist(style_dist)

    # Distribution intended for p(D|~H): the same range without the
    # excluded ratio, renormalized.
    alternative = StyleDist(range(0, 101))
    alternative.Remove(excluded_ratio)
    alternative.Normalize()

    thinkplot.Pmf(style_dist)
    return style_dist
Exemple #24
0
def main():
    """Run the Lincoln model on a 31-step probability grid and report it.

    Hypotheses are (n, p1, p2) triples with n in 32..349 and p1, p2 on a
    31-point grid over [0, 1].  After one update with the data (20, 15, 3)
    the marginal of n is saved under the root 'lincoln1', the marginals of
    p1 and p2 are shown, and summary statistics of all three are printed.
    """
    observed = 20, 15, 3
    grid = numpy.linspace(0, 1, 31)
    triples = [(n, p1, p2)
               for n in range(32, 350)
               for p1 in grid
               for p2 in grid]

    posterior = Lincoln(triples)
    posterior.Update(observed)

    # marginal over the first element of each hypothesis
    marginal_n = posterior.Marginal(0)
    thinkplot.Pmf(marginal_n, label='n')
    thinkplot.Save(root='lincoln1',
                   xlabel='number of bugs',
                   ylabel='PMF',
                   formats=['pdf', 'png'])

    print('post mean n', marginal_n.Mean())
    print('MAP n', marginal_n.MaximumLikelihood())

    # marginals over the second and third elements
    marginal_p1 = posterior.Marginal(1, label='p1')
    marginal_p2 = posterior.Marginal(2, label='p2')
    for marginal in (marginal_p1, marginal_p2):
        thinkplot.Pdf(marginal)
    thinkplot.Show()

    print('post mean p1', marginal_p1.Mean())
    print('MAP p1', marginal_p1.MaximumLikelihood())

    print('post mean p2', marginal_p2.Mean())
    print('MAP p2', marginal_p2.MaximumLikelihood())

    # comparison operators applied directly to the two marginals
    print('p1 > p2', marginal_p1 > marginal_p2)
    print('p1 < p2', marginal_p1 < marginal_p2)
Exemple #25
0
def main():
    """Run the Lincoln model on a 101-step probability grid.

    Hypotheses are (n, p1, p2) triples with n in 32..349 and p1, p2 on a
    101-point grid over [0, 1].  After one update with the data
    (20, 15, 3), the marginal of n is saved under the root 'lincoln1' and
    its mean and maximum-likelihood value are printed.
    """
    observed = 20, 15, 3
    grid = numpy.linspace(0, 1, 101)
    triples = [(n, p1, p2)
               for n in range(32, 350)
               for p1 in grid
               for p2 in grid]

    posterior = Lincoln(triples)
    posterior.Update(observed)

    # marginal over the first element of each hypothesis
    marginal_n = posterior.Marginal(0)

    save_options = dict(xlabel='number of bugs',
                        ylabel='PMF',
                        formats=['pdf', 'png'])
    thinkplot.Pmf(marginal_n, label='n')
    thinkplot.Save(root='lincoln1', **save_options)

    print(marginal_n.Mean())
    print(marginal_n.MaximumLikelihood())
Exemple #26
0
def main():
    """Produce the three 'dungeons' figures.

    dungeons3: histogram of the mixture of five weighted dice.
    dungeons1: sampled versus exact distribution of the sum of three d6.
    dungeons2: Pmf derived from the result of Max(6) on the exact sum.
    """
    # --- mixture of dice, weighted 5:4:3:2:1 ---
    die_weights = thinkbayes.Pmf()
    for sides, weight in ((4, 5), (6, 4), (8, 3), (12, 2), (20, 1)):
        die_weights.Set(Die(sides), weight)
    die_weights.Normalize()

    # Mixture accumulated by hand; the MakeMixture result below replaces it.
    mixture = thinkbayes.Pmf()
    for die, weight in die_weights.Items():
        for face, face_prob in die.Items():
            mixture.Incr(face, weight * face_prob)

    mixture = thinkbayes.MakeMixture(die_weights)

    palette = thinkplot.Brewer.Colors()
    thinkplot.Hist(mixture, width=0.9, color=palette[4])
    thinkplot.Save(root='dungeons3',
                   xlabel='Outcome',
                   ylabel='Probability',
                   formats=FORMATS)

    # --- sum of three d6: sampled vs. exact ---
    # Seed before sampling so the sampled curve is reproducible.
    random.seed(17)

    d6 = Die(6, 'd6')

    sampled_sum = thinkbayes.SampleSum([d6] * 3, 1000)
    sampled_sum.name = 'sample'
    sampled_sum.Print()

    exact_sum = d6 + d6 + d6
    exact_sum.name = 'exact'
    exact_sum.Print()

    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(sampled_sum)
    thinkplot.Pmf(exact_sum, linestyle='dashed')
    thinkplot.Save(root='dungeons1',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.15],
                   formats=FORMATS)

    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    # --- distribution of the best attribute ---
    # Max(6) yields a Cdf, which is converted back to a Pmf for plotting.
    best_cdf = exact_sum.Max(6)
    best_cdf.name = ''
    best_pmf = thinkbayes.MakePmfFromCdf(best_cdf)
    best_pmf.Print()

    thinkplot.Pmf(best_pmf)
    thinkplot.Save(root='dungeons2',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.23],
                   formats=FORMATS)
Exemple #27
0
# In[53]:


# Bar width shared by the two histograms below.
width=200000
# NOTE(review): `axis` is assigned here but not passed to any call in the
# cells visible here.
axis = [0, 800, 0, 0.0005]

# Request a two-column layout, then draw both PMFs as bars: one aligned
# right and one aligned left.
# NOTE(review): SubPlot is never called in this cell, so both Hist calls
# presumably draw on the same axes -- confirm this is intended.
thinkplot.PrePlot(2, cols =2)
thinkplot.Hist(flfindistdfpmf, align = 'right', width = width)
thinkplot.Hist(vfindistdfpmf, align = 'left', width = width)
thinkplot.Config(xlabel = 'Total Revenue', ylabel = 'PMF')


# In[54]:


# Draw the same two PMFs with thinkplot.Pmf instead of bars.
thinkplot.Pmf(flfindistdfpmf)

thinkplot.Pmf(vfindistdfpmf)


# In[55]:


# Plot both PMFs with a single Pmfs call and show the figure with labels.
thinkplot.PrePlot(2)
thinkplot.subplot(2)
#axis = [0, 800, 0, 0.0005]
thinkplot.Pmfs([flfindistdfpmf,vfindistdfpmf ])
thinkplot.Show(xlabel = 'Total Revenue', ylabel = 'PMF')  


# # Let's plot the PMF of the log-transformed columns
Exemple #28
0
    # Load the BRFSS data and keep the non-missing htm3 values of the rows
    # where sex == 2 (named `female` here).
    df = brfss.ReadBrfss(nrows=None)
    female = df[df.sex == 2]
    female_heights = female.htm3.dropna()

    ## female height statistics
    mean, std = female_heights.mean(), female_heights.std()
    print('mean:\n', mean)
    print('std:\n', std)

    ## make pdf representing female distribution
    # The normal pdf uses the sample mean and standard deviation computed
    # above; MakePmf() is called with its default arguments.
    pdf = thinkstats2.NormalPdf(mean, std)
    pmf = pdf.MakePmf()
    thinkplot.PrePlot(2)
    thinkplot.Pdf(pdf, label='normal pdf')

    thinkplot.Pmf(pmf, label='normal pmf')
    thinkplot.Show(xlabel='x', xlim=[140, 186])

    ## KDE of normal pdf
    # Draw the normal pdf once, then overlay i estimated pdfs, each built
    # from 500 fresh random draws from a normal with the same mean and std.
    i = 6
    thinkplot.PrePlot(i + 1)
    thinkplot.Pdf(pdf, label='normal')

    for _ in range(i):
        sample = np.random.normal(mean, std, 500)
        sample_pdf = thinkstats2.EstimatedPdf(sample, label='sample')
        thinkplot.Pdf(sample_pdf, label='sample KDE')

    thinkplot.Show(xlabel='x', ylabel='PDF', xlim=[140, 186])

    ## calculate moments
Exemple #29
0
def main():
    """Compare two Hockey suites and estimate the chance of winning a series.

    Saves four figures (roots 'hockey0' .. 'hockey3'): the two priors, the
    two posteriors after updating with four scores each, the goal
    distributions and the time-until-goal distributions.  It then prints
    the win/loss/tie probabilities for one game, the overtime adjustment,
    and the resulting probability of winning the series.
    """
    formats = ['pdf', 'eps']

    suite1 = Hockey('bruins')
    suite2 = Hockey('canucks')

    # priors
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(suite1)
    thinkplot.Pmf(suite2)
    thinkplot.Save(root='hockey0',
                   xlabel='Goals per game',
                   ylabel='Probability',
                   formats=formats)

    suite1.UpdateSet([0, 2, 8, 4])
    suite2.UpdateSet([1, 3, 1, 0])

    # posteriors
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(suite1)
    thinkplot.Pmf(suite2)
    thinkplot.Save(root='hockey1',
                   xlabel='Goals per game',
                   ylabel='Probability',
                   formats=formats)

    goal_dist1 = MakeGoalPmf(suite1)
    goal_dist2 = MakeGoalPmf(suite2)

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(goal_dist1)
    thinkplot.Pmf(goal_dist2)
    thinkplot.Save(root='hockey2',
                   xlabel='Goals',
                   ylabel='Probability',
                   formats=formats)

    time_dist1 = MakeGoalTimePmf(suite1)
    time_dist2 = MakeGoalTimePmf(suite2)

    print('MLE bruins', suite1.MaximumLikelihood())
    print('MLE canucks', suite2.MaximumLikelihood())

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(time_dist1)
    thinkplot.Pmf(time_dist2)
    thinkplot.Save(root='hockey3',
                   xlabel='Games until goal',
                   ylabel='Probability',
                   formats=formats)

    # Outcome of a single game, from the difference in goals.
    diff = goal_dist1 - goal_dist2
    p_win = diff.ProbGreater(0)
    p_loss = diff.ProbLess(0)
    # The Pmf lookup method is the capitalised Prob, as used by the other
    # snippets in this file; the lowercase spelling is not defined.
    p_tie = diff.Prob(0)

    print(p_win, p_loss, p_tie)

    # Overtime: probability that the first time distribution is smaller
    # than the second, with exact ties split evenly.
    p_overtime = thinkbayes2.PmfProbLess(time_dist1, time_dist2)
    p_adjust = thinkbayes2.PmfProbEqual(time_dist1, time_dist2)
    p_overtime += p_adjust / 2
    print('p_overtime', p_overtime)

    # A tied game that is then won in overtime also counts as a win.
    print(p_overtime * p_tie)
    p_win += p_overtime * p_tie
    print('p_win', p_win)

    # win the next two
    p_series = p_win**2

    # split the next two, win the third
    p_series += 2 * p_win * (1-p_win) * p_win

    print('p_series', p_series)
Exemple #30
0
#--- Chapter2 Ex4
# Compare total birth weight (lb) of first babies against others: the
# difference of the two means as a percentage of the overall mean, plus
# Cohen's effect size for weight and for pregnancy length.
wgt_live = live.totalwgt_lb.dropna()
wgt_first = firsts.totalwgt_lb.dropna()
wgt_other = others.totalwgt_lb.dropna()
mean_diff = 100 * (wgt_first.mean() - wgt_other.mean()) / wgt_live.mean()
wgt_cohend = thinkstats2.CohenEffectSize(wgt_first, wgt_other)
plen_cohend = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth)
print('Difference in relative mean:', mean_diff)
print('Cohen\'s d for total weight in lbs:', wgt_cohend)
print('Cohen\'s d for pregnancy length in weeks:', plen_cohend)

#--- Chapter3 Ex1
# Plot the Pmf of resp.numkdhh next to the Pmf returned by BiasPmf for it,
# then print the mean of each.
actual_pmf = thinkstats2.Pmf(resp.numkdhh, label='actual')
biased_pmf = BiasPmf(actual_pmf, label='biased')
thinkplot.PrePlot(2)
# NOTE(review): these names keep whatever thinkplot.Pmf returns and are not
# used again in the lines visible here -- confirm they are needed.
actual_hist = thinkplot.Pmf(actual_pmf)
biased_hist = thinkplot.Pmf(biased_pmf)
thinkplot.Show(xlabel='#kids in household', ylabel='PMF')
print('Actual Mean:', actual_pmf.Mean())
print('Biased Mean:', biased_pmf.Mean())

#--- Chapter4 Ex2
# Draw 1000 values with np.random.random and show both their Pmf and Cdf.
my_seq = np.random.random(1000)
my_pmf = thinkstats2.Pmf(my_seq)
my_cdf = thinkstats2.Cdf(my_seq)
thinkplot.Pmf(my_pmf, linewidth=0.1)
thinkplot.Show(xlabel='Random variable', ylabel='PMF')
thinkplot.Cdf(my_cdf)
thinkplot.Show(xlabel='Random variable', ylabel='CDF')

#--- Chapter5 Ex1