예제 #1
0
def RunLoop(gap_times, nums, lam=0.0333):
    """Plots P(y > 15 min) as a function of the number of passengers.

    Resamples the gap times, builds a WaitTimeCalculator from a smoothed
    and unbiased version of the biased gap distribution, runs an
    ElapsedTimeEstimator for every value in nums and saves the resulting
    curve under the root 'redline5'.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lam: arrival rate in passengers per second

    Side effect: rebinds the module-level UPPER_BOUND to 4000.
    Returns: None (the function ends after saving the plot)
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.Clf()

    # seed the generator before any sampling happens
    RandomSeed(18)

    # resample the gap times
    sample_size = 220
    resampled = thinkbayes.MakeCdfFromList(gap_times).Sample(sample_size)
    pmf_gaps = thinkbayes.MakePmfFromList(resampled)

    # sample from the biased pmf and append three long delays
    biased_cdf = BiasPmf(pmf_gaps).MakeCdf()
    long_delays = [1800, 2400, 3000]
    biased_sample = biased_cdf.Sample(sample_size) + long_delays

    # smooth the biased sample with an estimated pdf
    smoothed = thinkbayes.EstimatedPdf(biased_sample).MakePmf(MakeRange(low=60))

    # remove the bias again and build the wait time calculator
    wtc = WaitTimeCalculator(UnbiasPmf(smoothed))

    def prob_long_wait(num_passengers):
        """Posterior probability that y exceeds 900 seconds (15 minutes)."""
        estimator = ElapsedTimeEstimator(wtc, lam, num_passengers)
        return 1 - estimator.pmf_y.MakeCdf().Prob(900)

    probs = [prob_long_wait(count) for count in nums]

    thinkplot.Plot(nums, probs)
    save_options = dict(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
    thinkplot.Save(**save_options)
예제 #2
0
def PlotPosterior(suite, pcolor=False, contour=True):
    """Draws the joint posterior held by a suite and saves the figure.

    The output root is 'variability_posterior_' followed by suite.name.

    suite: Suite that maps (mu, sigma) to probability
    pcolor: forwarded to thinkplot.Contour
    contour: forwarded to thinkplot.Contour
    """
    thinkplot.Clf()

    joint = suite.GetDict()
    thinkplot.Contour(joint, pcolor=pcolor, contour=contour)

    save_options = dict(
        root='variability_posterior_%s' % suite.name,
        title='Posterior joint distribution',
        xlabel='Mean height (cm)',
        ylabel='Stddev (cm)',
    )
    thinkplot.Save(**save_options)
예제 #3
0
def MakePlots(player1, player2):
    """Generates two plots and prints summary statistics for both players.

    price1 shows the priors for the two players
    price2 shows the distribution of diff for the two players

    player1, player2: objects providing PmfPrice, CdfDiff and ProbOverbid
    """

    # plot the prior distribution of price for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    pmf1 = player1.PmfPrice()
    pmf1.name = 'showcase 1'
    pmf2 = player2.PmfPrice()
    pmf2.name = 'showcase 2'
    thinkplot.Pmfs([pmf1, pmf2])
    thinkplot.Save(root='price1',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # plot the distribution of diff for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    cdf1 = player1.CdfDiff()
    cdf1.name = 'player 1'
    cdf2 = player2.CdfDiff()
    cdf2.name = 'player 2'

    # print() calls match the rest of the file (the old print statements
    # do not parse under Python 3); the labels now identify each player
    print('Player 1 median', cdf1.Percentile(50))
    print('Player 2 median', cdf2.Percentile(50))

    print('Player 1 overbids', player1.ProbOverbid())
    print('Player 2 overbids', player2.ProbOverbid())

    thinkplot.Cdfs([cdf1, cdf2])
    thinkplot.Save(root='price2',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
예제 #4
0
    def PlotJointDist(self):
        """Saves a pcolor rendering of the cached age-size joint distribution."""
        thinkplot.Clf()

        age_size = self.cache.GetDistAgeSize()
        thinkplot.Contour(age_size, contour=False, pcolor=True)

        # tick positions are handed to MakeLogTicks before being used as yticks
        tick_values = [0.2, 0.5, 1, 2, 5, 10, 20]
        save_options = dict(
            root='kidney8',
            formats=FORMATS,
            axis=[0, 41, -0.7, 1.31],
            yticks=MakeLogTicks(tick_values),
            xlabel='ages',
            ylabel='diameter (cm, log scale)',
        )
        thinkplot.Save(**save_options)
예제 #5
0
def PlotSuites(suites, root):
    """Plots the Pmf of every suite in one figure and saves it.

    suites: sequence of Suite objects (its length sets the PrePlot count)
    root: string filename to write
    """
    thinkplot.Clf()
    thinkplot.PrePlot(len(suites))
    thinkplot.Pmfs(suites)

    labels = {
        'xlabel': 'x',
        'ylabel': 'Probability',
        'formats': ['pdf', 'eps'],
    }
    thinkplot.Save(root=root, **labels)
예제 #6
0
 def _test1(show=0):
     """Builds the Poisson Pmf of the event count n for a known rate r.

     show: if truthy, display the Pmf before printing its total

     Returns: the Pmf created by thinkbayes2.MakePoissonPmf
     """
     # r is known; the distribution of n is then a Poisson distribution
     r = 150
     # MakePoissonPmf is given an upper bound (2 * r) although the Poisson
     # distribution itself is unbounded, so the result needs normalizing
     pmf = thinkbayes2.MakePoissonPmf(r, 2 * r, 1)
     if show:
         thinkplot.Clf()
         thinkplot.Pmf(pmf)
         # axis label spelling corrected (was 'Probality')
         thinkplot.Show(title="test1",
                        xlabel='Event Count',
                        ylabel='Probability')
     print("Total: ", pmf.Total())
     return pmf
예제 #7
0
def PlotPriorDist(pmf):
    """Saves the CDF of the prior distribution of p_correct.

    pmf: prior
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    # the Cdf is built from the prior Pmf and named 'prior'
    thinkplot.Cdf(thinkbayes.MakeCdfFromPmf(pmf, 'prior'))

    save_options = dict(
        root='sat_prior',
        xlabel='p_correct',
        ylabel='CDF',
        formats=['pdf', 'eps'],
    )
    thinkplot.Save(**save_options)
예제 #8
0
 def _test2(show):
     """Builds the binomial Pmf of the count k for known n and f.

     show: if truthy, display the Pmf before printing its total

     Returns: the Pmf created by thinkbayes2.MakeBinomialPmf
     """
     # n and f (the probability of an event being recorded) are known;
     # the distribution of k is what we want
     n = 150
     f = 0.1
     # MakeBinomialPmf: a binomial over 0..n already enumerates every
     # possible outcome, so no normalization is needed
     pmf = thinkbayes2.MakeBinomialPmf(n, f)
     if show:
         thinkplot.Clf()
         thinkplot.Pmf(pmf)
         # axis label spelling corrected (was 'Probality')
         thinkplot.Show(title="test2",
                        xlabel='Event Count',
                        ylabel='Probability')
     print("Total: ", pmf.Total())
     return pmf
예제 #9
0
    def PlotPmfs(self, root='redline0'):
        """Plots the computed Pmfs.

        self.pmf_z and self.pmf_zb are rescaled by 1/60 with ScaleDists
        (the x axis is labelled in minutes) and drawn in one figure.

        root: string
        """
        pmfs = ScaleDists([self.pmf_z, self.pmf_zb], 1.0 / 60)

        thinkplot.Clf()
        thinkplot.PrePlot(2)
        thinkplot.Pmfs(pmfs)
        # the curves are Pmfs, so the y axis is labelled 'PMF' (was 'CDF')
        thinkplot.Save(root=root,
                       xlabel='Time (min)',
                       ylabel='PMF',
                       formats=FORMATS)
예제 #10
0
def CH6_5(diff1, diff2):
    """Shows the CDFs of the bid differences for the two showcases.

    diff1, diff2: sequences of bid differences, one per showcase

    After the figure is shown, prints CDF(diff <= 0) for both samples,
    which indicates whether contestants tend to underestimate the price.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    labelled = [(diff1, 'diff1'), (diff2, 'diff2')]
    cdfs = [thinkbayes.MakeCdfFromList(values, name=tag)
            for values, tag in labelled]

    thinkplot.Cdfs(cdfs)
    thinkplot.Show(xlabel='diff $', ylabel="CDF")

    first, second = cdfs
    print(first.Prob(0), second.Prob(0))
예제 #11
0
def CH7_5():
    """Shows the goal-difference Pmf and prints the odds of each outcome.

    The two goal distributions come from CH7_4(0); their difference is
    plotted, then the probabilities of a positive, negative and zero
    difference are printed as pwin, pmiss and ptie.
    """
    goals1, goals2 = CH7_4(0)
    goal_diff = goals1 - goals2

    thinkplot.Clf()
    thinkplot.Pmf(goal_diff)
    show_options = dict(title='diff',
                        xlabel='Goals per game',
                        ylabel='Probability')
    thinkplot.Show(**show_options)

    outcome = (
        goal_diff.ProbGreater(0),
        goal_diff.ProbLess(0),
        goal_diff.Prob(0, default=0),
    )
    print("pwin = %.3f pmiss = %.3f ptie = %.3f" % outcome)
예제 #12
0
def PlotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For each sample only the values below 150 are kept; one CDF per label
    is drawn and the figure is saved as 'variability_cdfs'.

    samples: map from label to sequence of values
    """
    cdfs = []
    # items() works on both Python 2 and Python 3 dicts; iteritems() was
    # removed in Python 3
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]

        cdf = thinkbayes.MakeCdfFromList(outliers, label)
        cdfs.append(cdf)

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
예제 #13
0
def PlotSuites(suites, root):
    """Plots the Pmf of every suite in one figure and saves it with a legend.

    suites: sequence of Suite objects (its length sets the PrePlot count)
    root: string filename to write
    """
    save_options = {
        'xlabel': 'Percentage of Active Female Users',
        'ylabel': 'Probability',
        'formats': ['pdf', 'png'],
        'legend': True,
    }

    thinkplot.Clf()
    thinkplot.PrePlot(len(suites))
    thinkplot.Pmfs(suites)
    thinkplot.Save(root=root, **save_options)
예제 #14
0
def sim_pearson(perfs, p1, p2, show=False):
    """Returns the Pearson correlation coefficient of two people's ratings.

    r = cov(X, Y) / (sigma_X * sigma_Y), computed over the items rated by
    both p1 and p2, using divide-by-n (population) statistics.

    perfs: map from person to a map from item to numeric rating
    p1, p2: keys of perfs identifying the two people to compare
    show: if true, also print r next to correlation.Corr and display a
          scatter plot of the shared ratings (previously `show` was read
          without being defined, which raised NameError)

    Returns: the coefficient; 0 when the two people share no items or when
             either set of shared ratings has zero variance
    """
    shared_items = [item for item in perfs[p1] if item in perfs[p2]]
    if not shared_items:
        return 0

    # float() keeps the divisions below true divisions even for integer
    # ratings under Python 2
    n = float(len(shared_items))

    # ratings both people gave to the shared items
    data_p1 = [perfs[p1][it] for it in shared_items]
    data_p2 = [perfs[p2][it] for it in shared_items]

    # mean rating of each person
    mu_p1 = sum(data_p1) / n
    mu_p2 = sum(data_p2) / n

    # variance of each person's ratings
    var_p1 = sum(pow(it - mu_p1, 2) for it in data_p1) / n
    var_p2 = sum(pow(it - mu_p2, 2) for it in data_p2) / n

    # a constant series has no defined correlation; report 0
    if var_p1 == 0 or var_p2 == 0:
        return 0

    # covariance of the two series
    cov = sum((x - mu_p1) * (y - mu_p2)
              for x, y in zip(data_p1, data_p2)) / n

    # Pearson correlation coefficient
    r = cov / sqrt(var_p1 * var_p2)

    if show:
        # cross-check against the thinkstats implementation and plot
        rr = correlation.Corr(data_p1, data_p2)
        print(r, rr)
        thinkplot.Clf()
        thinkplot.Scatter(data_p1, data_p2)
        thinkplot.Show()

    return r
예제 #15
0
def PlotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    Jitters the data and subtracts away the mean; the mean is taken from
    the data before jittering.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()
    # items() works on both Python 2 and Python 3 dicts; iteritems() was
    # removed in Python 3
    for key, xs in d.items():
        mu = thinkstats.Mean(xs)
        jittered = thinkstats.Jitter(xs, 1.3)
        centered = [x - mu for x in jittered]
        cdf = thinkbayes.MakeCdfFromList(centered)
        thinkplot.Cdf(cdf, label=labels[key])
    thinkplot.Show()
예제 #16
0
def main():
    """Plots the goal Pmfs derived from two updated Hockey suites.

    Each suite is updated with a list of four scores, converted with
    MakeGoalPmf, and both Pmfs are saved in 'hockey_self5_MakeGoalPmf'.
    """
    suite1 = Hockey()
    suite2 = Hockey()
    suite1.UpdateSet([0, 2, 8, 4])
    suite2.UpdateSet([0, 1, 2, 3])
    goal_pmf1 = MakeGoalPmf(suite1)
    goal_pmf2 = MakeGoalPmf(suite2)

    thinkplot.Clf()
    # PrePlot is the canonical name used everywhere else in this file; the
    # lower-case alias only exists in newer thinkplot releases
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(goal_pmf1)
    thinkplot.Pmf(goal_pmf2)
    thinkplot.Save(root='hockey_self5_MakeGoalPmf',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
예제 #17
0
    def CalibrateDifficulty(self):
        """Saves a plot comparing the raw-score data with the model.

        The 'data' curve is the Cdf of self.raw; the 'model' curve is the
        Cdf of self.MakeRawScoreDist applied to a normal Pmf of efficacies.
        """
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)

        # observed raw scores
        data_cdf = thinkbayes2.Cdf(self.raw, label='data')
        thinkplot.Cdf(data_cdf)

        # raw scores predicted from the efficacy distribution
        efficacies = thinkbayes2.MakeNormalPmf(0, 1.5, 3)
        model_pmf = self.MakeRawScoreDist(efficacies)
        thinkplot.Cdf(thinkbayes2.Cdf(model_pmf, label='model'))

        save_options = dict(
            root='sat_calibrate',
            xlabel='raw score',
            ylabel='CDF',
            formats=['pdf', 'eps'],
        )
        thinkplot.Save(**save_options)
예제 #18
0
    def PlotPosteriors(self, other):
        """Saves the posterior CDFs of efficacy for two test takers.

        Each curve is labelled 'posterior ' followed by that object's score.

        self, other: Sat objects.
        """
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)

        cdfs = [thinkbayes2.Cdf(sat, label='posterior %d' % sat.score)
                for sat in (self, other)]
        thinkplot.Cdfs(cdfs)

        save_options = dict(
            xlabel='efficacy',
            ylabel='CDF',
            axis=[0, 4.6, 0.0, 1.0],
            root='sat_posteriors_eff',
            formats=['pdf', 'eps'],
        )
        thinkplot.Save(**save_options)
예제 #19
0
    def PlotBuckets(self):
        """Plots the set of sequences that ended in the selected bucket(s)."""
        # Only bucket 23.0 is drawn. An earlier selection was
        # [7.0, 16.0, 23.0], annotated as 2.01, 4.95 cm, 9.97 cm.
        selected = [23.0]
        palette = ['blue', 'green', 'red', 'cyan']

        thinkplot.Clf()
        # zip stops at the shorter list, so only the first colors are used
        for bucket, color in zip(selected, palette):
            self.PlotBucket(bucket, color)

        save_options = dict(
            root='kidney5',
            formats=FORMATS,
            title='History of simulated tumors',
            axis=[-40, 1, MINSIZE, 12],
            xlabel='years',
            ylabel='diameter (cm, log scale)',
            yscale='log',
        )
        thinkplot.Save(**save_options)
예제 #20
0
def QQPlot(cdf, fit):
    """Saves a QQ plot of actual values against values from the fitted model.

    cdf: actual Cdf of RDT
    fit: model; must provide Value(p)
    """
    thinkplot.Clf()

    # reference line y = x
    diagonal = [-1.5, 5.5]
    thinkplot.Plot(diagonal, diagonal, 'b-')

    # model value at each cumulative probability of the actual Cdf
    actual = cdf.xs
    modeled = [fit.Value(p) for p in cdf.ps]
    thinkplot.Plot(actual, modeled, 'gs')

    save_options = dict(
        root='kidney3',
        formats=FORMATS,
        xlabel='Actual',
        ylabel='Model',
    )
    thinkplot.Save(**save_options)
예제 #21
0
def PlotMarginals(suite):
    """Shows the CDFs of both marginals of a joint distribution side by side.

    suite: joint distribution of mu and sigma.
    """
    thinkplot.Clf()

    # marginal 0 goes in the left panel, marginal 1 in the right panel
    for index in (0, 1):
        pyplot.subplot(1, 2, index + 1)
        marginal = suite.Marginal(index)
        thinkplot.Cdf(thinkbayes.MakeCdfFromPmf(marginal))

    thinkplot.Show()
예제 #22
0
    def MakePlot(self, root='redline1'):
        """Saves the prior and posterior CDFs of the passenger arrival rate.

        root: string
        """
        thinkplot.Clf()
        thinkplot.PrePlot(2)

        # scaling by 60 converts the rates to passengers per minute
        cdfs = [dist.MakeCdf().Scale(60)
                for dist in (self.prior_lam, self.post_lam)]
        thinkplot.Cdfs(cdfs)

        save_options = dict(
            xlabel='Arrival rate (passengers / min)',
            ylabel='CDF',
            formats=FORMATS,
        )
        thinkplot.Save(root=root, **save_options)
예제 #23
0
def ComparePriors():
    """Plots the posteriors obtained from two different priors.

    Both posteriors use the dataset [60] and an upper bound of 1000; the
    figure is saved as 'train4'.
    """
    observations = [60]
    upper_bound = 1000

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    priors = [(Train, 'uniform'), (Train2, 'power law')]
    for constructor, label in priors:
        posterior = MakePosterior(upper_bound, observations, constructor)
        posterior.name = label
        thinkplot.Pmf(posterior)

    axis_labels = dict(xlabel='Number of trains', ylabel='Probability')
    thinkplot.Save(root='train4', **axis_labels)
예제 #24
0
    def MakePlot(self, root='redline3'):
        """Saves the CDFs of prior_x, post_x and pmf_y in one figure.

        root: string
        """
        # convert each distribution to a Cdf, then rescale by 1/60
        # (the x axis is labelled in minutes)
        sources = (self.prior_x, self.post_x, self.pmf_y)
        cdfs = ScaleDists([dist.MakeCdf() for dist in sources], 1.0 / 60)

        thinkplot.Clf()
        thinkplot.PrePlot(3)
        thinkplot.Cdfs(cdfs)

        save_options = dict(
            xlabel='Time (min)',
            ylabel='CDF',
            formats=FORMATS,
        )
        thinkplot.Save(root=root, **save_options)
예제 #25
0
    def PlotConditionalCdfs(self):
        """Saves the cdf of ages for each of four buckets."""
        # bucket -> display name; the buckets are annotated in the source
        # as 2.01, 4.95 cm, 9.97 cm, 14.879 cm
        named_buckets = [
            (7.0, '2 cm'),
            (16.0, '5 cm'),
            (23.0, '10 cm'),
            (27.0, '15 cm'),
        ]
        cdfs = [self.cache.ConditionalCdf(bucket, name)
                for bucket, name in named_buckets]

        thinkplot.Clf()
        thinkplot.PrePlot(num=len(cdfs))
        thinkplot.Cdfs(cdfs)

        save_options = dict(
            root='kidney6',
            title='Distribution of age for several diameters',
            formats=FORMATS,
            xlabel='tumor age (years)',
            ylabel='CDF',
            loc=4,
        )
        thinkplot.Save(**save_options)
예제 #26
0
def PlotExpectedGains(guess1=20000, guess2=40000, path='.', save=True):
    """Plots expected gains as a function of bid for both players.

    Also runs MakePlots, prints summary statistics of each player's prior
    and posterior, and plots each player's beliefs ('price3', 'price4').

    guess1: player1's estimate of the price of showcase 1
    guess2: player2's estimate of the price of showcase 2
    path: forwarded to MakePlayers
    save: if true, the expected-gain figure is saved as 'price5'
    """
    player1, player2 = MakePlayers(path)
    MakePlots(player1, player2)

    player1.MakeBeliefs(guess1)
    player2.MakeBeliefs(guess2)

    # caption -> callable producing the statistic, printed in this order
    summaries = [
        ('Player 1 prior mle', player1.prior.MaximumLikelihood),
        ('Player 2 prior mle', player2.prior.MaximumLikelihood),
        ('Player 1 mean', player1.posterior.Mean),
        ('Player 2 mean', player2.posterior.Mean),
        ('Player 1 mle', player1.posterior.MaximumLikelihood),
        ('Player 2 mle', player2.posterior.MaximumLikelihood),
    ]
    for caption, compute in summaries:
        print(caption, compute())

    player1.PlotBeliefs('price3')
    player2.PlotBeliefs('price4')

    # each calculator takes the player first and the opponent second
    contenders = [
        ('Player 1', 'Player 1 optimal bid', GainCalculator(player1, player2)),
        ('Player 2', 'Player 2 optimal bid', GainCalculator(player2, player1)),
    ]

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    for label, caption, calc in contenders:
        bids, gains = calc.ExpectedGains()
        thinkplot.Plot(bids, gains, label=label)
        # the largest (gain, bid) pair is reported as the optimal bid
        print(caption, max(zip(gains, bids)))

    if not save:
        return

    save_options = dict(
        root='price5',
        xlabel='bid ($)',
        ylabel='expected gain ($)',
        formats=FORMATS,
    )
    thinkplot.Save(**save_options)
예제 #27
0
    def MakePlot(self, root='redline2'):
        """Prints the means of z, zb and y and saves their CDFs.

        root: string
        """
        dists = [
            ('Mean z', self.pmf_z),
            ('Mean zb', self.pmf_zb),
            ('Mean y', self.pmf_y),
        ]

        # means are divided by 60 before printing
        for caption, pmf in dists:
            print(caption, pmf.Mean() / 60)

        # the Cdfs are rescaled by 1/60 (the x axis is labelled in minutes)
        cdfs = ScaleDists([pmf.MakeCdf() for _, pmf in dists], 1.0 / 60)

        thinkplot.Clf()
        thinkplot.PrePlot(3)
        thinkplot.Cdfs(cdfs)

        save_options = dict(
            xlabel='Time (min)',
            ylabel='CDF',
            formats=FORMATS,
        )
        thinkplot.Save(root=root, **save_options)
예제 #28
0
def main():
    """Compares priors, then plots posteriors for three upper bounds.

    After ComparePriors(), a posterior is computed with the Train2 prior
    for each upper bound in [500, 1000, 2000] using the dataset
    [30, 60, 90]; the posteriors are saved in 'train3' and their means are
    printed. The 5th-95th percentile interval of the last posterior
    (high=2000) is then printed twice, computed in two different ways.
    """
    ComparePriors()

    dataset = [30, 60, 90]

    thinkplot.Clf()
    thinkplot.PrePlot(num=3)

    for high in [500, 1000, 2000]:
        suite = MakePosterior(high, dataset, Train2)
        # draw the posterior; previously nothing was plotted, so the saved
        # 'train3' figure contained no curves
        thinkplot.Pmf(suite, label=str(high))
        print(high, suite.Mean())

    thinkplot.Save(root='train3',
                   xlabel='Number of trains',
                   ylabel='Probability')

    # interval from the Percentile helper applied to the Pmf
    interval = Percentile(suite, 5), Percentile(suite, 95)
    print(interval)

    # the same interval read from the Cdf
    cdf = thinkbayes.MakeCdfFromPmf(suite)
    interval = cdf.Percentile(5), cdf.Percentile(95)
    print(interval)
예제 #29
0
def CH6_2(price1, price2):
    """Shows the price distributions of the two showcases.

    price1, price2: sequences of prices, one per showcase
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    # Price values do not repeat, so a Pmf built straight from the lists
    # would show nothing useful; an estimated pdf is evaluated on a
    # common grid instead.
    upper = max(max(price1), max(price2)) + 100
    grid = numpy.linspace(0, upper, num=150)

    pdfs = [thinkbayes.EstimatedPdf(prices) for prices in (price1, price2)]
    names = ['showcase1', 'showcase2']
    pmfs = [pdf.MakePmf(grid, name=name) for pdf, name in zip(pdfs, names)]

    thinkplot.Pmfs(pmfs)
    thinkplot.Show(xlabel='price $', ylabel='PMF')
예제 #30
0
    def MakePlot(self, root='redline4'):
        """Saves a plot of the mixture on top of its component CDFs."""
        thinkplot.Clf()

        # one faint blue line per entry of the MetaPmf, in the order
        # returned by Items(); the line width is derived from the
        # entry's probability
        faint = dict(alpha=0.2, color='blue', label='')
        for component, prob in self.metapmf.Items():
            scaled = component.MakeCdf().Scale(1.0/60)
            thickness = 2/math.log(-math.log(prob))
            thinkplot.Plot(scaled.xs, scaled.ps, linewidth=thickness, **faint)

        # the mixture itself, rescaled by 1/60 like the components
        thinkplot.PrePlot(2)
        mixture_cdf = self.mixture.MakeCdf(name='mix').Scale(1.0/60)
        thinkplot.Cdf(mixture_cdf)

        save_options = dict(
            xlabel='Wait time (min)',
            ylabel='CDF',
            formats=FORMATS,
            axis=[0,10,0,1],
        )
        thinkplot.Save(root=root, **save_options)