예제 #1
0
def comparePriors():
    """Plot the posteriors produced by two different priors side by side.

    Builds one posterior per prior constructor from the single observation
    60 with an upper bound of 1000, draws both PMFs on the same figure and
    saves it under the root name 'train4'.
    """
    observations = [60]
    upper_bound = 1000

    thinkplot.clf()
    thinkplot.prePlot(num=2)

    # Author's notes on the two priors (behavior of Train / Train2 is
    # defined elsewhere -- confirm against those classes):
    #   * uniform prior: every hypothesis 1 ... 1000 gets probability 1/1000
    #   * power-law prior: hypothesis `hypo` gets weight 1/hypo
    # In both cases the prior is normalized, multiplied by the likelihood
    # during the update, and normalized again (open question from the
    # author: why is the second normalization needed?).
    labelled_priors = (('uniform', Train), ('power law', Train2))

    for label, constructor in labelled_priors:
        posterior = makePosterior(upper_bound, observations, constructor)
        posterior.name = label
        thinkplot.pmf(posterior)

    thinkplot.save(
        root='train4',
        xlabel='Number of trains',
        ylabel='Probability',
    )
예제 #2
0
def plotCoefVariation(suites):
    """Plot the posterior CDFs of the coefficient of variation (CV).

    For every labelled suite the CV distribution is derived with
    coefVariation(), its mean is printed and its CDF is drawn.  The figure
    is saved under the root name 'variability_cv'.  Finally the probability
    that each group's CV exceeds the other's is printed.

    suites: map from label to the suite handed to coefVariation(); it must
        contain the keys 'female' and 'male'.
    """
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    pmfs = {}
    # items() works on Python 2 and 3; iteritems() no longer exists on 3.
    for label, suite in suites.items():
        pmf = coefVariation(suite)
        print('CV posterior mean', pmf.mean())
        cdf = thinkbayes.makeCdfFromPmf(pmf, label)
        thinkplot.cdf(cdf)

        # keep the PMF around for the pairwise comparison below
        pmfs[label] = pmf

    thinkplot.save(root='variability_cv',
                   xlabel='Coefficient of variation',
                   ylabel='Probability')

    print('female bigger',
          thinkbayes.pmfProbGreater(pmfs['female'], pmfs['male']))
    print('male bigger', thinkbayes.pmfProbGreater(pmfs['male'],
                                                   pmfs['female']))
예제 #3
0
    def makePlot(self, root='redline4'):
        """Draw each component CDF of the meta-PMF together with the mixture.

        root: string filename root for the saved figure
        """
        thinkplot.clf()

        # x values are rescaled by 1/60 (presumably seconds -> minutes,
        # matching the axis label below).
        scale = 1.0 / 60

        # One faint blue line per (pmf, prob) entry of the meta-PMF.
        for component, weight in sorted(self.metaPmf.items()):
            component_cdf = component.makeCdf().scale(scale)
            # NOTE(review): this expression is only positive for
            # weight < 1/e and raises for weight == 1/e or weight == 1 --
            # confirm the weights always stay in the safe range.
            line_width = 2 / math.log(-math.log(weight))
            thinkplot.plot(
                component_cdf.xs,
                component_cdf.ps,
                alpha=0.2,
                linewidth=line_width,
                color='blue',
                label='',
            )

        # Overlay the mixture distribution (the point-estimate curve that
        # used to be drawn here is disabled).
        thinkplot.prePlot(2)
        mixture_cdf = self.mixture.makeCdf(name='mix').scale(scale)
        thinkplot.cdf(mixture_cdf)

        thinkplot.save(
            root=root,
            xlabel='Wait time (min)',
            ylabel='CDF',
            formats=FORMATS,
            axis=[0, 10, 0, 1],
        )
예제 #4
0
    def plotBeliefs(self, root):
        """Draw the prior and the posterior on one figure and save it.

        root: string filename root for saved figure
        """
        thinkplot.clf()
        thinkplot.prePlot(num=2)

        beliefs = [self.prior, self.posterior]
        thinkplot.pmfs(beliefs)

        thinkplot.save(
            root=root,
            xlabel='price ($)',
            ylabel='PMF',
            formats=FORMATS,
        )
예제 #5
0
def plotPosterior(suite, pcolor=False, contour=True):
    """Plot the joint posterior distribution and save the figure.

    The figure root is 'variability_posterior_' followed by suite.name.

    suite: Suite that maps (mu, sigma) to probability
    pcolor: forwarded unchanged to thinkplot.contour
    contour: forwarded unchanged to thinkplot.contour
    """
    thinkplot.clf()

    joint = suite.getDict()
    thinkplot.contour(joint, pcolor=pcolor, contour=contour)

    figure_root = 'variability_posterior_%s' % suite.name
    thinkplot.save(
        root=figure_root,
        title='Posterior joint distribution',
        xlabel='Mean height (cm)',
        ylabel='Stddev (cm)',
    )
예제 #6
0
def plotSuites(suites, root):
    """Plot the PMFs of several suites on one figure and save it.

    suites: sequence of Suite objects (one color is reserved per suite)
    root: string filename root to write; saved as both pdf and eps
    """
    thinkplot.clf()

    suite_count = len(suites)
    thinkplot.prePlot(suite_count)
    thinkplot.pmfs(suites)

    thinkplot.save(
        root=root,
        xlabel='x',
        ylabel='Probability',
        formats=['pdf', 'eps'],
    )
예제 #7
0
    def plotPmfs(self, root='redline0'):
        """Plot the PMFs of z and zb, rescaled by 1/60, and save the figure.

        root: string filename root for the saved figure
        """
        # Rescale the x values by 1/60 (presumably seconds -> minutes, to
        # match the x-axis label).
        pmfs = scaleDists([self.pmf_z, self.pmf_zb], 1.0 / 60)

        thinkplot.clf()
        thinkplot.prePlot(2)
        thinkplot.pmfs(pmfs)
        # The curves drawn are PMFs, so the y axis is labelled 'PMF'
        # (it used to say 'CDF', which mislabelled the figure).
        thinkplot.save(root=root,
                       xlabel='Time (min)',
                       ylabel='PMF',
                       formats=FORMATS)
예제 #8
0
def plotPriorDist(pmf):
    """Draw the CDF of the prior distribution of p_correct and save it.

    The figure is written as a pdf under the root name 'sat_1_prior'.

    pmf: prior
    """
    thinkplot.clf()
    thinkplot.prePlot(num=1)

    prior_cdf = thinkbayes.makeCdfFromPmf(pmf, 'prior')
    thinkplot.cdf(prior_cdf)

    # Only pdf output is produced here (eps was previously enabled too).
    thinkplot.save(
        root='sat_1_prior',
        xlabel='p_correct',
        ylabel='CDF',
        formats=['pdf'],
    )
예제 #9
0
def plotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For each labelled sample only the values below 150 are kept; one CDF
    per label is drawn and the figure is saved under the root name
    'variability_cdfs'.

    samples: map from label to sequence of values
    """
    cdfs = []
    # items() works on Python 2 and 3; iteritems() no longer exists on 3.
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]

        cdf = thinkbayes.makeCdfFromList(outliers, label)
        cdfs.append(cdf)

    thinkplot.clf()
    thinkplot.cdfs(cdfs)
    thinkplot.save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
예제 #10
0
def plotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    Jitters the data and subtracts away the mean, then shows the figure.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.clf()
    # items() works on Python 2 and 3; iteritems() no longer exists on 3.
    for key, xs in d.items():
        # The mean is taken from the raw data, before jitter is added.
        mu = thinkstats.mean(xs)
        xs = thinkstats.jitter(xs, 1.3)
        xs = [x - mu for x in xs]
        cdf = thinkbayes.makeCdfFromList(xs)
        thinkplot.cdf(cdf, label=labels[key])
    thinkplot.show()
예제 #11
0
    def plotPosteriors(self, other):
        """Draw the posterior efficacy CDFs of two test takers and save them.

        Each curve is named 'posterior <score>' after the object's score.

        self, other: Sat objects.
        """
        thinkplot.clf()
        thinkplot.prePlot(num=2)

        posterior_cdfs = [
            thinkbayes.makeCdfFromPmf(sat, 'posterior %d' % sat.score)
            for sat in (self, other)
        ]
        thinkplot.cdfs(posterior_cdfs)

        thinkplot.save(
            xlabel='efficacy',
            ylabel='CDF',
            axis=[0, 4.6, 0.0, 1.0],
            root='sat_5_posteriors_eff',
            formats=['pdf'],
        )
예제 #12
0
    def calibrateDifficulty(self):
        """Plot the observed raw-score CDF against the modelled one.

        The figure is saved as a pdf under the root name 'sat_2_calibrate'.
        """
        thinkplot.clf()
        thinkplot.prePlot(num=2)

        # Observed distribution of raw scores.
        data_cdf = thinkbayes.makeCdfFromPmf(self.raw, name='data')
        thinkplot.cdf(data_cdf)

        # Modelled distribution: raw scores implied by a Gaussian PMF of
        # efficacies (author's note: mixture model of raw score,
        # prob = p1 * p2).
        efficacies = thinkbayes.makeGaussianPmf(0, 1.5, 3)
        model_pmf = self.makeRawScoreDist(efficacies)
        model_cdf = thinkbayes.makeCdfFromPmf(model_pmf, name='model')
        thinkplot.cdf(model_cdf)

        thinkplot.save(
            root='sat_2_calibrate',
            xlabel='raw score',
            ylabel='CDF',
            formats=['pdf'],
        )
예제 #13
0
def plotMarginals(suite):
    """Show the CDFs of both marginals of a joint distribution side by side.

    Marginal 0 goes in the left panel and marginal 1 in the right panel of
    a 1x2 subplot grid.

    suite: joint distribution of mu and sigma.
    """
    thinkplot.clf()

    for index in (0, 1):
        # subplot panels are numbered from 1
        pyplot.subplot(1, 2, index + 1)
        marginal_pmf = suite.marginal(index)
        marginal_cdf = thinkbayes.makeCdfFromPmf(marginal_pmf)
        thinkplot.cdf(marginal_cdf)

    thinkplot.show()
예제 #14
0
    def makePlot(self, root='redline1'):
        """Draw the prior and posterior CDFs of the passenger arrival rate.

        root: string filename root for the saved figure
        """
        thinkplot.clf()
        thinkplot.prePlot(2)

        # Scale by 60 to express the rate in passengers per minute.
        rate_cdfs = [
            dist.makeCdf().scale(60)
            for dist in (self.priorLambda, self.posteriorLambda)
        ]
        thinkplot.cdfs(rate_cdfs)

        thinkplot.save(
            root=root,
            xlabel='Arrival rate (passengers / min)',
            ylabel='CDF',
            formats=FORMATS,
        )
예제 #15
0
    def makePlot(self, root='redline3'):
        """Draw the CDFs of prior x, posterior x and y, rescaled by 1/60.

        root: string filename root for the saved figure
        """
        # Build the three CDFs in the order they are drawn.
        sources = (self.prior_x, self.post_x, self.pmf_y)
        raw_cdfs = [dist.makeCdf() for dist in sources]

        # Rescale x values by 1/60 (presumably seconds -> minutes, matching
        # the x-axis label).
        scaled_cdfs = scaleDists(raw_cdfs, 1.0 / 60)

        thinkplot.clf()
        thinkplot.prePlot(3)
        thinkplot.cdfs(scaled_cdfs)
        thinkplot.save(
            root=root,
            xlabel='Time (min)',
            ylabel='CDF',
            formats=FORMATS,
        )
예제 #16
0
def plotExpectedGains(guess1=20000, guess2=40000):
    """Plot each player's expected gain as a function of the bid.

    Also produces the prior plots via makePlots(), prints summary
    statistics of each player's prior and posterior, and saves one
    prior/posterior figure per player.

    guess1: player1's estimate of the price of showcase 1
    guess2: player2's estimate of the price of showcase 2
    """
    player1, player2 = makePlayers()
    makePlots(player1, player2)

    # Update each player's beliefs with their own guess.
    player1.makeBeliefs(guess1)
    player2.makeBeliefs(guess2)

    print('\n\nPlayer 1 prior mle', player1.prior.maximumLikelihood())
    print('Player 2 prior mle', player2.prior.maximumLikelihood())
    print('\nPlayer 1 mean', player1.posterior.mean())
    print('Player 2 mean', player2.posterior.mean())
    print('\nPlayer 1 mle', player1.posterior.maximumLikelihood())
    print('Player 2 mle', player2.posterior.maximumLikelihood())

    # Figure roots were formerly 'price3' and 'price4'.
    player1.plotBeliefs('price3_prior,posterior_player1')
    player2.plotBeliefs('price4_prior,posterior_player2')

    # Each calculator takes the bidding player first, the opponent second.
    calc1 = GainCalculator(player1, player2)
    calc2 = GainCalculator(player2, player1)

    thinkplot.clf()
    thinkplot.prePlot(num=2)

    # Results recorded by the original author:
    #   player 1: optimal bid = 21,000, expected gain = 16,700, guess = 20,000
    #   player 2: optimal bid = 31,500, expected gain = 19,400, guess = 40,000
    curves = (
        (calc1, 'Player 1', '\nPlayer 1 optimal bid'),
        (calc2, 'Player 2', 'Player 2 optimal bid'),
    )
    for calculator, legend, caption in curves:
        bids, gains = calculator.expectedGains()
        thinkplot.plot(bids, gains, label=legend)
        # max over (gain, bid) pairs yields the best gain and its bid
        print(caption, max(zip(gains, bids)))

    thinkplot.save(
        root='price5_expectedGainsFromBids_player1,2',
        xlabel='bid ($)',
        ylabel='expected gain ($)',
        formats=FORMATS,
    )
예제 #17
0
    def makePlot(self, root='redline2'):
        """Print the means of z, zb and y, then plot and save their CDFs.

        Means are divided by 60 and x values are rescaled by 1/60
        (presumably seconds -> minutes, matching the x-axis label).

        root: string filename root for the saved figure
        """
        divisor = 60
        print('Mean z', self.pmf_z.mean() / divisor)
        print('Mean zb', self.pmf_zb.mean() / divisor)
        print('Mean y', self.pmf_y.mean() / divisor)

        raw_cdfs = [
            pmf.makeCdf()
            for pmf in (self.pmf_z, self.pmf_zb, self.pmf_y)
        ]
        scaled_cdfs = scaleDists(raw_cdfs, 1.0 / 60)

        thinkplot.clf()
        thinkplot.prePlot(3)
        thinkplot.cdfs(scaled_cdfs)
        thinkplot.save(
            root=root,
            xlabel='Time (min)',
            ylabel='CDF',
            formats=FORMATS,
        )
예제 #18
0
def makePlots(player1, player2):
    """Save two figures describing both players before any update.

    'price1_showcase1,2_priorPmfs' shows the price distribution of each
    player's showcase; 'price2_diffs_cdf' shows the CDF of diff for each
    player.  The medians of diff and each player's overbid probability
    are printed along the way.
    """
    # --- figure 1: prior distribution of price for both players ---
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    price_pmfs = []
    for player, name in ((player1, 'showcase 1'), (player2, 'showcase 2')):
        price_pmf = player.pmfPrice()
        price_pmf.name = name
        price_pmfs.append(price_pmf)

    thinkplot.pmfs(price_pmfs)
    thinkplot.save(
        root='price1_showcase1,2_priorPmfs',
        xlabel='price ($)',
        ylabel='PDF',
        formats=FORMATS,
    )

    # --- figure 2: historical distribution of underness for both players ---
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    diff_cdfs = []
    for player, name in ((player1, 'player 1'), (player2, 'player 2')):
        diff_cdf = player.cdfDiff()
        diff_cdf.name = name
        diff_cdfs.append(diff_cdf)
    first_cdf, second_cdf = diff_cdfs

    print('\n\nPlayer median', first_cdf.percentile(50))
    print('Player median', second_cdf.percentile(50))

    print('\nPlayer 1 overbids', player1.probOverbid())
    print('Player 2 overbids', player2.probOverbid())

    thinkplot.cdfs(diff_cdfs)
    thinkplot.save(
        root='price2_diffs_cdf',
        xlabel='diff ($)',
        ylabel='CDF',
        formats=FORMATS,
    )
예제 #19
0
def main():
    """Compare priors, then show how the upper bound affects the posterior.

    After comparePriors(), a power-law posterior (Train2) is computed for
    the observations [30, 60, 90] with upper bounds 500, 1000 and 2000.
    Each posterior's mean is printed and its PMF is drawn on a common
    figure saved as 'train3'.  Finally the 5th-95th percentile interval of
    the last posterior is printed twice: once via percentile() on the
    suite and once via its CDF.
    """
    comparePriors()

    dataset = [30, 60, 90]

    thinkplot.clf()
    thinkplot.prePlot(num=3)

    for high in [500, 1000, 2000]:
        suite = makePosterior(high, dataset, Train2)
        print(high, suite.mean())
        # Draw each posterior, labelled by its upper bound.  Without this
        # nothing was ever plotted and save() wrote an empty figure.
        suite.name = str(high)
        thinkplot.pmf(suite)

    thinkplot.save(root='train3',
                   xlabel='Number of trains',
                   ylabel='Probability')

    # From here on `suite` is the posterior of the last iteration
    # (upper bound 2000).
    interval = percentile(suite, 5), percentile(suite, 95)
    print(interval)

    cdf = thinkbayes.makeCdfFromPmf(suite)
    interval = cdf.percentile(5), cdf.percentile(95)
    print(interval)
예제 #20
0
def runLoop(gap_times, nums, lmbda=0.0333):
    """Plot the probability of a long wait for a range of num_passengers.

    Resets the module-level UPPER_BOUND to 4000, seeds the random number
    generator, rebuilds the gap-time distribution from a resample of
    gap_times, and for every entry of nums computes the probability that
    the wait exceeds 900 (15 minutes according to the axis label).  The
    resulting curve is saved under the root name 'redline5'.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lmbda: arrival rate in passengers per second

    Returns: None
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.clf()

    randomSeed(18)

    # Resample the observed gap times.
    sample_size = 220
    observed_cdf = thinkbayes.makeCdfFromList(gap_times)
    resampled_gaps = observed_cdf.sample(sample_size)
    resampled_pmf = thinkbayes.makePmfFromList(resampled_gaps)

    # Bias the PMF, sample from it, and append a few long delays.
    biased_cdf = biasPmf(resampled_pmf).makeCdf()
    sample_zb = biased_cdf.sample(sample_size) + [1800, 2400, 3000]

    # Smooth the distribution of zb with an estimated PDF.
    estimated_pdf = thinkbayes.EstimatedPDF(sample_zb)
    xs = makeRange(low=60)
    pmf_zb = estimated_pdf.makePmf(xs)

    # Unbias the smoothed distribution and build the wait-time calculator.
    unbiased_pmf = unbiasPmf(pmf_zb)
    wtc = WaitTimeCalculator(unbiased_pmf)

    # Author's notes (probability-of-long-wait part, page 89 of the book):
    # given the number of passengers on the platform we
    #   * build an ElapsedTimeEstimator,
    #   * extract the distribution of the wait time (y),
    #   * compute the probability that the wait exceeds 15 minutes.
    # Observed result: with fewer than 20 passengers the system is
    # operating normally, so a long delay is unlikely; with more than 30
    # passengers it has been about 15 minutes since the last train, which
    # is longer than a normal delay, so it is time to take a taxi.
    def prob_long_wait(num_passengers):
        """Return the posterior probability of waiting more than 900."""
        ete = ElapsedTimeEstimator(wtc, lmbda, num_passengers)
        wait_cdf = ete.pmf_y.makeCdf()
        return 1 - wait_cdf.prob(900)

    probs = [prob_long_wait(num_passengers) for num_passengers in nums]

    thinkplot.plot(nums, probs)
    thinkplot.save(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
def main():
    """Produce the three 'dungeons' figures.

    dungeons3: histogram of the mixture obtained from a weighted box of
        4-, 6-, 8-, 12- and 20-sided dice.
    dungeons1: sum of three d6, by simulation and by enumeration.
    dungeons2: distribution of the best of six three-d6 attributes,
        computed by repeated pmfMax() and by the CDF-based max().
    """
    # Weighted box of dice as (sides, weight) pairs.
    dice_pmf = thinkbayes.PMF()
    for sides, weight in ((4, 5), (6, 4), (8, 3), (12, 2), (20, 1)):
        dice_pmf.set(Die(sides), weight)
    dice_pmf.normalize()

    # Die objects are used as PMF keys, so they must be hashable (this
    # used to fail with an "unhashable" error).  Background reading:
    # http://stackoverflow.com/questions/10994229/how-to-make-an-object-properly-hashable
    # http://stackoverflow.com/questions/2909106/python-whats-a-correct-and-good-way-to-implement-hash

    # Mixture built by hand, for illustration only: the result is not
    # used, the library call below provides the mixture that is plotted.
    manual_mix = thinkbayes.PMF()
    for die, die_weight in dice_pmf.items():
        for outcome, prob in die.items():
            manual_mix.incr(outcome, die_weight * prob)

    mix = thinkbayes.makeMixture(dice_pmf)

    palette = thinkplot.Brewer.getColors()
    thinkplot.hist(mix, width=0.9, color=palette[4])
    thinkplot.save(
        root='dungeons3',
        xlabel='Outcome',
        ylabel='Probability',
        formats=FORMATS,
    )

    random.seed(17)

    d6 = Die(6, 'd6')

    # Distribution of the sum of three dice by SIMULATION.
    three_dice = [d6, d6, d6]
    sampled_sum = thinkbayes.sampleSum(three_dice, 1000)
    sampled_sum.name = 'sample'
    print("\n\nSAMPLING: ")
    sampled_sum.printSuite()

    # Distribution of the sum of three dice by ENUMERATION.
    exact_sum = d6 + d6 + d6
    exact_sum.name = 'exact'
    print("\n\nENUMERATION:")
    exact_sum.printSuite()

    thinkplot.prePlot(num=2)
    thinkplot.pmf(sampled_sum)
    thinkplot.pmf(exact_sum, linestyle='dashed')
    thinkplot.save(
        root='dungeons1',
        xlabel='Sum of three d6',
        ylabel='Probability',
        axis=[2, 19, 0, 0.15],
        formats=FORMATS,
    )

    thinkplot.clf()
    thinkplot.prePlot(num=1)

    # PMF of the best attribute, via pairwise maxima: 2, then 4, then
    # 4 + 2 = 6 attributes.
    best_of_2 = pmfMax(exact_sum, exact_sum)
    best_of_4 = pmfMax(best_of_2, best_of_2)
    best_of_6 = pmfMax(best_of_4, best_of_2)
    thinkplot.pmf(best_of_6)

    # Same distribution through the max() method (author's note: the
    # efficient Max() in class Cdf).
    best_cdf = exact_sum.max(6)
    best_cdf.name = ''
    best_pmf = thinkbayes.makePmfFromCdf(best_cdf)
    best_pmf.printSuite()

    thinkplot.pmf(best_pmf)
    thinkplot.save(
        root='dungeons2',
        xlabel='Sum of three d6',
        ylabel='Probability',
        axis=[2, 19, 0, 0.23],
        formats=FORMATS,
    )