def comparePriors():
    """Plot the posteriors produced by a uniform and a power-law prior.

    One posterior is built per prior for the single observation 60 with an
    upper bound of 1000; both PMFs are drawn on the same figure, which is
    saved under the root name 'train4'.
    """
    upper_bound = 1000
    observations = [60]

    thinkplot.clf()
    thinkplot.prePlot(num=2)

    # Author's notes on the two priors:
    #   * uniform: every hypothesis from 1 ... 1000 gets probability 1/1000;
    #   * power law: hypothesis `hypo` gets weight 1/hypo, normalized by
    #     dividing by the total.
    # In both cases the prior is then updated by multiplying by the
    # likelihood and normalized again (open question in the notes: why?).
    priors = [('uniform', Train), ('power law', Train2)]

    for name, make_suite in priors:
        posterior = makePosterior(upper_bound, observations, make_suite)
        posterior.name = name
        thinkplot.pmf(posterior)

    thinkplot.save(root='train4',
                   xlabel='Number of trains',
                   ylabel='Probability')
def plotCoefVariation(suites):
    """Plot the posterior distributions of the coefficient of variation (CV).

    For every labelled suite the CV distribution is computed with
    coefVariation, its mean is printed and its CDF is drawn.  The figure is
    saved under the root name 'variability_cv'.  Finally the result of
    thinkbayes.pmfProbGreater is printed for female-vs-male and
    male-vs-female.

    suites: map from label to suite; must contain the labels 'female' and
        'male', which are looked up after plotting.
    """
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    pmfs = {}
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for label, suite in suites.items():
        pmf = coefVariation(suite)
        print('CV posterior mean', pmf.mean())

        cdf = thinkbayes.makeCdfFromPmf(pmf, label)
        thinkplot.cdf(cdf)

        # Keep the PMF so the two groups can be compared below.
        pmfs[label] = pmf

    thinkplot.save(root='variability_cv',
                   xlabel='Coefficient of variation',
                   ylabel='Probability')

    print('female bigger',
          thinkbayes.pmfProbGreater(pmfs['female'], pmfs['male']))
    print('male bigger',
          thinkbayes.pmfProbGreater(pmfs['male'], pmfs['female']))
def makePlot(self, root='redline4'):
    """Draw every component of the MetaPmf plus the mixture CDF and save it.

    All CDFs are scaled by 1/60 before plotting (presumably seconds to
    minutes, matching the 'Wait time (min)' axis label).

    root: string filename root for the saved figure
    """
    scale = 1.0 / 60
    thinkplot.clf()

    # One faint blue line per (pmf, prob) entry of the MetaPmf.
    for component, weight in sorted(self.metaPmf.items()):
        component_cdf = component.makeCdf().scale(scale)
        # Line width is derived from the component's probability.
        # NOTE(review): this expression is negative for weight > 1/e and
        # undefined for weight == 1/e or weight == 1 -- confirm the weights
        # always stay below 1/e.
        line_width = 2 / math.log(-math.log(weight))
        thinkplot.plot(component_cdf.xs,
                       component_cdf.ps,
                       alpha=0.2,
                       linewidth=line_width,
                       color='blue',
                       label='')

    # Overlay the mixture.  The curve based on a point estimate
    # (self.point) is currently disabled.
    thinkplot.prePlot(2)
    mixture_cdf = self.mixture.makeCdf(name='mix').scale(scale)
    thinkplot.cdf(mixture_cdf)

    thinkplot.save(root=root,
                   xlabel='Wait time (min)',
                   ylabel='CDF',
                   formats=FORMATS,
                   axis=[0, 10, 0, 1])
def plotBeliefs(self, root):
    """Draw self.prior and self.posterior on one figure and save it.

    root: string filename root for the saved figure
    """
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    beliefs = [self.prior, self.posterior]
    thinkplot.pmfs(beliefs)

    thinkplot.save(root=root,
                   xlabel='price ($)',
                   ylabel='PMF',
                   formats=FORMATS)
def plotPosterior(suite, pcolor=False, contour=True):
    """Render a joint posterior with thinkplot.contour and save the figure.

    suite: Suite that maps (mu, sigma) to probability; its `name` is used
        as the suffix of the saved file's root
    pcolor: forwarded to thinkplot.contour
    contour: forwarded to thinkplot.contour
    """
    thinkplot.clf()

    joint = suite.getDict()
    thinkplot.contour(joint, pcolor=pcolor, contour=contour)

    figure_root = 'variability_posterior_%s' % suite.name
    thinkplot.save(root=figure_root,
                   title='Posterior joint distribution',
                   xlabel='Mean height (cm)',
                   ylabel='Stddev (cm)')
def plotSuites(suites, root):
    """Draw the PMFs of all given suites on one figure and save it.

    suites: sequence of Suite objects (one plot colour is reserved per suite)
    root: string filename root to write; saved as both pdf and eps
    """
    how_many = len(suites)

    thinkplot.clf()
    thinkplot.prePlot(how_many)
    thinkplot.pmfs(suites)

    output_formats = ['pdf', 'eps']
    thinkplot.save(root=root,
                   xlabel='x',
                   ylabel='Probability',
                   formats=output_formats)
def plotPmfs(self, root='redline0'):
    """Plot the PMFs pmf_z and pmf_zb on one figure and save it.

    Both distributions are passed through scaleDists with a factor of 1/60
    before plotting (presumably seconds to minutes, matching the x label).

    root: string filename root for the saved figure
    """
    pmfs = scaleDists([self.pmf_z, self.pmf_zb], 1.0 / 60)

    thinkplot.clf()
    thinkplot.prePlot(2)
    thinkplot.pmfs(pmfs)

    # The curves drawn here are PMFs, so the y axis is labelled 'PMF'
    # (it used to read 'CDF', which did not match the plot).
    thinkplot.save(root=root,
                   xlabel='Time (min)',
                   ylabel='PMF',
                   formats=FORMATS)
def plotPriorDist(pmf):
    """Draw the CDF of the prior distribution of p_correct and save it.

    The figure is written as a pdf under the root name 'sat_1_prior'.

    pmf: prior distribution
    """
    thinkplot.clf()
    thinkplot.prePlot(num=1)

    prior_cdf = thinkbayes.makeCdfFromPmf(pmf, 'prior')
    thinkplot.cdf(prior_cdf)

    # Only pdf output is produced; eps output is currently not requested.
    thinkplot.save(root='sat_1_prior',
                   xlabel='p_correct',
                   ylabel='CDF',
                   formats=['pdf'])
def plotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For each labelled sample only the values below 150 are kept; one CDF per
    label is drawn and the figure is saved under the root name
    'variability_cdfs'.

    samples: map from label to sequence of values
    """
    cdfs = []
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(thinkbayes.makeCdfFromList(outliers, label))

    thinkplot.clf()
    thinkplot.cdfs(cdfs)
    thinkplot.save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
def plotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    For every sequence the mean of the raw values is computed first; the
    values are then jittered (thinkstats.jitter with 1.3) and the mean is
    subtracted before the CDF is built.  The figure is shown, not saved.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.clf()

    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for key, xs in d.items():
        mu = thinkstats.mean(xs)
        jittered = thinkstats.jitter(xs, 1.3)
        centered = [x - mu for x in jittered]

        cdf = thinkbayes.makeCdfFromList(centered)
        thinkplot.cdf(cdf, label=labels[key])

    thinkplot.show()
def plotPosteriors(self, other):
    """Draw the posterior CDFs of efficacy for two Sat objects and save them.

    Each curve is labelled 'posterior <score>' using the object's `score`.
    The figure is written as a pdf under the root 'sat_5_posteriors_eff'.

    self, other: Sat objects.
    """
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    posterior_cdfs = [
        thinkbayes.makeCdfFromPmf(sat, 'posterior %d' % sat.score)
        for sat in (self, other)
    ]
    thinkplot.cdfs(posterior_cdfs)

    thinkplot.save(xlabel='efficacy',
                   ylabel='CDF',
                   axis=[0, 4.6, 0.0, 1.0],
                   root='sat_5_posteriors_eff',
                   formats=['pdf'])
def calibrateDifficulty(self):
    """Compare the observed raw-score distribution with the model's.

    Draws the CDF of self.raw (labelled 'data') next to the CDF of the
    distribution returned by makeRawScoreDist for a Gaussian PMF of
    efficacies (labelled 'model'), then saves the figure as a pdf under the
    root name 'sat_2_calibrate'.
    """
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    # Observed raw scores.
    data_cdf = thinkbayes.makeCdfFromPmf(self.raw, name='data')
    thinkplot.cdf(data_cdf)

    # Modelled raw scores; per the author's note this is a mixture model of
    # raw score, prob = p1 * p2.
    efficacies = thinkbayes.makeGaussianPmf(0, 1.5, 3)
    model_pmf = self.makeRawScoreDist(efficacies)
    model_cdf = thinkbayes.makeCdfFromPmf(model_pmf, name='model')
    thinkplot.cdf(model_cdf)

    thinkplot.save(root='sat_2_calibrate',
                   xlabel='raw score',
                   ylabel='CDF',
                   formats=['pdf'])
def plotMarginals(suite):
    """Show the CDFs of both marginals of a joint distribution side by side.

    Marginal 0 goes into the left subplot and marginal 1 into the right one
    of a 1x2 grid.  The figure is shown, not saved.

    suite: joint distribution of mu and sigma.
    """
    thinkplot.clf()

    for index in (0, 1):
        # Subplot positions are 1-based.
        pyplot.subplot(1, 2, index + 1)
        marginal_pmf = suite.marginal(index)
        marginal_cdf = thinkbayes.makeCdfFromPmf(marginal_pmf)
        thinkplot.cdf(marginal_cdf)

    thinkplot.show()
def makePlot(self, root='redline1'):
    """Draw the prior and posterior CDFs of the passenger arrival rate.

    root: string filename root for the saved figure
    """
    thinkplot.clf()
    thinkplot.prePlot(2)

    # Scale by 60 so the rate is expressed in passengers per minute.
    rate_cdfs = [
        self.priorLambda.makeCdf().scale(60),
        self.posteriorLambda.makeCdf().scale(60),
    ]
    thinkplot.cdfs(rate_cdfs)

    thinkplot.save(root=root,
                   xlabel='Arrival rate (passengers / min)',
                   ylabel='CDF',
                   formats=FORMATS)
def makePlot(self, root='redline3'):
    """Draw the CDFs of prior_x, post_x and pmf_y on one figure and save it.

    The three CDFs are passed through scaleDists with a factor of 1/60
    before plotting (presumably seconds to minutes, matching the x label).

    root: string filename root for the saved figure
    """
    sources = (self.prior_x, self.post_x, self.pmf_y)
    unscaled = [dist.makeCdf() for dist in sources]
    scaled = scaleDists(unscaled, 1.0 / 60)

    thinkplot.clf()
    thinkplot.prePlot(3)
    thinkplot.cdfs(scaled)
    thinkplot.save(root=root,
                   xlabel='Time (min)',
                   ylabel='CDF',
                   formats=FORMATS)
def plotExpectedGains(guess1=20000, guess2=40000):
    """Plot expected gain as a function of bid for both players.

    Besides the final figure ('price5_expectedGainsFromBids_player1,2') this
    also produces the figures written by makePlots and by each player's
    plotBeliefs, and prints summary statistics of the priors and posteriors.

    guess1: player1's estimate of the price of showcase 1
    guess2: player2's estimate of the price of showcase 2
    """
    player1, player2 = makePlayers()
    makePlots(player1, player2)

    player1.makeBeliefs(guess1)
    player2.makeBeliefs(guess2)

    # Summary statistics; the leading newlines separate the groups.
    print('\n\nPlayer 1 prior mle', player1.prior.maximumLikelihood())
    print('Player 2 prior mle', player2.prior.maximumLikelihood())

    print('\nPlayer 1 mean', player1.posterior.mean())
    print('Player 2 mean', player2.posterior.mean())

    print('\nPlayer 1 mle', player1.posterior.maximumLikelihood())
    print('Player 2 mle', player2.posterior.maximumLikelihood())

    # Root names were formerly 'price3' and 'price4'.
    player1.plotBeliefs('price3_prior,posterior_player1')
    player2.plotBeliefs('price4_prior,posterior_player2')

    # Each calculator takes the bidding player first and the opponent second.
    calc1 = GainCalculator(player1, player2)
    calc2 = GainCalculator(player2, player1)

    thinkplot.clf()
    thinkplot.prePlot(num=2)

    # Results recorded by the author:
    #   player 1: optimal bid = 21,000, expected gain = 16,700,
    #             best guess = 20,000
    #   player 2: optimal bid = 31,500, expected gain = 19,400,
    #             best guess = 40,000
    runs = [
        (calc1, 'Player 1', '\nPlayer 1 optimal bid'),
        (calc2, 'Player 2', 'Player 2 optimal bid'),
    ]
    for calculator, legend, heading in runs:
        bids, gains = calculator.expectedGains()
        thinkplot.plot(bids, gains, label=legend)
        # max over (gain, bid) pairs selects the pair with the largest gain,
        # so what is printed is a (gain, bid) tuple.
        print(heading, max(zip(gains, bids)))

    thinkplot.save(root='price5_expectedGainsFromBids_player1,2',
                   xlabel='bid ($)',
                   ylabel='expected gain ($)',
                   formats=FORMATS)
def makePlot(self, root='redline2'):
    """Print the means of pmf_z, pmf_zb and pmf_y and plot their CDFs.

    Means are divided by 60 and the CDFs are scaled by 1/60 (presumably
    seconds to minutes, matching the x label).

    root: string filename root for the saved figure
    """
    named_pmfs = (
        ('z', self.pmf_z),
        ('zb', self.pmf_zb),
        ('y', self.pmf_y),
    )

    for tag, pmf in named_pmfs:
        print('Mean ' + tag, pmf.mean() / 60)

    unscaled = [pmf.makeCdf() for _, pmf in named_pmfs]
    scaled = scaleDists(unscaled, 1.0 / 60)

    thinkplot.clf()
    thinkplot.prePlot(3)
    thinkplot.cdfs(scaled)
    thinkplot.save(root=root,
                   xlabel='Time (min)',
                   ylabel='CDF',
                   formats=FORMATS)
def makePlots(player1, player2):
    """Generate two figures for a pair of players.

    'price1_showcase1,2_priorPmfs' shows the price PMFs of the two players.
    'price2_diffs_cdf' shows the CDFs of diff for the two players; the
    medians of those CDFs and each player's probOverbid() are printed too.
    """
    # Figure 1: prior distribution of price for both players.
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    price_pmfs = []
    for player, title in ((player1, 'showcase 1'), (player2, 'showcase 2')):
        price_pmf = player.pmfPrice()
        price_pmf.name = title
        price_pmfs.append(price_pmf)

    thinkplot.pmfs(price_pmfs)
    thinkplot.save(root='price1_showcase1,2_priorPmfs',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # Figure 2: historical distribution of underness for both players.
    thinkplot.clf()
    thinkplot.prePlot(num=2)

    diff_cdfs = []
    for player, title in ((player1, 'player 1'), (player2, 'player 2')):
        diff_cdf = player.cdfDiff()
        diff_cdf.name = title
        diff_cdfs.append(diff_cdf)

    # NOTE(review): both median lines print the same label; the first value
    # belongs to player 1 and the second to player 2.
    print('\n\nPlayer median', diff_cdfs[0].percentile(50))
    print('Player median', diff_cdfs[1].percentile(50))

    print('\nPlayer 1 overbids', player1.probOverbid())
    print('Player 2 overbids', player2.probOverbid())

    thinkplot.cdfs(diff_cdfs)
    thinkplot.save(root='price2_diffs_cdf',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
def main():
    """Run the prior comparison, then vary the upper bound of the prior.

    After comparePriors(), a Train2 posterior is built for the dataset
    [30, 60, 90] with upper bounds 500, 1000 and 2000, printing each bound
    with the posterior mean.  A 5%-95% interval of the last posterior is then
    printed twice: once via percentile() on the suite and once via the
    percentile() method of its CDF.
    """
    comparePriors()

    dataset = [30, 60, 90]

    thinkplot.clf()
    thinkplot.prePlot(num=3)

    for high in [500, 1000, 2000]:
        suite = makePosterior(high, dataset, Train2)
        print(high, suite.mean())

    # TODO: doesn't work:
    # NOTE(review): nothing is plotted inside the loop above, so the figure
    # saved here presumably contains no curves -- likely what the TODO
    # refers to; confirm.
    thinkplot.save(root='train3', xlabel='Number of trains', ylabel='Probability')

    # `suite` is the posterior from the last loop iteration (high = 2000).
    interval = percentile(suite, 5), percentile(suite, 95)
    print(interval)

    # Same interval, computed through the CDF instead.
    cdf = thinkbayes.makeCdfFromPmf(suite)
    interval = cdf.percentile(5), cdf.percentile(95)
    print(interval)
def runLoop(gap_times, nums, lmbda=0.0333):
    """Plot P(y > 15 min) against the number of passengers on the platform.

    Sets the module-level UPPER_BOUND to 4000, seeds the random generator
    with 18 and saves the figure under the root name 'redline5'.  Nothing is
    returned.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lmbda: arrival rate in passengers per second
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.clf()
    randomSeed(18)

    # Resample the observed gap times.
    sample_size = 220
    observed_cdf = thinkbayes.makeCdfFromList(gap_times)
    resampled = observed_cdf.sample(sample_size)
    resampled_pmf = thinkbayes.makePmfFromList(resampled)

    # Compute the biased distribution, sample it and add some long delays.
    biased_cdf = biasPmf(resampled_pmf).makeCdf()
    biased_sample = biased_cdf.sample(sample_size) + [1800, 2400, 3000]

    # Smooth the distribution of zb.
    estimated_pdf = thinkbayes.EstimatedPDF(biased_sample)
    grid = makeRange(low=60)
    smoothed_zb = estimated_pdf.makePmf(grid)

    # Unbias the distribution of zb and build the wait-time calculator.
    unbiased_z = unbiasPmf(smoothed_zb)
    wtc = WaitTimeCalculator(unbiased_z)

    # Author's notes (probability of a long wait, page 89): given the number
    # of passengers on the platform, build an ElapsedTimeEstimator, take its
    # distribution of wait time (y) and compute the probability that the
    # wait exceeds 15 minutes.
    # Result: with fewer than 20 passengers the system is operating normally
    # and the probability of a long delay is small; with more than 30 it has
    # been 15 minutes since the last train, which is longer than a normal
    # delay, so take a taxi.
    long_wait_probs = []
    for passenger_count in nums:
        estimator = ElapsedTimeEstimator(wtc, lmbda, passenger_count)
        # Posterior probability of waiting more than 15 minutes (900).
        wait_cdf = estimator.pmf_y.makeCdf()
        long_wait_probs.append(1 - wait_cdf.prob(900))

    thinkplot.plot(nums, long_wait_probs)
    thinkplot.save(root='redline5',
                   xlabel='Num passengers',
                   ylabel='P(y > 15 min)',
                   formats=FORMATS)
def main():
    """Produce the three 'dungeons' figures and print the related suites.

    dungeons3: histogram of the outcome mixture over a weighted set of dice.
    dungeons1: sum of three d6, sampled (1000 draws) versus enumerated.
    dungeons2: distribution of the best of six such sums, computed once by
        repeated pmfMax and once through the max(6) method.
    """
    # Weighted collection of dice: (number of sides, weight).
    dice_prior = thinkbayes.PMF()
    for sides, weight in [(4, 5), (6, 4), (8, 3), (12, 2), (20, 1)]:
        dice_prior.set(Die(sides), weight)
    dice_prior.normalize()

    # @fix: there used to be an "unhashable" error here, see
    # http://stackoverflow.com/questions/10994229/how-to-make-an-object-properly-hashable
    # http://stackoverflow.com/questions/2909106/python-whats-a-correct-and-good-way-to-implement-hash

    # Mixture accumulated by hand ...
    mix = thinkbayes.PMF()
    for die, weight in dice_prior.items():
        for outcome, prob in die.items():
            mix.incr(outcome, weight * prob)

    # ... and then replaced by the one built by the library helper.
    mix = thinkbayes.makeMixture(dice_prior)

    bar_color = thinkplot.Brewer.getColors()[4]
    thinkplot.hist(mix, width=0.9, color=bar_color)
    thinkplot.save(root='dungeons3',
                   xlabel='Outcome',
                   ylabel='Probability',
                   formats=FORMATS)

    random.seed(17)

    d6 = Die(6, 'd6')

    # Distribution of the sum of three rolled dice by SIMULATION.
    sampled_sum = thinkbayes.sampleSum([d6] * 3, 1000)
    sampled_sum.name = 'sample'
    print("\n\nSAMPLING: ")
    sampled_sum.printSuite()

    # Distribution of the sum of three rolled dice by ENUMERATION.
    exact_sum = d6 + d6 + d6
    exact_sum.name = 'exact'
    print("\n\nENUMERATION:")
    exact_sum.printSuite()

    thinkplot.prePlot(num=2)
    thinkplot.pmf(sampled_sum)
    thinkplot.pmf(exact_sum, linestyle='dashed')
    thinkplot.save(root='dungeons1',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.15],
                   formats=FORMATS)

    thinkplot.clf()
    thinkplot.prePlot(num=1)

    # PMF of the max (best) attribute, built up pairwise: 2, then 4, then 6.
    best_of_2 = pmfMax(exact_sum, exact_sum)
    best_of_4 = pmfMax(best_of_2, best_of_2)
    best_of_6 = pmfMax(best_of_4, best_of_2)
    thinkplot.pmf(best_of_6)

    # The same maximum via the max() method (author's note: the efficient
    # Cdf method, Max() in class Cdf).
    best_cdf = exact_sum.max(6)
    best_cdf.name = ''
    best_pmf = thinkbayes.makePmfFromCdf(best_cdf)
    best_pmf.printSuite()
    thinkplot.pmf(best_pmf)

    thinkplot.save(root='dungeons2',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.23],
                   formats=FORMATS)