def MakeUniformPmf(low, high):
    """Build a Pmf over the values returned by MakeRange(low, high).

    low: lowest value (inclusive)
    high: highest value (inclusive)

    Returns: thinkbayes2.Pmf constructed from that range
    """
    return thinkbayes2.Pmf(MakeRange(low, high))
def CoefVariation(suite):
    """Compute the distribution of the ratio s / m.

    suite: Pmf whose values are (m, s) pairs (presumably mean and
           standard deviation -- confirm against the caller)

    Returns: new Pmf mapping each ratio s / m to the total probability
             of the pairs that produce it
    """
    ratios = thinkbayes2.Pmf()
    for pair, prob in suite.Items():
        m, s = pair
        # Incr (not Set) so pairs that share a ratio accumulate.
        ratios.Incr(s / m, prob)
    return ratios
def BinaryPmf(p):
    """Make a two-outcome Pmf over the values 1 and 0.

    p: probability assigned to the value 1; the value 0 gets 1 - p

    Returns: Pmf object
    """
    coin = thinkbayes2.Pmf()
    for outcome, prob in ((1, p), (0, 1 - p)):
        coin.Set(outcome, prob)
    return coin
def DivideValues(pmf, denom):
    """Return a new Pmf whose values are the input values divided by denom.

    pmf: Pmf object; it is only read, not modified
    denom: divisor, converted to float before dividing

    Returns: new Pmf with the same probabilities on the rescaled values
    """
    divisor = float(denom)
    scaled = thinkbayes2.Pmf()
    for value, prob in pmf.Items():
        scaled.Set(value / divisor, prob)
    return scaled
def MakeRawScoreDist(self, efficacies):
    """Mix the per-efficacy score distributions into one distribution.

    efficacies: Pmf of efficacy

    Returns: mixture (via thinkbayes2.MakeMixture) of
             self.PmfCorrect(efficacy), weighted by each efficacy's
             probability
    """
    metapmf = thinkbayes2.Pmf()
    for efficacy, weight in efficacies.Items():
        metapmf.Set(self.PmfCorrect(efficacy), weight)
    return thinkbayes2.MakeMixture(metapmf)
def RunLoop(gap_times, nums, lam=0.0333):
    """Run the basic analysis for a range of num_passengers and plot it.

    For each entry of nums, builds an ElapsedTimeEstimator and records
    the probability that the elapsed time exceeds 900 (15 minutes per
    the plot label), then plots those probabilities against nums and
    saves the figure with root 'redline5'.

    Side effects: sets the module-level UPPER_BOUND to 4000, clears the
    current thinkplot figure, and seeds the random generator with 18.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lam: arrival rate in passengers per second

    Returns: None
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.Clf()
    RandomSeed(18)

    # Resample the observed gap times.
    sample_size = 220
    resampled = thinkbayes2.Cdf(gap_times).Sample(sample_size)
    pmf_z = thinkbayes2.Pmf(resampled)

    # Sample from the biased distribution and append a few long delays.
    biased_cdf = BiasPmf(pmf_z).MakeCdf()
    sample_zb = numpy.append(
        biased_cdf.Sample(sample_size), [1800, 2400, 3000])

    # Smooth the biased sample with an estimated density.
    pdf_zb = thinkbayes2.EstimatedPdf(sample_zb)
    pmf_zb = pdf_zb.MakePmf(xs=MakeRange(low=60))

    # Unbias the smoothed distribution and build the calculator from it.
    wtc = WaitTimeCalculator(UnbiasPmf(pmf_zb))

    # Posterior probability of the elapsed time exceeding 900, per count.
    probs = [
        1 - ElapsedTimeEstimator(wtc, lam, count).pmf_y.MakeCdf().Prob(900)
        for count in nums
    ]

    thinkplot.Plot(nums, probs)
    thinkplot.Save(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
def PmfMax(pmf1, pmf2):
    """Compute the distribution of the larger of two values.

    Enumerates every pair of values, one from each Pmf, and credits
    max(v1, v2) with the product of the two probabilities.

    pmf1, pmf2: Pmf objects

    returns: new Pmf
    """
    largest = thinkbayes2.Pmf()
    for left, p_left in pmf1.Items():
        for right, p_right in pmf2.Items():
            winner = max(left, right)
            largest.Incr(winner, p_left * p_right)
    return largest
def __init__(self):
    """Load the scale and ranks, then derive raw scores, prior, difficulties.

    Sets:
      scale: result of ReadScale()
      raw: Pmf built from ReadRanks(), passed through self.ReverseScale
      max_score: largest value in raw
      prior: raw with every value divided by max_score
      difficulties: MakeDifficulties(-0.05, 1.8, max_score)
    """
    self.scale = ReadScale()

    ranks = ReadRanks()
    scaled_pmf = thinkbayes2.Pmf(dict(ranks))

    self.raw = self.ReverseScale(scaled_pmf)
    self.max_score = max(self.raw.Values())
    self.prior = DivideValues(self.raw, denom=self.max_score)

    # Positional arguments are (center, width, max_score).
    self.difficulties = MakeDifficulties(-0.05, 1.8, self.max_score)
def PmfCorrect(efficacy, difficulties):
    """Compute the distribution of the number of correct responses.

    Starts from a Pmf holding only the value 0 and adds one BinaryPmf
    per question, whose success probability is
    ProbCorrect(efficacy, difficulty).

    efficacy: personal ability to answer questions
    difficulties: list of difficulties, one for each question

    Returns: new Pmf object
    """
    total = thinkbayes2.Pmf([0])
    for difficulty in difficulties:
        question = BinaryPmf(ProbCorrect(efficacy, difficulty))
        total = total + question
    return total
def PmfOfWaitTime(pmf_zb):
    """Compute the distribution of wait time.

    For each gap in pmf_zb, builds MakeUniformPmf(0, gap) and mixes
    those Pmfs, weighted by the gap's probability.

    pmf_zb: dist of gap time as seen by a random observer

    Returns: dist of wait time (also dist of elapsed time), labelled 'y'
    """
    metapmf = thinkbayes2.Pmf()
    for gap, weight in pmf_zb.Items():
        metapmf.Set(MakeUniformPmf(0, gap), weight)
    return thinkbayes2.MakeMixture(metapmf, label='y')
def MakeGoalTimePmf(suite):
    """Make the distribution of time until the first goal.

    Builds one exponential Pmf per rate (high=2, n=2001) and mixes
    them, weighted by each rate's probability.

    suite: distribution of goal-scoring rate

    returns: mixture Pmf, labelled with suite.label
    """
    metapmf = thinkbayes2.Pmf()
    for lam, weight in suite.Items():
        time_pmf = thinkbayes2.MakeExponentialPmf(lam, high=2, n=2001)
        metapmf.Set(time_pmf, weight)
    return thinkbayes2.MakeMixture(metapmf, label=suite.label)
def after_two_days(trap):
    """Predict the distribution of the two-day total.

    Adds every pair of values drawn from trap (weighting the pair by the
    product of its probabilities), keeps only totals that do not exceed
    trap.nb_trap, and renormalizes what is left.

    trap: Pmf-like object exposing Items() and an nb_trap attribute

    Returns: new normalized Pmf of the totals
    """
    totals = thinkbayes2.Pmf()
    for first, p_first in trap.Items():
        for second, p_second in trap.Items():
            combined = first + second
            if combined <= trap.nb_trap:
                totals.Incr(combined, p_first * p_second)
    totals.Normalize()
    return totals
def after_two_months(bulb):
    """Predict the distribution of the two-month total.

    Adds every pair of values drawn from bulb (weighting the pair by the
    product of its probabilities), keeps only totals that do not exceed
    bulb.nb_bulb, and renormalizes what is left.

    bulb: Pmf-like object exposing Items() and an nb_bulb attribute

    Returns: new normalized Pmf of the totals
    """
    totals = thinkbayes2.Pmf()
    for first, p_first in bulb.Items():
        for second, p_second in bulb.Items():
            combined = first + second
            if combined <= bulb.nb_bulb:
                totals.Incr(combined, p_first * p_second)
    totals.Normalize()
    return totals
def ReverseScale(self, pmf):
    """Apply self.Reverse to every value of a Pmf.

    Args:
      pmf: Pmf object

    Returns:
      new Pmf; probabilities of values that map to the same result
      are added together
    """
    reversed_pmf = thinkbayes2.Pmf()
    for scaled, prob in pmf.Items():
        reversed_pmf.Incr(self.Reverse(scaled), prob)
    return reversed_pmf
def PredRemaining(self, rem_time, points_scored):
    """Compute the predictive distribution of final points.

    Takes the distribution of remaining scores from
    self.score.PredRemaining(rem_time, 0). For each value prob_td in
    self.TDPercent, splits every score count into touchdowns (7 points)
    and field goals (3 points) with binomial weights, then mixes the
    resulting point distributions and shifts by points_scored.

    rem_time: remaining time in the game in minutes
    points_scored: points already scored

    Returns: mixture Pmf of total points
    """
    remaining_scores = self.score.PredRemaining(rem_time, 0)

    metapmf = thinkbayes2.Pmf()
    for prob_td, weight in self.TDPercent.Items():
        points_pmf = thinkbayes2.Pmf()
        for n_scores, prob_n in remaining_scores.Items():
            for n_tds in range(n_scores + 1):
                n_fgs = n_scores - n_tds
                points = 7 * n_tds + 3 * n_fgs
                ways = thinkbayes2.BinomialCoef(n_scores, n_tds)
                split_prob = prob_td**n_tds * (1 - prob_td)**n_fgs
                points_pmf.Incr(points, prob_n * ways * split_prob)
        # One point distribution per touchdown probability.
        metapmf.Incr(points_pmf, weight)

    mix = thinkbayes2.MakeMixture(metapmf)
    mix += points_scored
    return mix
def __init__(self, label=None):
    """Precompute one "time since last login" Pmf per hypothesis.

    - Upon setting priors, we generate a pmf for each hypo that
      represents the probability that an observed user has not logged
      in for a specified amount of time.
    - This generation of pmfs was initially done in Likelihood, but that
      became too computationally expensive given the size of the data
      set. It is faster to calculate all pmfs before running any updates.

    The results are appended to self.hypPmfs in hypothesis order
    (hypo = 1 .. 100).

    label: accepted for interface compatibility; this constructor does
           not use it and does not forward it to the superclass
    """
    # Ensure that the __init__'s of super classes are carried out.
    super(Lambda, self).__init__()

    # Container for the per-hypothesis pmfs.
    self.hypPmfs = []

    # Iterate through all 100 hypos (hours since login, per the original
    # notes). hypo starts at 1, so 1 / hypo is always defined.
    for hypo in range(1, 101):
        # Exponential Pmf for this rate, weighted by the value itself
        # and renormalized.
        interarrival = thinkbayes2.MakeExponentialPmf(1 / hypo, high=101)
        for val, _prob in interarrival.Items():
            interarrival[val] *= val
        interarrival.Normalize()

        # Mixture of uniform distributions of time since last login.
        metapmf = thinkbayes2.Pmf()
        for time, prob in interarrival.Items():
            if time == 0:
                continue
            uniform = thinkbayes2.MakeUniformPmf(0, time, 101)
            metapmf[uniform] = prob

        timesince = thinkbayes2.MakeMixture(metapmf)
        cdf = thinkbayes2.Cdf(timesince)

        # Differentiate the cdf with a 3-point central difference of
        # step 1.0. This is exactly what scipy.misc.derivative computed
        # with its defaults (dx=1.0, n=1, order=3); it is inlined because
        # scipy.misc.derivative was removed in SciPy 1.12.
        xs = numpy.linspace(0, 100, 101)
        ys = [0.5 * cdf.Prob(x + 1.0) - 0.5 * cdf.Prob(x - 1.0) for x in xs]

        pmf = thinkbayes2.MakePmfFromItems(dict(zip(xs, ys)))
        pmf.Normalize()

        # Stored so Likelihood can look it up later.
        self.hypPmfs.append(pmf)
def PlotConditionalSurvival(durations):
    """Plot conditional survival curves for t0 in 8, 16, 24, 32.

    durations: list of durations
    """
    pmf = thinkbayes2.Pmf(durations)
    thresholds = [8, 16, 24, 32]

    thinkplot.PrePlot(len(thresholds))
    for t0 in thresholds:
        thinkplot.Plot(ConditionalSurvival(pmf, t0), label='t0=%d' % t0)

    thinkplot.Show()
def MakeGoalPmf(suite, high=10):
    """Make the distribution of goals scored, given a distribution of lam.

    Builds one Poisson Pmf per rate and mixes them, weighted by each
    rate's probability.

    suite: distribution of goal-scoring rate
    high: upper bound passed to MakePoissonPmf

    returns: mixture Pmf of goals per game, labelled with suite.label
    """
    metapmf = thinkbayes2.Pmf()
    for lam, weight in suite.Items():
        metapmf.Set(thinkbayes2.MakePoissonPmf(lam, high), weight)
    return thinkbayes2.MakeMixture(metapmf, label=suite.label)
def PredRemaining(self, rem_time, score):
    """Compute the predictive distribution for the final number of goals.

    For each rate lam in this suite, builds a Poisson Pmf with mean
    lam * rem_time / 60 (upper bound 20), mixes them by the rate's
    probability, and shifts the mixture by the goals already scored.

    rem_time: remaining time in the game in minutes
    score: number of goals already scored

    Returns: mixture Pmf of the final score
    """
    # Meta-Pmf: maps each Poisson Pmf to the probability of its rate.
    metapmf = thinkbayes2.Pmf()
    for lam, weight in self.Items():
        expected_goals = lam * rem_time / 60
        metapmf[thinkbayes2.MakePoissonPmf(expected_goals, 20)] = weight

    mix = thinkbayes2.MakeMixture(metapmf)
    mix += score
    return mix
def ConditionalSurvival(pmf, t0):
    """Compute the conditional survival function.

    Probability that duration exceeds t0+t, given that duration >= t0.
    Keeps only durations of at least t0, shifts them down by t0,
    renormalizes, and converts the result to a survival function.

    pmf: Pmf of durations
    t0: minimum time

    returns: the result of MakeSurvivalFromCdf on the conditional Cdf
    """
    shifted = thinkbayes2.Pmf()
    for duration, prob in pmf.Items():
        if duration >= t0:
            shifted.Set(duration - t0, prob)
    shifted.Normalize()

    conditional_cdf = shifted.MakeCdf()
    return MakeSurvivalFromCdf(conditional_cdf)
def PredRemaining(self, rem_time, score):
    """Plot the predictive distribution for the final number of goals.

    For each rate lam in this suite, builds a Poisson Pmf with mean
    lam * rem_time / 90 (upper bound 15), mixes them by the rate's
    probability, shifts by the goals already scored, and shows the
    mixture as a histogram. Nothing is returned.

    rem_time: remaining time in the game in minutes
    score: number of goals already scored
    """
    metapmf = thinkbayes2.Pmf()
    for lam, weight in self.Items():
        pred = thinkbayes2.MakePoissonPmf(lam * rem_time / 90, 15)
        metapmf[pred] = weight

    mix = thinkbayes2.MakeMixture(metapmf)
    mix += score

    thinkplot.Hist(mix)
    thinkplot.Show()
def MakePmf(self, filler=None):
    """Make a PMF of lifetimes.

    Builds a Cdf from self.ts and 1 - self.ss and copies its
    (value, probability) items into a new Pmf. The Pmf is not
    renormalized here.

    filler: value to replace missing values; when given, it is assigned
            the probability left over above the last Cdf probability

    returns: Pmf
    """
    cdf = thinkbayes2.Cdf(self.ts, 1 - self.ss)

    lifetimes = thinkbayes2.Pmf()
    for val, prob in cdf.Items():
        lifetimes.Set(val, prob)

    # Highest cumulative probability reached by the Cdf.
    cutoff = cdf.ps[-1]
    if filler is None:
        return lifetimes

    lifetimes[filler] = 1 - cutoff
    return lifetimes
def __init__(self, wtc, are, num_passengers=15):
    """Constructor.

    Builds one ElapsedTimeEstimator per value of are.post_lam and mixes
    their pmf_y distributions; also stores the estimate obtained from
    the mean of are.post_lam alone.

    wtc: WaitTimeCalculator
    are: ArrivalTimeEstimator
    num_passengers: number of passengers seen on the platform

    Sets:
      metapmf: Pmf mapping each pmf_y to the probability of its lam
      mixture: mixture of the Pmfs in metapmf
      point: pmf_y computed from the mean of are.post_lam
    """
    self.metapmf = thinkbayes2.Pmf()
    for lam, weight in sorted(are.post_lam.Items()):
        estimator = ElapsedTimeEstimator(wtc, lam, num_passengers)
        self.metapmf.Set(estimator.pmf_y, weight)
    self.mixture = thinkbayes2.MakeMixture(self.metapmf)

    mean_lam = are.post_lam.Mean()
    self.point = ElapsedTimeEstimator(wtc, mean_lam, num_passengers).pmf_y
def MakePmfTest(shelf, **options):
    """Make a discrete version of this Pdf.

    'label' is removed from options; everything else is forwarded to
    Render(shelf, ...), which supplies the values and densities.

    options can include
      label: string
      low: low end of range
      high: high end of range
      n: number of places to evaluate

    Returns: new Pmf
    """
    label = options.pop('label', '')
    xs, ds = Render(shelf, **options)
    density_by_x = dict(zip(xs, ds))
    return thinkbayes2.Pmf(density_by_x, label=label)
def PredRemaining(self, rem_time, score):
    """Compute the predictive distribution for the final number of goals.

    Each rate lam in this suite contributes a Poisson Pmf with mean
    lam * rem_time / 60 (upper bound 20), weighted by the rate's
    probability. The mixture is then shifted by the goals already scored.

    rem_time: remaining time in the game in minutes
    score: number of goals already scored

    Returns: mixture Pmf of the final score
    """
    # A Pmf of Pmfs: each Poisson Pmf maps to the probability of its rate.
    poisson_by_rate = thinkbayes2.Pmf()
    for lam, weight in self.Items():
        scaled_rate = lam * rem_time / 60
        remaining = thinkbayes2.MakePoissonPmf(scaled_rate, 20)
        poisson_by_rate[remaining] = weight

    mix = thinkbayes2.MakeMixture(poisson_by_rate)
    # Shift by the goals that have already been scored.
    mix += score
    return mix
def MakeLocationPmf(alpha, beta, locations):
    """Compute the Pmf of the locations, given alpha and beta.

    Given that the shooter is at coordinates (alpha, beta), the
    probability of hitting any spot is inversely proportional to the
    strafe speed at that spot.

    alpha: x position
    beta: y position
    locations: x locations where the pmf is evaluated

    Returns: normalized Pmf object
    """
    hits = thinkbayes2.Pmf()
    for x in locations:
        hits.Set(x, 1.0 / StrafingSpeed(alpha, beta, x))
    hits.Normalize()
    return hits
def __init__(self, xs, pcounts, passenger_data):
    """Build prior and posterior gap-time distributions from the data.

    xs: values handed to GapDirichlet2 and to PredictivePmf
    pcounts: data preloaded into the Dirichlet model
    passenger_data: sequence of (k1, y, k2) triples; each (k1, y) is
                    used as an update and y is kept as a wait time

    Sets:
      wait_times, pmf_y: the y values and their Pmf (label "y")
      prior_zb: predictive Pmf before the passenger updates
      pmf_mean_zb: result of PmfMeanZb() after the updates
      post_zb: predictive Pmf after the updates
      post_z: UnbiasPmf applied to post_zb
    """
    self.xs = xs
    self.pcounts = pcounts
    self.passenger_data = passenger_data

    self.wait_times = [wait for _k1, wait, _k2 in passenger_data]
    self.pmf_y = thinkbayes2.Pmf(self.wait_times, label="y")

    # Scale the parameters before and after preloading; the order of
    # these three statements matters.
    model = GapDirichlet2(self.xs)
    model.params /= 1.0
    model.Preload(self.pcounts)
    model.params /= 20.0

    self.prior_zb = model.PredictivePmf(self.xs, label="prior zb")

    for k1, wait, _k2 in passenger_data:
        model.Update((k1, wait))

    self.pmf_mean_zb = model.PmfMeanZb()

    self.post_zb = model.PredictivePmf(self.xs, label="post zb")
    self.post_z = UnbiasPmf(self.post_zb, label="post z")
def PmfMeanZb(self):
    """Make the Pmf of mean zb.

    Returns: thinkbayes2.Pmf built from the values in self.mean_zbs
    """
    mean_zb_pmf = thinkbayes2.Pmf(self.mean_zbs)
    return mean_zb_pmf
def main():
    """Generate the dungeons figures.

    Saves three figures: 'dungeons3' (mixture of dice), 'dungeons1'
    (sampled vs. exact sum of three d6) and 'dungeons2' (best of six
    attributes). Also prints the sampled, exact and best-attribute Pmfs.
    """
    # Box of dice: (number of sides, relative weight).
    pmf_dice = thinkbayes2.Pmf()
    for sides, count in ((4, 5), (6, 4), (8, 3), (12, 2), (20, 1)):
        pmf_dice.Set(Die(sides), count)
    pmf_dice.Normalize()

    # The mixture computed by hand ...
    mix = thinkbayes2.Pmf()
    for die, weight in pmf_dice.Items():
        for outcome, prob in die.Items():
            mix.Incr(outcome, weight * prob)

    # ... and with the library helper, which is the one that is plotted.
    mix = thinkbayes2.MakeMixture(pmf_dice)

    thinkplot.Hist(mix, width=0.9)
    thinkplot.Save(root='dungeons3',
                   xlabel='Outcome',
                   ylabel='Probability',
                   formats=FORMATS)

    random.seed(17)

    d6 = Die(6, 'd6')

    # Sum of three d6, estimated by sampling.
    three = thinkbayes2.SampleSum([d6] * 3, 1000)
    three.label = 'sample'
    three.Print()

    # Sum of three d6, computed exactly.
    three_exact = d6 + d6 + d6
    three_exact.label = 'exact'
    three_exact.Print()

    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(three)
    thinkplot.Pmf(three_exact, linestyle='dashed')
    thinkplot.Save(root='dungeons1',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.15],
                   formats=FORMATS)

    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    # Distribution of the best attribute the hard way (not plotted).
    best_attr2 = PmfMax(three_exact, three_exact)
    best_attr4 = PmfMax(best_attr2, best_attr2)
    best_attr6 = PmfMax(best_attr4, best_attr2)

    # And the easy way.
    best_attr_cdf = three_exact.Max(6)
    best_attr_cdf.label = ''
    best_attr_pmf = best_attr_cdf.MakePmf()
    best_attr_pmf.Print()

    thinkplot.Pmf(best_attr_pmf)
    thinkplot.Save(root='dungeons2',
                   xlabel='Sum of three d6',
                   ylabel='Probability',
                   axis=[2, 19, 0, 0.23],
                   formats=FORMATS,
                   legend=False)