def RunSimpleProcess(gap_times, lam=0.0333, num_passengers=15, plot=True):
    """Run the basic analysis and, optionally, draw its figures.

    Side effects: sets the module-level UPPER_BOUND to 1200 and prints the
    90% credible interval of the gap-time CDF after scaling it by 1/60.

    gap_times: sequence of float
    lam: arrival rate in passengers per second
    num_passengers: int number of passengers on the platform
    plot: boolean, whether to generate plots

    Returns: tuple of (WaitTimeCalculator, ElapsedTimeEstimator)
    """
    global UPPER_BOUND
    UPPER_BOUND = 1200

    # Report the interval on the rescaled axis
    # (presumably seconds -> minutes; confirm against the data source).
    scaled_cdf = thinkbayes2.Cdf(gap_times).Scale(1.0 / 60)
    print('CI z', scaled_cdf.CredibleInterval(90))

    # Estimate a density from the observed gaps and discretize it on the grid.
    grid = MakeRange(low=10)
    pmf_z = thinkbayes2.EstimatedPdf(gap_times).MakePmf(xs=grid, label="z")

    calculator = WaitTimeCalculator(pmf_z, inverse=False)
    if plot:
        calculator.PlotPmfs()
        calculator.MakePlot()

    estimator = ElapsedTimeEstimator(calculator, lam, num_passengers)
    if plot:
        estimator.MakePlot()

    return calculator, estimator
def PlotPosteriors(self, other):
    """Plot the posterior efficacy CDFs of two test-takers on one figure.

    Each curve is labelled with the corresponding object's `score`. The
    figure is saved under the root name 'sat_posteriors_eff' in pdf and
    eps formats.

    self, other: Sat objects.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    # One CDF per Sat object, in (self, other) order.
    posterior_cdfs = [
        thinkbayes2.Cdf(sat, label='posterior %d' % sat.score)
        for sat in (self, other)
    ]
    thinkplot.Cdfs(posterior_cdfs)

    thinkplot.Save(
        xlabel='efficacy',
        ylabel='CDF',
        axis=[0, 4.6, 0.0, 1.0],
        root='sat_posteriors_eff',
        formats=['pdf', 'eps'],
    )
def CalibrateDifficulty(self):
    """Plot the observed raw-score CDF against the model's raw-score CDF.

    The model curve comes from self.MakeRawScoreDist applied to a normal
    pmf of efficacies. The figure is saved under the root name
    'sat_calibrate' in pdf and eps formats.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    # Empirical distribution of raw scores.
    data_cdf = thinkbayes2.Cdf(self.raw, label='data')
    thinkplot.Cdf(data_cdf)

    # Model distribution of raw scores implied by the efficacy prior.
    efficacy_pmf = thinkbayes2.MakeNormalPmf(0, 1.5, 3)
    model_pmf = self.MakeRawScoreDist(efficacy_pmf)
    model_cdf = thinkbayes2.Cdf(model_pmf, label='model')
    thinkplot.Cdf(model_cdf)

    thinkplot.Save(
        root='sat_calibrate',
        xlabel='raw score',
        ylabel='CDF',
        formats=['pdf', 'eps'],
    )
def MakeCdf():
    """Build a CDF from the cumulative counts reported by Zhang et al.

    The eight cumulative counts (out of 53) are paired with the values
    -1.5, -0.5, ..., 5.5.

    Returns: thinkbayes2.Cdf
    """
    total = 53.0
    cumulative_counts = [0, 2, 31, 42, 48, 51, 52, 53]

    values = numpy.arange(-1.5, 6.5, 1.0)
    probs = [count / total for count in cumulative_counts]

    return thinkbayes2.Cdf(values, probs)
def GenerateSampleGaps(self, n):
    """Draw a random sample of gaps as seen by passengers.

    The sample is drawn from the CDF of self.pmf_zb.

    n: sample size

    Returns: sequence of values
    """
    return thinkbayes2.Cdf(self.pmf_zb).Sample(n)
def GenerateSampleWaitTimes(self, n):
    """Draw a random sample of wait times.

    The sample is drawn from the CDF of self.pmf_y.

    n: sample size

    Returns: sequence of values
    """
    return thinkbayes2.Cdf(self.pmf_y).Sample(n)
def RunLoop(gap_times, nums, lam=0.0333):
    """Run the basic analysis for a range of num_passengers and plot it.

    For each entry of nums, computes the posterior probability of waiting
    more than 900 seconds (15 minutes), then plots those probabilities
    against nums and saves the figure under the root name 'redline5'.

    Side effects: sets the module-level UPPER_BOUND to 4000, clears the
    current figure and seeds the random number generator with 18.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lam: arrival rate in passengers per second

    Returns: None
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.Clf()
    RandomSeed(18)

    # Resample the observed gaps.
    n = 220
    resampled_gaps = thinkbayes2.Cdf(gap_times).Sample(n)
    pmf_z = thinkbayes2.Pmf(resampled_gaps)

    # Sample from the biased distribution and add some long delays.
    biased_cdf = BiasPmf(pmf_z).MakeCdf()
    long_delays = [1800, 2400, 3000]
    sample_zb = numpy.append(biased_cdf.Sample(n), long_delays)

    # Smooth the distribution of zb.
    pdf_zb = thinkbayes2.EstimatedPdf(sample_zb)
    pmf_zb = pdf_zb.MakePmf(xs=MakeRange(low=60))

    # Unbias the smoothed distribution and build the calculator from it.
    wtc = WaitTimeCalculator(UnbiasPmf(pmf_zb))

    # Posterior probability of waiting more than 15 minutes, per crowd size.
    probs = [
        1 - ElapsedTimeEstimator(wtc, lam, count).pmf_y.MakeCdf().Prob(900)
        for count in nums
    ]

    thinkplot.Plot(nums, probs)
    thinkplot.Save(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
def PlotPriorDist(pmf):
    """Plot the CDF of the prior distribution of p_correct.

    The figure is saved under the root name 'sat_prior' in pdf and eps
    formats.

    pmf: prior
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    prior_cdf = thinkbayes2.Cdf(pmf, label='prior')
    thinkplot.Cdf(prior_cdf)

    thinkplot.Save(
        root='sat_prior',
        xlabel='p_correct',
        ylabel='CDF',
        formats=['pdf', 'eps'],
    )
def __init__(self, label=None):
    """Precompute one time-since-last-login pmf per hypothesis.

    For each hypothesis (the integers 1 through 100, each representing
    hours since login) this builds a pmf for the probability that an
    observed user has not logged in for a given amount of time, and stores
    the pmfs, in hypothesis order, in self.hypPmfs.

    The pmfs were initially generated inside Likelihood, but that became
    too computationally expensive for the size of the data set; it is
    faster to compute all of them once, before running any updates.

    label: accepted but not used by this method.
        NOTE(review): it is not forwarded to the superclass initializer;
        confirm whether that is intentional.
    """
    # Ensure that the __init__'s of super classes are carried out
    super(Lambda, self).__init__()

    # Container for the per-hypothesis pmfs, read later by Likelihood.
    self.hypPmfs = []

    # Grid on which every final pmf is evaluated; it does not depend on the
    # hypothesis, so it is built once.
    xs = numpy.linspace(0, 100, 101)

    # Iterate through all 100 hypos. These each represent hours since login.
    for hypo in range(1, 101):
        # Exponential pmf for this hypothesis. The float literal keeps the
        # rate from truncating to 0 under Python 2 integer division.
        interarrival = thinkbayes2.MakeExponentialPmf(1.0 / hypo, high=101)

        # Reweight each probability by its value, then renormalize.
        # Iterate over a snapshot because entries are reassigned in place.
        for val, _ in list(interarrival.Items()):
            interarrival[val] *= val
        interarrival.Normalize()

        # Make a mixture of uniform distributions of time since last login.
        metapmf = thinkbayes2.Pmf()
        for time, prob in interarrival.Items():
            if time == 0:
                continue
            metapmf[thinkbayes2.MakeUniformPmf(0, time, 101)] = prob
        timesince = thinkbayes2.MakeMixture(metapmf)

        # Make a cdf using the mixture.
        cdf = thinkbayes2.Cdf(timesince)

        # Differentiate the cdf with a unit-step central difference. This is
        # the same arithmetic scipy.misc.derivative(cdf.Prob, x) performed
        # with its defaults (dx=1.0, order=3); that function was removed in
        # SciPy 1.12, so it is spelled out here.
        ys = [0.5 * cdf.Prob(x + 1.0) - 0.5 * cdf.Prob(x - 1.0) for x in xs]

        pmf = thinkbayes2.MakePmfFromItems(dict(zip(xs, ys)))
        pmf.Normalize()

        # Store pmf in object to be called on later in Likelihood.
        self.hypPmfs.append(pmf)
def MakeNormalModel(weights):
    """Plot the CDF of weights together with a fitted Normal model.

    The model's mean and variance come from thinkbayes2.TrimmedMeanVar and
    are printed along with the sample size. The model curve spans four
    standard deviations on either side of the mean.

    weights: sequence
    """
    data_cdf = thinkbayes2.Cdf(weights, label='weights')

    mean, var = thinkbayes2.TrimmedMeanVar(weights)
    std = math.sqrt(var)
    print('n, mean, std', len(weights), mean, std)

    # Render the model over mean +/- 4 standard deviations.
    half_width = 4 * std
    xs, ps = thinkbayes2.RenderNormalCdf(
        mean, std, mean - half_width, mean + half_width)

    # Draw the model first so that the data CDF is plotted on top of it.
    thinkplot.plot(xs, ps, label='model', linewidth=4, color='0.8')
    thinkplot.cdf(data_cdf)
def MakePmf(self, filler=None):
    """Make a PMF of lifetimes from self.ts and self.ss.

    A CDF is built from self.ts and 1 - self.ss, and its items are copied
    into a new Pmf. When filler is given, it receives the probability left
    over above the last CDF value.

    filler: value to replace missing values

    returns: Pmf
    """
    cdf = thinkbayes2.Cdf(self.ts, 1 - self.ss)

    lifetimes = thinkbayes2.Pmf()
    for value, prob in cdf.Items():
        lifetimes.Set(value, prob)

    # Probability not accounted for by the CDF; evaluated unconditionally.
    leftover = 1 - cdf.ps[-1]
    if filler is None:
        return lifetimes

    lifetimes[filler] = leftover
    return lifetimes
def PlotSurvival(complete):
    """Plot survival and hazard curves for a set of complete lifetimes.

    The first subplot shows the survival function with the CDF drawn
    faintly alongside it; the second shows the hazard function. The values
    of the CDF and survival function at 13 and of the hazard function at 39
    are printed along the way.

    complete: list of complete lifetimes
    """
    thinkplot.PrePlot(3, rows=2)

    lifetime_cdf = thinkbayes2.Cdf(complete, label='cdf')
    survival = MakeSurvivalFromCdf(lifetime_cdf, label='survival')
    print(lifetime_cdf[13])
    print(survival[13])

    # First subplot: survival curve, with the CDF for reference.
    thinkplot.Plot(survival)
    thinkplot.Cdf(lifetime_cdf, alpha=0.2)
    thinkplot.Config()

    # Second subplot: hazard curve.
    thinkplot.SubPlot(2)
    hazard = survival.MakeHazardFunction(label='hazard')
    print(hazard[39])
    thinkplot.Plot(hazard)
    thinkplot.Config(ylim=[0, 0.75])
def main():
    """Compare priors, then compute posteriors for several upper bounds.

    For each upper bound a posterior is built from the dataset and its mean
    is printed; the figure is saved under the root name 'train3'. The 5th
    and 95th percentiles are then printed twice for the posterior of the
    last upper bound: once from the suite itself and once from its CDF.
    """
    ComparePriors()

    dataset = [30, 60, 90]

    thinkplot.Clf()
    thinkplot.PrePlot(num=3)

    for high in (500, 1000, 2000):
        suite = MakePosterior(high, dataset, Train2)
        print(high, suite.Mean())

    thinkplot.Save(
        root='train3',
        xlabel='Number of trains',
        ylabel='Probability',
    )

    # From here on, `suite` is the posterior for the last upper bound.
    print((suite.Percentile(5), suite.Percentile(95)))

    posterior_cdf = thinkbayes2.Cdf(suite)
    print((posterior_cdf.Percentile(5), posterior_cdf.Percentile(95)))