Example #1
0
def RunSimpleProcess(gap_times, lam=0.0333, num_passengers=15, plot=True):
    """Builds the wait-time and elapsed-time models from observed gaps.

    Sets the module-level UPPER_BOUND to 1200 and prints a 90% credible
    interval computed from the gap times scaled by 1/60.

    gap_times: sequence of float
    lam: arrival rate in passengers per second
    num_passengers: int number of passengers on the platform
    plot: boolean, whether to generate plots

    Returns: WaitTimeCalculator, ElapsedTimeEstimator
    """
    global UPPER_BOUND
    UPPER_BOUND = 1200

    # report the credible interval on the rescaled gap times
    scaled_cdf = thinkbayes2.Cdf(gap_times).Scale(1.0 / 60)
    print('CI z', scaled_cdf.CredibleInterval(90))

    # discretize the estimated PDF of the gaps on a fixed grid
    grid = MakeRange(low=10)
    gap_pmf = thinkbayes2.EstimatedPdf(gap_times).MakePmf(xs=grid, label="z")

    wtc = WaitTimeCalculator(gap_pmf, inverse=False)
    if plot:
        wtc.PlotPmfs()
        wtc.MakePlot()

    ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
    if plot:
        ete.MakePlot()

    return wtc, ete
Example #2
0
    def PlotPosteriors(self, other):
        """Saves a figure with the posterior CDFs of efficacy for two objects.

        Each curve is labelled with the `score` attribute of its object.

        self, other: Sat objects.
        """
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)

        posterior_cdfs = [
            thinkbayes2.Cdf(sat, label='posterior %d' % sat.score)
            for sat in (self, other)
        ]
        thinkplot.Cdfs(posterior_cdfs)

        save_options = dict(
            xlabel='efficacy',
            ylabel='CDF',
            axis=[0, 4.6, 0.0, 1.0],
            root='sat_posteriors_eff',
            formats=['pdf', 'eps'],
        )
        thinkplot.Save(**save_options)
Example #3
0
    def CalibrateDifficulty(self):
        """Plots the CDF of the raw scores next to the model's CDF.

        The model distribution is MakeRawScoreDist applied to a normal PMF
        of efficacies; the figure is saved under the root 'sat_calibrate'.
        """
        thinkplot.Clf()
        thinkplot.PrePlot(num=2)

        # observed raw scores
        data_cdf = thinkbayes2.Cdf(self.raw, label='data')
        thinkplot.Cdf(data_cdf)

        # raw scores produced by the model
        efficacy_pmf = thinkbayes2.MakeNormalPmf(0, 1.5, 3)
        model_pmf = self.MakeRawScoreDist(efficacy_pmf)
        thinkplot.Cdf(thinkbayes2.Cdf(model_pmf, label='model'))

        save_options = dict(
            root='sat_calibrate',
            xlabel='raw score',
            ylabel='CDF',
            formats=['pdf', 'eps'],
        )
        thinkplot.Save(**save_options)
Example #4
0
def MakeCdf():
    """Builds a CDF from the cumulative counts reported by Zhang et al.

    The eight cumulative counts are divided by 53 and paired with the
    values -1.5, -0.5, ..., 5.5.

    Returns: Cdf
    """
    total = 53.0
    cumulative_counts = [0, 2, 31, 42, 48, 51, 52, 53]

    values = numpy.arange(-1.5, 6.5, 1.0)
    probs = [count / total for count in cumulative_counts]

    return thinkbayes2.Cdf(values, probs)
Example #5
0
    def GenerateSampleGaps(self, n):
        """Draws a random sample from the distribution in self.pmf_zb.

        n: sample size

        Returns: sequence of values
        """
        # convert the PMF to a CDF and sample from it
        return thinkbayes2.Cdf(self.pmf_zb).Sample(n)
Example #6
0
    def GenerateSampleWaitTimes(self, n):
        """Draws a random sample from the distribution in self.pmf_y.

        n: sample size

        Returns: sequence of values
        """
        # convert the PMF to a CDF and sample from it
        return thinkbayes2.Cdf(self.pmf_y).Sample(n)
Example #7
0
def RunLoop(gap_times, nums, lam=0.0333):
    """Plots P(y > 15 min) against the number of passengers.

    Sets the module-level UPPER_BOUND to 4000, seeds the random number
    generator with 18, and saves the figure under the root 'redline5'.

    gap_times: sequence of float
    nums: sequence of values for num_passengers
    lam: arrival rate in passengers per second

    Returns: None
    """
    global UPPER_BOUND
    UPPER_BOUND = 4000

    thinkplot.Clf()

    RandomSeed(18)

    # resample the observed gaps
    sample_size = 220
    resampled = thinkbayes2.Cdf(gap_times).Sample(sample_size)
    resampled_pmf = thinkbayes2.Pmf(resampled)

    # sample from the biased distribution and append some long delays
    biased_cdf = BiasPmf(resampled_pmf).MakeCdf()
    long_delays = [1800, 2400, 3000]
    biased_sample = numpy.append(biased_cdf.Sample(sample_size), long_delays)

    # smooth the biased sample and discretize it on a fixed grid
    biased_pdf = thinkbayes2.EstimatedPdf(biased_sample)
    grid = MakeRange(low=60)
    biased_pmf = biased_pdf.MakePmf(xs=grid)

    # unbias the smoothed distribution and build the calculator
    wtc = WaitTimeCalculator(UnbiasPmf(biased_pmf))

    # probability that y exceeds 900 for each number of passengers
    probs = [
        1 - ElapsedTimeEstimator(wtc, lam, count).pmf_y.MakeCdf().Prob(900)
        for count in nums
    ]

    thinkplot.Plot(nums, probs)
    save_options = dict(
        root='redline5',
        xlabel='Num passengers',
        ylabel='P(y > 15 min)',
        formats=FORMATS,
    )
    thinkplot.Save(**save_options)
Example #8
0
def PlotPriorDist(pmf):
    """Saves a figure showing the CDF of the prior for p_correct.

    pmf: prior
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    thinkplot.Cdf(thinkbayes2.Cdf(pmf, label='prior'))

    save_options = dict(
        root='sat_prior',
        xlabel='p_correct',
        ylabel='CDF',
        formats=['pdf', 'eps'],
    )
    thinkplot.Save(**save_options)
Example #9
0
    def __init__(self, label=None):
        """Precomputes one pmf per hypothesis and stores them in hypPmfs.

        - Upon setting priors, we generate a pmf for each hypo that
          represents the probability that an observed user has not logged
          in for a specified amount of time.
        - This generation of pmfs was initially done in Likelihood, but
          that became too computationally expensive given the size of our
          data set. It is faster to calculate all pmfs before trying to run
          any updates.

        label: accepted by the signature but not used by this method.
        """
        # NOTE(review): `label` is never forwarded to the base class --
        # confirm whether that is intentional.
        super(Lambda, self).__init__()

        # One precomputed pmf per hypothesis, in hypothesis order.
        self.hypPmfs = []

        # Evaluation grid for the derivative; identical for every hypothesis.
        xs = numpy.linspace(0, 100, 101)

        # Hypotheses are the integers 1..100, so hypo is never zero.
        for hypo in range(1, 101):

            # 1.0 forces true division even under Python 2 semantics, where
            # 1 / hypo would truncate to 0 for every hypo greater than 1.
            interarrival = thinkbayes2.MakeExponentialPmf(1.0 / hypo,
                                                          high=101)

            # Scale each probability by its value, then renormalize.
            # Iterate over a snapshot so the pmf is not updated mid-iteration.
            for val, prob in list(interarrival.Items()):
                interarrival[val] = prob * val
            interarrival.Normalize()

            # Make a mixture of uniform distributions of time since last
            # login, one per nonzero interarrival value.
            metapmf = thinkbayes2.Pmf()
            for gap, prob in interarrival.Items():
                if gap == 0:
                    continue
                metapmf[thinkbayes2.MakeUniformPmf(0, gap, 101)] = prob

            timesince = thinkbayes2.MakeMixture(metapmf)

            # Make a cdf using the mixture
            cdf = thinkbayes2.Cdf(timesince)

            # Differentiate the cdf with a unit-step central difference.
            # This is what scipy.misc.derivative(cdf.Prob, x) computed with
            # its default arguments; that function has been removed from
            # recent SciPy releases.
            ys = [(cdf.Prob(x + 1.0) - cdf.Prob(x - 1.0)) / 2.0 for x in xs]
            pmf = thinkbayes2.MakePmfFromItems(dict(zip(xs, ys)))
            pmf.Normalize()

            # Store pmf in object to be called on later in Likelihood
            self.hypPmfs.append(pmf)
def MakeNormalModel(weights):
    """Plots the CDF of the data together with a Normal model of it.

    The model's mean and variance come from TrimmedMeanVar; the model
    curve spans four standard deviations on either side of the mean.

    weights: sequence
    """
    data_cdf = thinkbayes2.Cdf(weights, label='weights')

    mu, variance = thinkbayes2.TrimmedMeanVar(weights)
    sigma = math.sqrt(variance)
    print('n, mean, std', len(weights), mu, sigma)

    # the model curve is drawn first, then the data CDF
    low, high = mu - 4 * sigma, mu + 4 * sigma
    model_xs, model_ps = thinkbayes2.RenderNormalCdf(mu, sigma, low, high)
    thinkplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.8')
    thinkplot.cdf(data_cdf)
Example #11
0
    def MakePmf(self, filler=None):
        """Builds a Pmf from the items of the Cdf of self.ts and 1 - self.ss.

        filler: if not None, this value is added to the Pmf with
                probability 1 minus the last cumulative probability

        returns: Pmf
        """
        lifetime_cdf = thinkbayes2.Cdf(self.ts, 1 - self.ss)

        result = thinkbayes2.Pmf()
        for value, mass in lifetime_cdf.Items():
            result.Set(value, mass)

        # last cumulative probability of the Cdf
        last_p = lifetime_cdf.ps[-1]
        if filler is None:
            return result

        result[filler] = 1 - last_p
        return result
Example #12
0
def PlotSurvival(complete):
    """Draws the survival curve, the CDF and the hazard function.

    Also prints the CDF and survival values at 13 and the hazard
    value at 39.

    complete: list of complete lifetimes
    """
    thinkplot.PrePlot(3, rows=2)

    lifetime_cdf = thinkbayes2.Cdf(complete, label='cdf')
    survival = MakeSurvivalFromCdf(lifetime_cdf, label='survival')
    for curve in (lifetime_cdf, survival):
        print(curve[13])

    # first subplot: survival curve with a faint CDF
    thinkplot.Plot(survival)
    thinkplot.Cdf(lifetime_cdf, alpha=0.2)
    thinkplot.Config()

    # second subplot: hazard function
    thinkplot.SubPlot(2)
    hazard = survival.MakeHazardFunction(label='hazard')
    print(hazard[39])
    thinkplot.Plot(hazard)
    thinkplot.Config(ylim=[0, 0.75])
Example #13
0
def main():
    """Compares priors, then reports posteriors for several values of high.

    Prints the posterior mean for each value of high, saves the figure
    under the root 'train3', and prints the 5th-95th percentile interval
    of the last posterior twice: once from the suite itself and once
    from its Cdf.
    """
    ComparePriors()

    observations = [30, 60, 90]

    thinkplot.Clf()
    thinkplot.PrePlot(num=3)

    for upper in (500, 1000, 2000):
        suite = MakePosterior(upper, observations, Train2)
        print(upper, suite.Mean())

    save_options = dict(
        root='train3',
        xlabel='Number of trains',
        ylabel='Probability',
    )
    thinkplot.Save(**save_options)

    # both intervals use the posterior from the final loop iteration
    print((suite.Percentile(5), suite.Percentile(95)))

    posterior_cdf = thinkbayes2.Cdf(suite)
    print((posterior_cdf.Percentile(5), posterior_cdf.Percentile(95)))