Example #1
0
def main():
    """Print and plot a simulated and a directly-added sum of three d6.

    Prints a single ``Die(6)``, the result of ``SampleSum`` over three
    references to that die with 5000 samples, and the result of adding
    the die to itself twice with ``+``.  Both three-dice results are
    then plotted and saved as a PDF under the root name 'DD1'.
    """
    separator = "##################################"

    print("Single Die:")
    single = Die(6)
    print(single)

    # Simulated sum of three dice.
    simulated = SampleSum([single] * 3, 5000)
    print(separator)
    print("Three Die:")
    print(simulated)

    # Sum obtained through the die's own addition operator.
    exact = single + single + single
    print(separator)
    print("Exact Three Die:")
    print(exact)

    # Draw both results with Allen Downey's thinkplot module.
    thinkplot.PrePlot(1)
    for distribution in (simulated, exact):
        thinkplot.Plot(distribution)
    thinkplot.Save(
        root='DD1',
        xlabel='Sum of 3 d6',
        ylabel='Probability',
        formats=['pdf'],
    )

    print("Program Complete")
Example #2
0
def main():
    """Update two Euro suites with 140 'H' and 110 'T' and plot both.

    One suite is built with the default arguments and one with
    ``triangle_prior=True``.  Each receives the same sequence of updates,
    is summarised with ``summarize_posterior``, and both are plotted and
    saved as a PDF under the root name 'euro2'.
    """
    uniform = Euro(range(101))
    uniform.label = "Uniform prior"
    triangle = Euro(range(101), triangle_prior=True)
    triangle.label = "Triangle prior"
    suites = (uniform, triangle)

    # Feed every observation to both suites, heads first and then tails.
    for outcome, count in (('H', 140), ('T', 110)):
        for _ in range(count):
            for suite in suites:
                suite.Update(outcome)

    print("Summary for uniform prior: ")
    summarize_posterior(uniform)

    print("Summary for triangle prior: ")
    summarize_posterior(triangle)

    # Draw both posteriors with Allen Downey's thinkplot module.
    thinkplot.PrePlot(1)
    for suite in suites:
        thinkplot.Plot(suite)
    thinkplot.Save(
        root='euro2',
        xlabel='Bias of heads vs. tails',
        ylabel='Probability',
        formats=['pdf'],
    )
Example #3
0
def ScatterFit(xs, ys, **options):
    """Show a scatter plot of the data with a fitted line on top.

    xs: sequence of x values
    ys: sequence of y values
    options: extra keyword arguments forwarded to thinkplot.Show
    """
    intercept, gradient = LeastSquares(xs, ys)
    line_xs, line_ys = FitLine(xs, intercept, gradient)

    thinkplot.Scatter(xs, ys, color='blue', alpha=0.1, s=10)

    # The line is drawn twice: a wider white stroke first, then a
    # narrower red stroke over it.
    for stroke_color, stroke_width in (('white', 3), ('red', 2)):
        thinkplot.Plot(line_xs, line_ys,
                       color=stroke_color,
                       linewidth=stroke_width)

    thinkplot.Show(legend=False, **options)
Example #4
0
def PlotArrivalDepartureDelayFit(flights):
    """Plots departure delay against arrival delay with a fitted line.

    Works on 1000 rows obtained from thinkstats2.SampleRows, saves the
    scatter plot plus least-squares line under the root name
    'ArrivalDepartureDelayFit_linear1', then fits the same relationship
    with an OLS formula and passes the results to
    regression.SummarizeResults.

    flights: DataFrame with ARRIVAL_DELAY and DEPARTURE_DELAY columns
    """
    rows = thinkstats2.SampleRows(flights, 1000)
    x_delays = rows.ARRIVAL_DELAY
    y_delays = rows.DEPARTURE_DELAY

    intercept, gradient = thinkstats2.LeastSquares(x_delays, y_delays)
    line_xs, line_ys = thinkstats2.FitLine(x_delays, intercept, gradient)

    thinkplot.Scatter(x_delays, y_delays, color='gray', alpha=0.1)
    # Wider white stroke underneath, narrower blue stroke on top.
    for stroke_color, stroke_width in (('white', 3), ('blue', 2)):
        thinkplot.Plot(line_xs, line_ys,
                       color=stroke_color,
                       linewidth=stroke_width)
    # An explicit axis=[10, 45, 0, 15] was previously tried here and is
    # intentionally not passed.
    thinkplot.Save(root='ArrivalDepartureDelayFit_linear1',
                   xlabel='arrival delay (min)',
                   ylabel='departure delay (min)',
                   legend=False)

    # Same relationship through the formula interface.
    ols_model = smf.ols('DEPARTURE_DELAY ~ ARRIVAL_DELAY', data=rows)
    regression.SummarizeResults(ols_model.fit())
Example #5
0
def PlotOptimalBid():
    """Plots posterior mean, best bid and best gain vs guessed price.

    For 21 evenly spaced guesses between 15000 and 60000, updates
    player1's beliefs, asks a GainCalculator for the expected gains and
    keeps the (gain, bid) pair that compares greatest.  The result is
    saved under the root name 'price6'.
    """
    player1, player2 = MakePlayers()

    def evaluate(guess):
        """Returns (guess, mean, mle, gain, bid) for one guessed price."""
        player1.MakeBeliefs(guess)

        mean = player1.posterior.Mean()
        mle = player1.posterior.MaximumLikelihood()

        bids, gains = GainCalculator(player1, player2).ExpectedGains()
        # Tuples compare by gain first, so this picks the top gain.
        gain, bid = max(zip(gains, bids))
        return guess, mean, mle, gain, bid

    rows = [evaluate(guess) for guess in numpy.linspace(15000, 60000, 21)]
    guesses, means, _mles, gains, bids = zip(*rows)

    thinkplot.PrePlot(num=3)
    # Gray diagonal from (15000, 15000) to (60000, 60000) for reference.
    pyplot.plot([15000, 60000], [15000, 60000], color='gray')
    # The MLE series is collected above but deliberately not plotted.
    for series, series_label in ((means, 'mean'),
                                 (bids, 'bid'),
                                 (gains, 'gain')):
        thinkplot.Plot(guesses, series, label=series_label)
    thinkplot.Save(root='price6',
                   xlabel='guessed price ($)',
                   formats=FORMATS)
Example #6
0
def PlotCdf(cdf):
    """Saves a log-y CCDF figure and a CDF-vs-model figure.

    The first figure ('kidney1') plots 1 - p for every p in cdf.ps on a
    log y axis.  The second ('kidney2') overlays the points of cdf on
    the curve returned by ModelCdf.

    cdf: CDF object exposing xs and ps
    """
    data_xs, data_ps = cdf.xs, cdf.ps
    complement = [1 - prob for prob in data_ps]

    # Figure 1: complementary CDF with a logarithmic y axis.
    thinkplot.Clf()
    thinkplot.Plot(data_xs, complement, 'bo-')
    thinkplot.Save(root='kidney1',
                   formats=FORMATS,
                   xlabel='RDT',
                   ylabel='CCDF (log scale)',
                   yscale='log')

    # Figure 2: model curve (dashed) with the data points on top.
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    model_xs, model_ys = ModelCdf()
    thinkplot.Plot(model_xs, model_ys, label='model', linestyle='dashed')
    thinkplot.Plot(data_xs, data_ps, 'gs', label='data')
    thinkplot.Save(root='kidney2',
                   formats=FORMATS,
                   xlabel='RDT (volume doublings per year)',
                   ylabel='CDF',
                   title='Distribution of RDT',
                   axis=[-2, 7, 0, 1],
                   loc=4)
Example #7
0
def PlotRemainingLifetime(sf1, sf2):
    """Plots remaining lifetimes in two side-by-side panels.

    The left panel shows sf1.RemainingLifetime() with its default
    arguments; the right panel shows sf2.RemainingLifetime() computed
    with the 50th percentile as the summary function and np.inf as the
    filler.  The figure is saved under the root name 'survival6'.

    sf1: SurvivalFunction for pregnancy length
    sf2: SurvivalFunction for age at first marriage
    """
    def median_of(pmf):
        """Returns the 50th percentile of pmf."""
        return pmf.Percentile(50)

    thinkplot.PrePlot(cols=2)

    # Left panel: pregnancy length.
    pregnancy_remaining = sf1.RemainingLifetime()
    thinkplot.Plot(pregnancy_remaining)
    thinkplot.Config(title='remaining pregnancy length',
                     xlabel='weeks',
                     ylabel='mean remaining weeks')

    # Right panel: age at first marriage.
    thinkplot.SubPlot(2)
    marriage_remaining = sf2.RemainingLifetime(filler=np.inf, func=median_of)
    thinkplot.Plot(marriage_remaining)
    thinkplot.Config(title='years until first marriage',
                     ylim=[0, 15],
                     xlim=[11, 31],
                     xlabel='age (years)',
                     ylabel='median remaining years')

    thinkplot.Save(root='survival6', formats=FORMATS)
Example #8
0
def ex3():
    """Simulates games and shows four diagnostic panels in a 2x2 grid.

    Panel 1: histogram of 1000 simulated results with lam=4.
    Panel 2: their CDF with gray vertical lines at the 5th and 95th
             percentiles.
    Panel 3: RMSE against lam for lam in 1..14 (1000 games each).
    Panel 4: RMSE against the number of games m for m in 10, 20, ..., 990.
    """
    def mark_x(x, height=1):
        # Light-gray vertical segment from y=0 up to `height`.
        thinkplot.Plot([x, x], [0, height], color='0.8', linewidth=3)

    def simulate(count, **game_options):
        # `count` independent simulated games.
        return [SimulateGame(**game_options) for _ in range(count)]

    lam = 4
    goal_totals = simulate(1000, lam=lam)
    print('RMSE', RMSE(goal_totals, lam))
    hist = thinkstats2.Hist(goal_totals)
    cdf = thinkstats2.Cdf(goal_totals)

    thinkplot.PrePlot(rows=2, cols=2)

    thinkplot.SubPlot(1)
    thinkplot.Hist(hist)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf)
    for percent in (5, 95):
        mark_x(cdf.Percentile(percent))

    # lambda vs. rmse
    # rmse goes up as lambda goes up
    thinkplot.SubPlot(3)
    lams = range(1, 15)
    lam_rmses = [RMSE(simulate(1000, lam=rate), rate) for rate in lams]
    thinkplot.Plot(lams, lam_rmses)

    # m vs. rmse
    # maybe rmse very slowly goes down as m goes up?
    # not at all clear that's really the case...
    thinkplot.SubPlot(4)
    ms = np.arange(10, 1000, 10)
    # SimulateGame is called with its default arguments here and the
    # error is measured against the literal 4.
    m_rmses = [RMSE(simulate(m), 4) for m in ms]
    thinkplot.Plot(ms, m_rmses)

    thinkplot.show()
Example #9
0
def PlotSurvivalFunctions(sf_map, predict_flag=False, colormap=None):
    """Plot a confidence band, and optionally a median line, per group.

    Groups are visited in descending order of name.  A group is skipped
    when its sequence is empty or when its first survival function has
    length zero.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; when true
                  only the gray band is drawn
    colormap: map from group name to color
    """
    thinkplot.PrePlot(num=len(sf_map))

    for name, sf_seq in sorted(sf_map.items(), reverse=True):
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        # rows holds the 10th, 50th and 90th percentile rows, in order.
        ts, rows = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, rows[0], rows[2], color='gray', alpha=0.2)

        if predict_flag:
            continue

        # Only pass an explicit color when a colormap was supplied.
        color_option = {'color': colormap[name]} if colormap else {}
        thinkplot.Plot(ts, rows[1], label='%ds' % name, **color_option)
Example #10
0
 def plot(self, high=None, **options):
     """Plots amplitude vs frequency.

     When self.full is set, `high` is handed to self.render_full and
     the arrays it returns are plotted.  Otherwise self.fs and
     self.amps are plotted up to the index that find_index reports
     for `high` (or in full when `high` is None).

     high: frequency to cut off at
     options: keyword arguments forwarded to thinkplot.Plot
     """
     if not self.full:
         cutoff = find_index(high, self.fs) if high is not None else None
         thinkplot.Plot(self.fs[:cutoff], self.amps[:cutoff], **options)
         return

     freqs, amplitudes = self.render_full(high)
     thinkplot.Plot(freqs, amplitudes, **options)
Example #11
0
def PlotConditionalSurvival(ts, ps):
    """Plot the probability of surviving an additional 5 or 10 years,
    conditioned on surviving at least t.

    ts: sequence of times, used as the x values
    ps: sequence of (5-year probability, 10-year probability) pairs
    """
    thinkplot.Clf()

    five_year, ten_year = zip(*ps)
    curves = (
        (five_year, 'blue', '5 years'),
        (ten_year, 'green', '10 years'),
    )
    for probs, line_color, line_label in curves:
        thinkplot.Plot(ts, probs, linewidth=2, color=line_color,
                       label=line_label)

    thinkplot.Save(root='seer5',
                   title='',
                   xlabel='Survival time (years)',
                   ylabel='Probability')
Example #12
0
def SimulateManyGames(lam, iters=1000000):
    """Simulates many games, prints error statistics and plots the CDF.

    Prints the mean error and RMSE of the simulated values relative to
    lam, then plots their CDF with light-gray vertical lines at the 5th
    and 95th percentiles.

    lam: value passed to SimulateGame and used as the reference for the
         error statistics
    iters: number of games to simulate
    """
    # np.arange is kept (rather than range) so that any `iters` value
    # accepted before is still accepted.
    lam_est = [SimulateGame(lam) for _ in np.arange(iters)]

    print('Mean Error =', MeanError(lam_est, lam))
    print('RMSE =', RMSE(lam_est, lam))

    lam_cdf = thinkstats2.Cdf(lam_est)
    interval = lam_cdf.Percentile(5), lam_cdf.Percentile(95)

    thinkplot.Cdf(lam_cdf)
    # Mark both ends of the 5%-95% interval.
    for bound in interval:
        thinkplot.Plot([bound, bound], [0, 1], linewidth=2, color='0.8')
    thinkplot.Config(xlabel='Goals per game', ylabel='CDF', legend=False)
Example #13
0
def SampleDistrPLot(estimates, n, lam):
    """Prints summary statistics for the estimates and plots their CDF.

    The CDF is labelled 'n=<n>'; light-gray vertical lines mark its 5th
    and 95th percentiles.

    estimates: sequence of estimates
    n: sample size, used in the label and the printed line
    lam: reference value passed to RMSE
    """
    estimate_cdf = thinkstats2.Cdf(estimates, label='n=%d' % n)
    conf_int = estimate_cdf.Percentile(5), estimate_cdf.Percentile(95)
    stderr = RMSE(estimates, lam)
    print('n=', n, 'Std Error=', stderr, 'Conf Int=', conf_int)

    thinkplot.Cdf(estimate_cdf)
    for bound in conf_int:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=2)
Example #14
0
def CycleExtract(fw, data, pnum, trial, plane, marker, plot, plot2):
    """Splits one trial into cycles delimited by consecutive heel strikes.

    fw: footwear condition; one of 'AFO', 'PPAFO' or 'Shoes'
    data: sequence whose items 0, 1 and 2 hold the AFO, PPAFO and Shoes
          datasets respectively
    pnum: index into the chosen dataset
    trial: trial identifier passed to GetTrial
    plane: identifier passed to GetData
    marker: forwarded to HeelStrike
    plot: forwarded to HeelStrike
    plot2: when truthy, show each cycle over the full R_HEEL and L_HEEL
           traces

    returns: list of (index, cycle) pairs, where index is the range of
             row numbers and cycle is the matching slice of the trial data

    raises: ValueError if fw is not a recognised footwear condition
    """
    # Position of each footwear condition's dataset inside `data`.
    fw_positions = {'AFO': 0, 'PPAFO': 1, 'Shoes': 2}
    if fw not in fw_positions:
        # Fail early and clearly instead of hitting an unbound local later.
        raise ValueError('unknown fw %r; expected one of %s'
                         % (fw, sorted(fw_positions)))
    choicedata = data[fw_positions[fw]]

    strike_charac, _strike_loc = HeelStrike(fw, data, pnum, trial, marker,
                                            plot)

    dataframe = choicedata[pnum].GetTrial(trial).GetData(plane)

    cycle_set = []
    for i in range(len(strike_charac) - 1):
        # NOTE(review): the +40 / +50 row offsets are unexplained here;
        # confirm their meaning against HeelStrike.
        start_rowindex = strike_charac[i][0] + 40
        end_rowindex = strike_charac[i + 1][0] + 50

        cycle = dataframe[start_rowindex:end_rowindex]
        index = range(start_rowindex, end_rowindex, 1)
        cycle_set.append((index, cycle))

    if plot2:
        for index, cycle in cycle_set:
            # One figure per heel: full trace in blue, this cycle in red.
            for column, side in (('R_HEEL', 'Right'), ('L_HEEL', 'Left')):
                thinkplot.Plot(dataframe[column],
                               color='blue',
                               label='%s full set' % side)
                thinkplot.Plot(index,
                               cycle[column],
                               color='red',
                               label='%s cycle set' % side)
                thinkplot.Show(legend=True)

    return cycle_set
def MakeFigures():
    """Plots the CDF of populations in several forms.

    On a log-log scale the tail of the CCDF looks like a straight line,
    which suggests a Pareto distribution, but that turns out to be misleading.

    On a log-x scale the distribution has the characteristic sigmoid of
    a lognormal distribution.

    The normal probability plot of log(sizes) confirms that the data fit the
    lognormal model very well.

    Many phenomena that have been described with Pareto models can be described
    as well, or better, with lognormal models.

    Saves two figures: 'populations_pareto' (CCDF of log10 population on
    a log y axis with a Pareto model) and 'populations_normal' (CDF with
    a normal model next to a normal probability plot).
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    # Everything below works on log10 of the populations.
    log_pops = np.log10(pops)
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # pareto plot: model and data as complementary CDFs
    xs, ys = thinkstats2.RenderParetoCdf(xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(xs), 1-ys, label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CCDF',
                     yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot: two panels side by side
    thinkplot.PrePlot(cols=2)

    # Normal model parameters taken from the log10 data itself.
    mu, sigma = log_pops.mean(), log_pops.std()
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=8)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CDF')

    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z',
                     ylabel='log10 population',
                     xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
Example #16
0
def MakeNormalPlot(weights):
    """Generates a normal probability plot of birth weights.

    Draws a light-gray model line over x in [-5, 5], using the trimmed
    mean as intercept and the square root of the trimmed variance as
    slope, then plots the result of thinkstats2.NormalProbability.

    weights: sequence
    """
    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    trimmed_std = math.sqrt(trimmed_var)

    # Model line.
    model_xs, model_ys = thinkstats2.FitLine([-5, 5], trimmed_mean,
                                             trimmed_std)
    thinkplot.Plot(model_xs, model_ys, color='0.8', label='model')

    # Data.
    data_xs, data_ys = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(data_xs, data_ys, label='weights')
Example #17
0
def main(script):
    """Generates the change, 'heri14.1', 'heri14.2' and gender figures.

    Column 1 of 'heri14.csv' is plotted as 'No religion'; column 4 is
    plotted as 'No attendance' after its 1966 entry is removed and each
    value y is replaced with 100 - y.  Each series is passed to MakePlot
    with the model 'ys ~ ts + t2' and gets a single extra point for 2014.

    script: not used inside this function
    """
    MakeChangePlot()

    line_style = dict(linewidth=3, markersize=0, alpha=0.7)
    table = ReadData('heri14.csv')

    # --- no religion (column 1); its 1966 entry is left in place ---
    none_column = GetColumn(table, 1)
    ts, ys = RenderColumn(none_column)

    MakePlot(ts, ys, model='ys ~ ts + t2')
    pyplot.plot(ts, ys, 'bs-', label='No religion', **line_style)

    # single point with the actual value from 2014
    thinkplot.Plot([2014], [27.5], 'bs')

    thinkplot.Save(
        root='heri14.1',
        formats=FORMATS,
        ylabel='Percent',
        loc=2,
        axis=[1967, UPPER, 0, 30],
    )

    # --- no attendance (column 4, complemented) ---
    attendance_column = GetColumn(table, 4)
    del attendance_column[1966]
    ts, attended = RenderColumn(attendance_column)
    ys = [100 - percent for percent in attended]

    MakePlot(ts, ys, model='ys ~ ts + t2')
    pyplot.plot(ts, ys, 'go-', label='No attendance', **line_style)

    # single point with the actual value from 2014
    thinkplot.Plot([2014], [100 - 70.7], 'gs')

    thinkplot.Save(
        root='heri14.2',
        formats=FORMATS,
        ylabel='Percent',
        loc=2,
        axis=[1967, UPPER, 0, 30],
    )

    MakeGenderPlot()
Example #18
0
def main():
    """Shows the data in 'mystery0.dat' under six views in a 2x3 grid.

    Panels, in order: linear CDF, log-x CDF, exponential transform,
    normal probability plot, pareto transform, weibull transform.
    """
    values = read_file('mystery0.dat')
    cdf = thinkstats2.MakeCdfFromList(values)

    def scaled_panel(position, title, **cdf_options):
        # Plot the CDF with the given options and hand the options that
        # thinkplot.Cdf returns on to Config.
        thinkplot.SubPlot(2, 3, position)
        scale = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **scale)

    # Panel 1 does not forward the returned options to Config.
    thinkplot.SubPlot(2, 3, 1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    scaled_panel(2, 'logx', xscale='log')
    scaled_panel(3, 'expo', transform='exponential')

    # Panel 4 plots the normal probability points directly.
    thinkplot.SubPlot(2, 3, 4)
    normal_xs, normal_ys = thinkstats2.NormalProbability(values)
    thinkplot.Plot(normal_xs, normal_ys)
    thinkplot.Config(title='normal')

    scaled_panel(5, 'pareto', transform='pareto')
    scaled_panel(6, 'weibull', transform='weibull')

    thinkplot.Show()
Example #19
0
def PlotEwmaPredictions(daily, name):
    """Extends the EWMA of a daily series one year ahead and plots it.

    The slope is the exponentially weighted moving average (span 180) of
    the day-to-day differences in ppg.  Predictions continue linearly
    from the last 'ewma' value using the last slope, and fill the 'ewma'
    column for the 365 days that follow the last observed date.

    daily: DataFrame with a ppg column; passed to timeseries.FillMissing,
           whose result must provide 'ppg' and 'ewma' columns
    name: label for the scatter plot of the observed values
    """
    # use EWMA to estimate slopes; Series.ewm(...).mean() replaces the
    # removed top-level pandas.ewma with the same default settings
    filled = timeseries.FillMissing(daily)
    filled['slope'] = filled['ppg'].diff().ewm(span=180).mean()

    # extract the last inter and slope (by position, not by label)
    start = filled.index[-1]
    inter = filled['ewma'].iloc[-1]
    slope = filled['slope'].iloc[-1]

    # reindex the DataFrame, adding a year to the end
    dates = pandas.date_range(filled.index.min(),
                              filled.index.max() + np.timedelta64(365, 'D'))
    predicted = filled.reindex(dates)

    # generate predicted values and add them to the end
    predicted['date'] = predicted.index
    one_day = np.timedelta64(1, 'D')
    predicted['days'] = (predicted['date'] - start) / one_day
    predict = inter + slope * predicted['days']
    # assign the filled column back rather than relying on an in-place
    # fill of an attribute-accessed column
    predicted['ewma'] = predicted['ewma'].fillna(predict)

    # plot the actual values and predictions
    thinkplot.Scatter(daily.ppg, alpha=0.1, label=name)
    thinkplot.Plot(predicted['ewma'])
    thinkplot.Save()
Example #20
0
def ResampleSurvival(resp, iters=101):
    """Resamples respondents and estimates the survival function.

    Plots the survival function estimated from resp, then a gray band
    between the 5th and 95th percentile rows of the survival
    probabilities estimated from `iters` weighted resamples.  The figure
    is saved under the root name 'survival3'.

    resp: DataFrame of respondents
    iters: number of resamples
    """
    _, estimated_sf = EstimateMarriageSurvival(resp)
    thinkplot.Plot(estimated_sf)

    # Evaluation grid in steps of one twelfth.
    # NOTE(review): min()/max() are taken over `resp` as a whole; if resp
    # is a DataFrame these are per-column results -- confirm whether a
    # single age column was intended.
    ts = np.arange(resp.min(), resp.max(), 1/12.0)

    def resampled_probs():
        # Survival probabilities on `ts` for one weighted resample.
        sample = thinkstats2.ResampleRowsWeighted(resp)
        _, sample_sf = EstimateMarriageSurvival(sample)
        return sample_sf.Probs(ts)

    ss_seq = [resampled_probs() for _ in range(iters)]

    lower, upper = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, lower, upper, color='gray', label='90% CI')
    thinkplot.Save(root='survival3',
                   xlabel='age (years)',
                   ylabel='prob unmarried',
                   xlim=[12, 46],
                   ylim=[0, 1],
                   formats=FORMATS)
def MakeNormalPlot(x):
    """Shows a normal probability plot for column x of the global df.

    A thick light-gray model line is drawn over [-4, 4] using the
    trimmed mean as intercept and the square root of the trimmed
    variance as slope; the normal probability points of the column are
    plotted on top.

    x: column name looked up in the module-level DataFrame `df`
    """
    column_mean, column_var = thinkstats2.TrimmedMeanVar(df[x], p=0.01)
    column_std = math.sqrt(column_var)

    # Model line.
    model_xs, model_ys = thinkstats2.FitLine([-4, 4], column_mean, column_std)
    thinkplot.Plot(model_xs, model_ys, linewidth=4, color='0.8')

    # Data points.
    thinkplot.PrePlot(2)
    data_xs, data_ys = thinkstats2.NormalProbability(df[x])
    thinkplot.Plot(data_xs, data_ys, label='Number of Crimes')
    thinkplot.Show(title='Normal Prob Plot: {}'.format(x),
                   xlabel='Standard deviations from mean',
                   ylabel='Number of Crimes')
Example #22
0
    def MakePlot(self, root='redline4'):
        """Makes a plot showing the mixture.

        Every (pmf, prob) item of self.metapmf is drawn as a faint blue
        CDF whose line width depends on prob; the CDF of self.mixture is
        drawn on top.  CDFs are scaled by 1/60 before plotting.

        root: root name of the saved figure
        """
        per_minute = 1.0 / 60

        thinkplot.Clf()

        # One faint curve per component of the MetaPmf.
        for component, prob in sorted(self.metapmf.Items()):
            component_cdf = component.MakeCdf().Scale(per_minute)
            # NOTE(review): this is undefined for prob == 1/e and negative
            # for prob > 1/e -- confirm the probabilities stay below that.
            line_width = 2 / math.log(-math.log(prob))
            thinkplot.Plot(component_cdf.xs,
                           component_cdf.ps,
                           alpha=0.2,
                           linewidth=line_width,
                           color='blue',
                           label='')

        # The mixture itself; the CDF based on the point estimate
        # (self.point) is deliberately not drawn.
        thinkplot.PrePlot(2)
        mixture_cdf = self.mixture.MakeCdf(name='mix').Scale(per_minute)
        thinkplot.Cdf(mixture_cdf)

        thinkplot.Save(root=root,
                       xlabel='Wait time (min)',
                       ylabel='CDF',
                       formats=FORMATS,
                       axis=[0, 10, 0, 1])
Example #23
0
def main(script, filename='mystery0.dat'):
    """Shows the data in `filename` under six views in a 2x3 grid.

    Panels, in order: linear CDF, log-x CDF, exponential transform,
    normal probability plot, pareto transform, weibull transform.

    script: not used inside this function
    filename: file read with ReadFile
    """
    values = ReadFile(filename)
    cdf = thinkstats2.Cdf(values)

    def cdf_panel(position, title, **cdf_options):
        # Draw the CDF and forward the options thinkplot.Cdf returns.
        thinkplot.SubPlot(position)
        returned_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **returned_options)

    thinkplot.PrePlot(rows=2, cols=3)

    # Panel 1 does not forward the returned options to Config.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    cdf_panel(2, 'logx', xscale='log')
    cdf_panel(3, 'expo', transform='exponential')

    # Panel 4 plots the normal probability points directly.
    thinkplot.SubPlot(4)
    normal_xs, normal_ys = thinkstats2.NormalProbability(values)
    thinkplot.Plot(normal_xs, normal_ys)
    thinkplot.Config(title='normal')

    cdf_panel(5, 'pareto', transform='pareto')
    cdf_panel(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)
Example #24
0
def main(script, filename='mystery0.dat'):
    """Shows the data in `filename` under six labelled views (2x3 grid).

    Every curve is drawn in color 'C0'.  Panels, in order: CDF on a
    linear scale, CDF on a log-x scale, exponential transform, normal
    probability plot, pareto transform, weibull transform.

    script: not used inside this function
    filename: file read with ReadFile; also the label of the first curve
    """
    curve_color = 'C0'
    values = ReadFile(filename)
    cdf = thinkstats2.Cdf(values)

    def transformed_panel(position, title, ylabel, **cdf_options):
        # Draw the CDF and forward the options thinkplot.Cdf returns.
        thinkplot.SubPlot(position)
        scale = thinkplot.Cdf(cdf, color=curve_color, **cdf_options)
        thinkplot.Config(title=title, ylabel=ylabel, **scale)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # Panel 1 is the only labelled curve and does not forward the
    # returned options to Config.
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color=curve_color, label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    transformed_panel(2, 'CDF on log-x scale', 'CDF', xscale='log')
    transformed_panel(3, 'CCDF on log-y scale', 'log CCDF',
                      transform='exponential')

    # Panel 4 plots the normal probability points directly.
    thinkplot.SubPlot(4)
    normal_xs, normal_ys = thinkstats2.NormalProbability(values)
    thinkplot.Plot(normal_xs, normal_ys, color=curve_color)
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    transformed_panel(5, 'CCDF on log-log scale', 'log CCDF',
                      transform='pareto')
    transformed_panel(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
                      transform='weibull')

    thinkplot.Show(legend=False)
Example #25
0
def main():
    """Updates one Euro suite with 140 'H' and 110 'T', then reports it.

    Prints the suite, its mean, median, maximum-likelihood hypothesis
    and credible interval, and saves a plot of it as a PDF under the
    root name 'euro1'.
    """
    suite = Euro(range(101))
    suite.label = "Posterior probability of various biases"

    # All heads first, then all tails.
    for outcome, count in (('H', 140), ('T', 110)):
        for _ in range(count):
            suite.Update(outcome)

    suite.Print()

    print("Mean hypothesis = {}".format(suite.Mean()))
    print("Median hypothesis = {}".format(suite.Median()))
    most_likely = str(suite.MaximumLikelihood())
    print("Hypothesis with maximum likelihood = {}".format(most_likely))
    interval = str(suite.CredibleInterval())
    print("90% credible interval = {}".format(interval))

    # Draw the posterior with Allen Downey's thinkplot module.
    thinkplot.PrePlot(1)
    thinkplot.Plot(suite, style='')
    thinkplot.Save(
        root='euro1',
        xlabel='Bias of heads vs. tails',
        ylabel='Probability',
        formats=['pdf'],
    )
Example #26
0
def PlotSurvivalFunctions(sf_map, predict_flag=False):
    """Plot a confidence band, and optionally a median line, per group.

    Groups are visited in descending order of name.  A group is skipped
    when its sequence is empty or when its first survival function has
    length zero.  Median lines are labelled '19' followed by the group
    name formatted as an integer.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; when true
                  only the gray band is drawn
    """
    thinkplot.PrePlot(len(sf_map))

    for name, sf_seq in sorted(sf_map.items(), reverse=True):
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        # rows holds the 10th, 50th and 90th percentile rows, in order.
        ts, rows = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, rows[0], rows[2], color='gray')

        if predict_flag:
            continue
        thinkplot.Plot(ts, rows[1], label='19%d' % name)

    thinkplot.Config(xlabel='age (years)',
                     ylabel='prob unmarried',
                     xlim=[14, 45],
                     ylim=[0, 1],
                     legend=True,
                     loc='upper right')
Example #27
0
def PlotJointDist(pmf1, pmf2, thresh=0.8):
    """Plot the joint distribution of p_correct.

    Both arguments are modified in place: every value below thresh is
    removed from them before the joint distribution is built.  The
    figure is saved under the root name 'sat_joint'.

    pmf1, pmf2: posterior distributions
    thresh: lower bound of the range to be plotted
    """
    for posterior in (pmf1, pmf2):
        # Collect first, then remove, so the pmf is not changed while
        # its values are being read.
        too_small = [val for val in posterior.Values() if val < thresh]
        for val in too_small:
            posterior.Remove(val)

    joint = thinkbayes2.MakeJoint(pmf1, pmf2)

    thinkplot.Figure(figsize=(6, 6))
    thinkplot.Contour(joint, contour=False, pcolor=True)

    # Faint gray diagonal from (thresh, thresh) to (1, 1).
    diagonal = [thresh, 1.0]
    thinkplot.Plot(diagonal, [thresh, 1.0],
                   color='gray',
                   alpha=0.2,
                   linewidth=4)

    thinkplot.Save(root='sat_joint',
                   xlabel='p_correct Alice',
                   ylabel='p_correct Bob',
                   axis=[thresh, 1.0, thresh, 1.0],
                   formats=['pdf', 'eps'])
Example #28
0
def PlotResiduals(live):
    """Plots percentiles of the residuals.

    Fits totalwgt_lb against agepreg by least squares, stores the
    residuals in a new 'residual' column of `live` (the argument is
    modified), groups the rows into age bins of width 3 starting at 10,
    and plots the 75th, 50th and 25th percentile of the residuals per
    group against the group's mean age.  The first and last groups are
    left out.  The figure is saved under the root name 'linear2'.

    live: DataFrame
    """
    inter, slope = thinkstats2.LeastSquares(live.agepreg, live.totalwgt_lb)
    live['residual'] = thinkstats2.Residuals(live.agepreg, live.totalwgt_lb,
                                             inter, slope)

    bin_edges = np.arange(10, 48, 3)
    grouped = live.groupby(np.digitize(live.agepreg, bin_edges))

    # One (mean age, residual Cdf) pair per group, minus both end groups.
    summaries = [(group.agepreg.mean(), thinkstats2.Cdf(group.residual))
                 for _, group in grouped][1:-1]
    mean_ages = [mean_age for mean_age, _ in summaries]
    residual_cdfs = [cdf for _, cdf in summaries]

    thinkplot.PrePlot(3)
    for percent in [75, 50, 25]:
        percentile_values = [cdf.Percentile(percent)
                             for cdf in residual_cdfs]
        thinkplot.Plot(mean_ages, percentile_values,
                       label='%dth' % percent)

    thinkplot.Save(root='linear2',
                   xlabel='age (years)',
                   ylabel='residual (lbs)',
                   xlim=[10, 45])
Example #29
0
def MakeNormalModel(arrivalDelays):
    """Plot the CDF of arrival delays with a normal model.
       This is a modified copy from analytic.py

    Prints the trimmed mean and variance and the derived sigma, draws
    the model CDF rendered between 0 and 12.5, then the data CDF, and
    saves the figure under the root name
    'NormalModel_arrivaldelay_model'.

    arrivalDelays: sequence of arrival delays
    """
    # Trimming outliers (p=0.01) yields a better fit.
    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(arrivalDelays,
                                                           p=0.01)
    print('Mean, Var', trimmed_mean, trimmed_var)

    model_sigma = math.sqrt(trimmed_var)
    print('Sigma', model_sigma)

    # Model curve.
    model_xs, model_ps = thinkstats2.RenderNormalCdf(trimmed_mean,
                                                     model_sigma,
                                                     low=0,
                                                     high=12.5)
    thinkplot.Plot(model_xs, model_ps, label='model', color='0.8')

    # Data curve.
    data_cdf = thinkstats2.Cdf(arrivalDelays, label='data')
    thinkplot.PrePlot(1)
    thinkplot.Cdf(data_cdf)

    thinkplot.Save(root='NormalModel_arrivaldelay_model',
                   title='Arrival Delays',
                   xlabel='arrival delays (min)',
                   ylabel='CDF')
def PlotResiduals(model, results):
    """Plots the residuals of a model.

    The x values are taken from column 1 of model.exog and the y values
    from results.resid; the line is thin and half transparent.

    model: StatsModel model object
    results: StatsModel results object
    """
    thinkplot.Plot(model.exog[:, 1],
                   results.resid,
                   linewidth=0.5,
                   alpha=0.5)