Esempio n. 1
0
def PlotConfidenceIntervals(xs,
                            inters,
                            slopes,
                            res=None,
                            percent=90,
                            **options):
    """Shades the percentile band spanned by a family of fitted lines.

    One line is evaluated with thinkstats2.FitLine for every
    (intercept, slope) pair; the region between the lower and upper
    percentile rows of those lines is drawn with thinkplot.FillBetween.

    xs: sequence of x values handed to FitLine
    inters: estimated intercepts
    slopes: estimated slopes, paired one-to-one with inters
    res: optional residuals; when given, a random permutation of them
         is added to each fitted line
    percent: width of the central percentile range to show
    options: keyword arguments forwarded to thinkplot.FillBetween
    """
    # percentile pair that leaves (100 - percent) / 2 in each tail
    tail = (100 - percent) / 2
    bounds = tail, 100 - tail

    add_noise = res is not None
    lines = []
    for inter, slope in zip(inters, slopes):
        fxs, fys = thinkstats2.FitLine(xs, inter, slope)
        if add_noise:
            # shuffle the residuals so every line gets a different noise draw
            fys += np.random.permutation(res)
        lines.append(fys)

    low, high = thinkstats2.PercentileRows(lines, bounds)
    # fxs comes from the last FitLine call
    thinkplot.FillBetween(fxs, low, high, **options)
Esempio n. 2
0
def ResampleSurvival(resp, iters=101):
    """Plots the survival estimate with a band from weighted resampling.

    The survival function estimated from resp is plotted first.  Then
    resp is resampled iters times, each resample is re-estimated, and
    the 5th-95th percentile band of the resampled survival
    probabilities is shaded.  The figure is saved under the root name
    'survival3'.

    resp: DataFrame of respondents
    iters: number of resamples
    """
    _, estimate = EstimateMarriageSurvival(resp)
    thinkplot.Plot(estimate)

    # NOTE(review): min()/max() are taken over resp itself; if resp is a
    # DataFrame as documented, these are per-column results -- confirm
    # that a single age column was not intended here.
    ts = np.arange(resp.min(), resp.max(), 1/12.0)

    def resampled_probs():
        """Returns survival probabilities at ts for one weighted resample."""
        sample = thinkstats2.ResampleRowsWeighted(resp)
        _, sf = EstimateMarriageSurvival(sample)
        return sf.Probs(ts)

    ss_seq = [resampled_probs() for _ in range(iters)]

    lower, upper = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, lower, upper, color='gray', label='90% CI')
    thinkplot.Save(root='survival3',
                   xlabel='age (years)',
                   ylabel='prob unmarried',
                   xlim=[12, 46],
                   ylim=[0, 1],
                   formats=FORMATS)
Esempio n. 3
0
def PlotSurvivalFunctions(sf_map, predict_flag=False):
    """Plots a shaded band, and optionally a median line, per group.

    Groups are visited in descending order of name.  For each group the
    10th-90th percentile band is shaded in gray; unless predict_flag is
    set, the 50th percentile row is also drawn as a labelled line.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; when true
                  only the bands are drawn
    """
    thinkplot.PrePlot(len(sf_map))

    draw_median = not predict_flag
    for name, sf_seq in sorted(sf_map.items(), reverse=True):
        # skip groups with no survival functions, or whose first one is empty
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        ts, rows = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, rows[0], rows[2], color='gray')

        if draw_median:
            # the label formats name as an integer after the '19' prefix
            thinkplot.Plot(ts, rows[1], label='19%d'%name)

    thinkplot.Config(xlabel='age (years)', ylabel='prob unmarried',
                     xlim=[14, 45], ylim=[0, 1],
                     legend=True, loc='upper right')
Esempio n. 4
0
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles.

    Each iteration fills missing values in daily, resamples the
    residuals, and records the absolute autocorrelation at lags
    1..nlags.  The 97.5th percentile row of those values, mirrored
    around zero, is shaded as a gray band.

    daily: data passed to FillMissing; its result must expose .resid
    iters: number of simulations to run
    nlags: maximum lag for which autocorrelation is computed
    """
    # run simulations
    # NOTE(review): newer statsmodels releases rename `unbiased` to
    # `adjusted` -- confirm against the installed version.
    abs_acfs = []
    for _ in range(iters):
        filled = FillMissing(daily, span=30)
        resampled = thinkstats2.Resample(filled.resid)
        # drop element 0 (lag zero) so the first entry corresponds to lag 1
        acf = smtsa.acf(resampled, nlags=nlags, unbiased=True)[1:]
        abs_acfs.append(np.abs(acf))

    # one row per simulation; sort each column independently
    array = np.zeros((iters, len(acf)))
    for row, abs_acf in enumerate(abs_acfs):
        array[row, :] = abs_acf
    array = np.sort(array, axis=0)

    # find the bounds that cover 95% of the distribution
    upper = PercentileRow(array, 97.5)
    lower = -upper
    thinkplot.FillBetween(range(1, nlags + 1), lower, upper,
                          alpha=0.2, color='gray')
Esempio n. 5
0
def PlotSurvivalFunctions(sf_map, predict_flag=False, colormap=None):
    """Plots a shaded band, and optionally a median line, per group.

    Groups are visited in descending order of name.  For each group the
    10th-90th percentile band is shaded; unless predict_flag is set, the
    50th percentile row is also drawn, using the group's color from
    colormap when one is supplied.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; when true
                  only the bands are drawn
    colormap: map from group name to color
    """
    thinkplot.PrePlot(num=len(sf_map))

    for name, sf_seq in sorted(sf_map.items(), reverse=True):
        # skip groups with no survival functions, or whose first one is empty
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        ts, rows = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, rows[0], rows[2], color='gray', alpha=0.2)

        if predict_flag:
            continue

        # pass an explicit color only when a colormap was provided
        line_options = {'label': '%ds' % name}
        if colormap:
            line_options['color'] = colormap[name]
        thinkplot.Plot(ts, rows[1], **line_options)
def PlotConfidenceIntervals(xs, inters, slopes, percent=90, **options):
    """Shades the percentile band spanned by a family of fitted lines.

    xs: sequence of x values handed to thinkstats2.FitLine
    inters: estimated intercepts
    slopes: estimated slopes, paired one-to-one with inters
    percent: width of the central percentile range to show
    options: keyword arguments forwarded to thinkplot.FillBetween
    """
    # percentile pair that leaves (100 - percent) / 2 in each tail
    tail = (100 - percent) / 2
    bounds = tail, 100 - tail

    lines = []
    for inter, slope in zip(inters, slopes):
        fxs, line = thinkstats2.FitLine(xs, inter, slope)
        lines.append(line)

    lower, upper = thinkstats2.PercentileRows(lines, bounds)
    # fxs comes from the last FitLine call
    thinkplot.FillBetween(fxs, lower, upper, **options)
def PlotPredictions(daily, years, iters=101, percent=90, func=RunLinearModel):
    """Plots two nested prediction bands from simulated model fits.

    The first band is built from predictions generated with
    add_resid=True and drawn with alpha 0.3; the second uses
    add_resid=False and is drawn on top with alpha 0.5.

    daily: DataFrame of daily prices
    years: sequence of times (in years) to make predictions for
    iters: number of simulations
    percent: what percentile range to show
    func: function that fits a model to the data
    """
    result_seq = SimulateResults(daily, iters=iters, func=func)

    # percentile pair that leaves (100 - percent) / 2 in each tail
    tail = (100 - percent) / 2
    percents = tail, 100 - tail

    # (add_resid, alpha) for each band, in drawing order
    bands = ((True, 0.3), (False, 0.5))
    for add_resid, alpha in bands:
        predict_seq = GeneratePredictions(result_seq, years,
                                          add_resid=add_resid)
        low, high = thinkstats2.PercentileRows(predict_seq, percents)
        thinkplot.FillBetween(years, low, high, alpha=alpha, color='gray')
Esempio n. 8
0
def PlotIntervals(daily, years, iters=101, percent=90, func=RunLinearModel):
    """Plots a prediction band based on different intervals.

    Results come from SimulateIntervals; predictions are generated with
    add_resid=True and the band between the lower and upper percentile
    rows is shaded in light gray.

    daily: DataFrame of daily prices
    years: sequence of times (in years) to make predictions for
    iters: number of simulations
    percent: what percentile range to show
    func: function that fits a model to the data
    """
    result_seq = SimulateIntervals(daily, iters=iters, func=func)
    predictions = GeneratePredictions(result_seq, years, add_resid=True)

    # leave (100 - percent) / 2 in each tail
    tail = (100 - percent) / 2
    lower = PercentileRow(predictions, tail)
    upper = PercentileRow(predictions, 100 - tail)

    thinkplot.FillBetween(years, lower, upper, alpha=0.1, color='gray')
Esempio n. 9
0
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles

    Each simulation fills missing values in daily, resamples the
    residuals, and records the absolute autocorrelation at lags
    1..nlags.  The 97.5th percentile row, mirrored around zero, is
    shaded as a gray band.

    daily: DataFrame
    iters: number of simulations to run
    nlags: maximum lags to compute autocorrelation
    """
    def simulate_once():
        """Returns absolute autocorrelations for one resampling."""
        filled = FillMissing(daily, span=30)
        resampled = thinkstats2.Resample(filled.resid)
        # NOTE(review): newer statsmodels releases rename `unbiased` to
        # `adjusted` -- confirm against the installed version.
        acf = smtsa.acf(resampled, nlags=nlags, unbiased=True)[1:]
        return np.abs(acf)

    runs = [simulate_once() for _ in range(iters)]

    upper = thinkstats2.PercentileRows(runs, [97.5])[0]
    lower = -upper
    thinkplot.FillBetween(range(1, nlags + 1), lower, upper,
                          alpha=0.2, color='gray')
def ResampleSurvival(dados, limiar, iters=101):
    """Plots the survival estimate with a band from weighted resampling.

    The survival function estimated from dados is plotted first.  Then
    dados is wrapped in a DataFrame and resampled iters times using the
    'MANSO' column; each resample's 'MANSO' column is re-estimated and
    the 5th-95th percentile band of the survival probabilities is
    shaded.

    dados: data to estimate from; must support min(), max() and
           conversion to a DataFrame with a 'MANSO' column
    limiar: second argument forwarded to EstimateMarriageSurvival
    iters: number of resamples
    """
    _, estimate = EstimateMarriageSurvival(dados, limiar)
    thinkplot.Plot(estimate)

    # evaluation grid: unit steps from the smallest to the largest value
    ts = np.arange(dados.min(), dados.max(), 1)

    def resampled_probs():
        """Returns survival probabilities at ts for one weighted resample."""
        sample = thinkstats2.ResampleRowsWeighted(pd.DataFrame(dados), column='MANSO')
        _, sf = EstimateMarriageSurvival(sample['MANSO'], limiar)
        return sf.Probs(ts)

    ss_seq = [resampled_probs() for _ in range(iters)]

    lower, upper = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, lower, upper, color='gray', label='90% CI')
Esempio n. 11
0
def PlotConfidenceIntervals(xs,
                            inters,
                            slopes,
                            res=None,
                            percent=90,
                            **options):
    """Plots a confidence band for a family of fitted lines.

    One line is evaluated with thinkstats2.FitLine for every
    (intercept, slope) pair and stored as a row of an array.  Each
    column is then sorted independently, and the rows at the lower and
    upper percentiles are shaded with thinkplot.FillBetween.

    xs: sequence
    inters: estimated intercepts
    slopes: estimated slopes
    res: residuals; when given, a random permutation of them is added
         to each fitted line
    percent: what percentile range to show (default 90)
    options: keyword arguments forwarded to thinkplot.FillBetween
    """
    nrows = len(slopes)
    array = np.zeros((nrows, len(xs)))

    for i, (inter, slope) in enumerate(zip(inters, slopes)):
        fxs, fys = thinkstats2.FitLine(xs, inter, slope)
        if res is not None:
            fys += np.random.permutation(res)
        array[i, ] = fys

    # sort every column so that row k holds the k-th smallest value per x
    array = np.sort(array, axis=0)

    def Percentile(p):
        """Selects the line from array that corresponds to percentile p.

        p: float 0--100

        returns: NumPy array (one row)
        """
        index = int(nrows * p / 100)
        # p == 100 would otherwise index one past the last row; clamp the
        # index into the valid range so the extreme rows are used instead
        index = max(0, min(index, nrows - 1))
        return array[index, ]

    p = (100 - percent) / 2
    low = Percentile(p)
    high = Percentile(100 - p)
    thinkplot.FillBetween(fxs, low, high, **options)