def single_plot(dfs):
    """Plots a shaded band and a median line per age group, then saves.

    For each entry of the module-level `age_groups`, the matching row is
    taken from every DataFrame in `dfs`. `fit_row` is applied to each row,
    the 5th-95th percentile range of the fitted values is shaded, and the
    50th percentile of the raw rows is drawn as a line.

    dfs: sequence of DataFrames that can be indexed by age group via .loc
    """
    for idx, group in enumerate(age_groups):
        print(idx, group)

        # one row per DataFrame for this age group, plus its fitted values
        observed = [frame.loc[group] for frame in dfs]
        fitted = [fit_row(row).fittedvalues for row in observed]

        # x positions are the row index shifted by 2
        # NOTE(review): the reason for the +2 offset is not visible here
        xs = observed[0].index + 2

        # shaded region: 5th to 95th percentile of the fitted values
        band = thinkstats2.PercentileRows(fitted, [5, 95])
        thinkplot.fill_between(xs, band[0], band[1],
                               color=colors[idx], alpha=0.3)

        # line: 50th percentile of the raw rows
        middle = thinkstats2.PercentileRows(observed, [50])
        thinkplot.plot(xs, middle[0],
                       label=labels[idx], color=colors[idx], alpha=0.6)

    thinkplot.config(xlabel=xlabel,
                     ylabel=ylabel,
                     loc='upper left',
                     axis=axis)

    legend = plt.gca().get_legend()
    legend.set(title='Age group')

    thinkplot.save(root='age_religion2')
def ResampleSurvival(resp, iters=101):
    """Plots the estimated survival function with a resampled 90% band.

    The survival function estimated from `resp` is plotted first. Then
    `resp` is resampled `iters` times, each resample's survival function
    is evaluated on a monthly grid, and the 5th-95th percentile range is
    shaded. The figure is saved with root 'survival3'.

    resp: DataFrame of respondents
    iters: number of resamples
    """
    _, estimate = EstimateMarriageSurvival(resp)
    thinkplot.Plot(estimate)

    # evaluation grid in steps of one month (1/12 of a year)
    # NOTE(review): resp.min()/resp.max() are passed straight to np.arange;
    # if resp is a DataFrame these are per-column Series, not scalars --
    # confirm which column was intended.
    ts = np.arange(resp.min(), resp.max(), 1/12.0)

    def resampled_probs():
        """Evaluates one weighted resample's survival function on ts."""
        sample = thinkstats2.ResampleRowsWeighted(resp)
        _, sample_sf = EstimateMarriageSurvival(sample)
        return sample_sf.Probs(ts)

    ss_seq = [resampled_probs() for _ in range(iters)]

    lower, upper = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, lower, upper, color='gray', label='90% CI')

    save_options = dict(root='survival3',
                        xlabel='age (years)',
                        ylabel='prob unmarried',
                        xlim=[12, 46],
                        ylim=[0, 1],
                        formats=FORMATS)
    thinkplot.Save(**save_options)
def PlotConfidenceIntervals(xs, inters, slopes, res=None, percent=90,
                            **options):
    """Shades a central percentile band of fitted lines.

    One line is fitted per (intercept, slope) pair. If `res` is given, a
    random permutation of it is added to each fitted line before the
    percentiles are taken.

    xs: sequence
    inters: estimated intercepts
    slopes: estimated slopes
    res: residuals, or None to plot the fitted lines alone
    percent: width of the central range to show (90 -> 5th to 95th)
    options: keyword options passed to thinkplot.FillBetween
    """
    # split what falls outside the band evenly between the two tails
    tail = (100 - percent) / 2
    bounds = tail, 100 - tail

    add_noise = res is not None

    fitted_lines = []
    for intercept, gradient in zip(inters, slopes):
        fxs, line = thinkstats2.FitLine(xs, intercept, gradient)
        if add_noise:
            # in-place add, so `line` keeps the array FitLine returned
            line += np.random.permutation(res)
        fitted_lines.append(line)

    lower, upper = thinkstats2.PercentileRows(fitted_lines, bounds)

    # fxs is whatever the last FitLine call returned
    thinkplot.FillBetween(fxs, lower, upper, **options)
def PlotConfidenceIntervals(xs, inters, slopes, percent=90, **options):
    """Shades a central percentile band of fitted lines.

    xs: sequence
    inters: estimated intercepts
    slopes: estimated slopes
    percent: width of the central range to show (90 -> 5th to 95th)
    options: keyword options passed to thinkplot.FillBetween
    """
    # one (fxs, fys) pair per (intercept, slope) pair
    fits = [thinkstats2.FitLine(xs, intercept, gradient)
            for intercept, gradient in zip(inters, slopes)]
    fitted_lines = [fys for _, fys in fits]

    # split what falls outside the band evenly between the two tails
    tail = (100 - percent) / 2
    bounds = tail, 100 - tail

    lower, upper = thinkstats2.PercentileRows(fitted_lines, bounds)

    # use the x values returned by the last fit
    last_fxs = fits[-1][0]
    thinkplot.FillBetween(last_fxs, lower, upper, **options)
def PlotPredictions(daily, years, iters=101, percent=90, func=RunLinearModel):
    """Plots two nested prediction bands.

    The same simulated results are used to draw two shaded regions: first
    with add_resid=True (alpha 0.3), then with add_resid=False (alpha 0.5).

    daily: DataFrame of daily prices
    years: sequence of times (in years) to make predictions for
    iters: number of simulations
    percent: what percentile range to show
    func: function that fits a model to the data
    """
    result_seq = SimulateResults(daily, iters=iters, func=func)

    # split what falls outside the band evenly between the two tails
    tail = (100 - percent) / 2
    bounds = tail, 100 - tail

    # (add_resid, alpha) for each band, in drawing order
    bands = ((True, 0.3), (False, 0.5))

    for with_resid, opacity in bands:
        predictions = GeneratePredictions(result_seq, years,
                                          add_resid=with_resid)
        lower, upper = thinkstats2.PercentileRows(predictions, bounds)
        thinkplot.FillBetween(years, lower, upper,
                              alpha=opacity, color='gray')
def main():
    """Plots percentiles of repeated `run` results and saves the figure.

    Reads the 'gss_college_religion' data, calls `run` on it 101 times,
    shades the 5th-95th percentile range across the results, draws the
    50th percentile as a line, and saves with root 'college_religion'.
    """
    gss = utils.ReadGss('gss_college_religion')

    diffs = []
    for _ in range(101):
        diffs.append(run(gss))

    # every result is plotted against the index of the first one
    years = diffs[0].index

    low, median, high = thinkstats2.PercentileRows(diffs, [5, 50, 95])
    thinkplot.fill_between(years, low, high, alpha=0.2)
    thinkplot.plot(years, median)

    thinkplot.config(
        xlabel='Year',
        ylabel='Difference in fraction with no affiliation',
        xlim=[1970, 2018],
    )
    thinkplot.save(root='college_religion')
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resamples residuals, computes autocorrelation, and plots percentiles.

    Shades the region between plus and minus the 97.5th percentile of the
    absolute autocorrelations, for lags 1 through nlags.

    daily: DataFrame
    iters: number of simulations to run
    nlags: maximum lags to compute autocorrelation
    """
    def one_simulation():
        """Returns absolute autocorrelations of one resampled residual set."""
        # FillMissing is called on every run, as in the original loop
        filled = FillMissing(daily, span=30)
        shuffled = thinkstats2.Resample(filled.resid)
        # drop element 0 so the result lines up with lags 1..nlags
        # NOTE(review): if smtsa is statsmodels.tsa.stattools, newer
        # releases renamed `unbiased` to `adjusted` -- confirm the version.
        correlations = smtsa.acf(shuffled, nlags=nlags, unbiased=True)[1:]
        return np.abs(correlations)

    simulated = [one_simulation() for _ in range(iters)]

    lags = range(1, nlags + 1)

    # the band is symmetric about zero
    upper = thinkstats2.PercentileRows(simulated, [97.5])[0]
    lower = -upper

    thinkplot.FillBetween(lags, lower, upper, alpha=0.2, color='gray')
def ResampleSurvival(dados, limiar, iters=101):
    """Plots the estimated survival function with a resampled 90% band.

    The survival function estimated from `dados` is plotted first. Then
    `dados` is resampled `iters` times, each resample's survival function
    is evaluated on a unit-step grid, and the 5th-95th percentile range
    is shaded.

    dados: data to resample; wrapped in a DataFrame and read back through
           its 'MANSO' column (presumably a Series named 'MANSO' -- TODO
           confirm against the caller)
    limiar: passed through unchanged to EstimateMarriageSurvival
    iters: number of resamples
    """
    _, estimate = EstimateMarriageSurvival(dados, limiar)
    thinkplot.Plot(estimate)

    # evaluation grid from the smallest to the largest value, step 1
    ts = np.arange(dados.min(), dados.max(), 1)

    def resampled_probs():
        """Evaluates one weighted resample's survival function on ts."""
        # 'MANSO' is used both as the weight column and as the data column
        frame = pd.DataFrame(dados)
        sample = thinkstats2.ResampleRowsWeighted(frame, column='MANSO')
        _, sample_sf = EstimateMarriageSurvival(sample['MANSO'], limiar)
        return sample_sf.Probs(ts)

    ss_seq = [resampled_probs() for _ in range(iters)]

    lower, upper = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, lower, upper, color='gray', label='90% CI')
def MakeSurvivalCI(sf_seq, percents):
    """Makes confidence intervals from a list of survival functions.

    sf_seq: list of SurvivalFunction
    percents: list of percentiles to select, like [5, 95]

    returns: (ts, rows) where ts is a sorted list of times and rows
             contains one row of values for each percent
    """
    # sorted union of every time at which any of the sfs is evaluated
    ts = sorted(set().union(*(sf.ts for sf in sf_seq)))

    # evaluate each non-empty sf on the shared grid
    non_empty = (sf for sf in sf_seq if len(sf) > 0)
    ss_seq = [sf.Probs(ts) for sf in non_empty]

    # pick out the requested percentiles column by column
    return ts, thinkstats2.PercentileRows(ss_seq, percents)