def PlotResampledByDecade(resps, iters=11, predict_flag=False, omit=None, weighted=True):
    """Plots survival curves for resampled data.

    Each iteration resamples every DataFrame in resps, concatenates the
    resamples, groups the result by its 'decade' column and hands the
    groups to the plotting helpers.

    resps: list of DataFrames
    iters: number of resamples to plot
    predict_flag: whether to also plot predictions
    omit: optional collection of decade values to leave out of the plot
    weighted: if true, resample with thinkstats2.ResampleRowsWeighted,
              otherwise with thinkstats2.ResampleRows
    """
    # The resampling routine does not change between iterations.
    if weighted:
        resample = thinkstats2.ResampleRowsWeighted
    else:
        resample = thinkstats2.ResampleRows

    for iteration in range(iters):
        resampled = [resample(resp) for resp in resps]
        combined = pandas.concat(resampled, ignore_index=True)
        groups = combined.groupby('decade')

        if omit:
            # Materialize the (name, group) pairs, skipping omitted decades.
            groups = [
                (decade, members)
                for decade, members in groups
                if decade not in omit
            ]

        # TODO: refactor this to collect resampled estimates and
        # plot shaded areas
        if iteration == 0:
            # Labels are added once, on the first resample only.
            AddLabelsByDecade(groups, alpha=0.7)

        # Curves are drawn fainter when predictions share the figure.
        curve_alpha = 0.2
        if predict_flag:
            PlotPredictionsByDecade(groups, alpha=0.1)
            curve_alpha = 0.1
        EstimateSurvivalByDecade(groups, alpha=curve_alpha)
def SamplingDistributions(dados_chuva, dados_vazao, iters=101):
    """Estimates sampling distributions of a least-squares fit by resampling rows.

    The two inputs are combined into one DataFrame (one column per input)
    so that rows are resampled jointly. The columns are then looked up by
    the labels "COIMBRA_P" and "COIMBRA_F", so the inputs must carry those
    names.

    dados_chuva: data that ends up in the "COIMBRA_P" column
                 (presumably a rainfall Series -- confirm with the caller)
    dados_vazao: data that ends up in the "COIMBRA_F" column
                 (presumably a streamflow Series -- confirm with the caller)
    iters: number of resamples to draw

    returns: pair of sequences (inters, slopes)
    """
    # Stack the inputs as rows, then transpose so each becomes a column.
    dados = pd.DataFrame([dados_chuva, dados_vazao]).T

    def fit_resample():
        """Draws one row-resample and fits vazao against chuva."""
        resampled = thinkstats2.ResampleRows(dados)
        chuva = resampled["COIMBRA_P"]
        vazao = resampled["COIMBRA_F"]
        return thinkstats2.LeastSquares(chuva, vazao)

    fits = [fit_resample() for _ in range(iters)]

    # Split the per-resample estimate pairs into two parallel sequences.
    inters, slopes = zip(*fits)
    return inters, slopes
def SamplingDistributions(live, iters=101):
    """Estimates sampling distributions by resampling rows.

    For every resample, fits totalwgt_lb against agepreg with
    thinkstats2.LeastSquares and records the resulting estimates.

    live: DataFrame with agepreg and totalwgt_lb columns
    iters: number of times to run simulations

    returns: pair of sequences (inters, slopes)
    """
    # Lazy generator: each resample is drawn right before it is fitted.
    resamples = (thinkstats2.ResampleRows(live) for _ in range(iters))
    fits = [
        thinkstats2.LeastSquares(frame.agepreg, frame.totalwgt_lb)
        for frame in resamples
    ]

    # Split the per-resample estimate pairs into two parallel sequences.
    inters, slopes = zip(*fits)
    return inters, slopes
def EstimateBirthWeight(live, iters=1001):
    """Estimate mean birth weight by resampling, with and without weights.

    Prints the mean of totalwgt_lb in the data as given, then passes two
    lists of resampled means to Summarize: first from
    thinkstats2.ResampleRows, then from ResampleRowsWeighted.

    live: DataFrame with a totalwgt_lb column
    iters: number of experiments to run
    """
    print('mean', live.totalwgt_lb.mean())

    # Resampling with thinkstats2.ResampleRows.
    unweighted_means = []
    for _ in range(iters):
        resampled = thinkstats2.ResampleRows(live)
        unweighted_means.append(resampled.totalwgt_lb.mean())
    Summarize(unweighted_means)

    # Resampling with ResampleRowsWeighted.
    weighted_means = []
    for _ in range(iters):
        resampled = ResampleRowsWeighted(live)
        weighted_means.append(resampled.totalwgt_lb.mean())
    Summarize(weighted_means)
print("Standard deviation w/o height: {:.3f}".format(std_ys))

#%%
# Standard deviation of `res`; presumably these are the residuals of the
# height-based fit, making this the RMSE of the prediction w/ height.
std_res = thinkstats2.Std(res)
print("Standard deviation w/ height: {:.3f}".format(std_res))

#%%
# Fraction by which the spread shrinks once height information is used.
rmse_reduction = 1 - (std_res / std_ys)
print("Impact: {:.3f}".format(rmse_reduction))

#%%
# Resample rows 100 times, refitting log10(wtkg2) against htm3 each time,
# to collect estimates of the intercept and slope.
t = []
for _ in range(100):
    sample = thinkstats2.ResampleRows(data)
    resampled_htm3 = sample.htm3
    resampled_log_wtkg2 = np.log10(sample.wtkg2)
    estimates = thinkstats2.LeastSquares(resampled_htm3, resampled_log_wtkg2)
    t.append(estimates)

# Split the collected estimate pairs into two parallel sequences.
inters, slopes = zip(*t)

#%%
# Plot the sampling distribution of slope.
cdf = thinkstats2.Cdf(slopes)
thinkplot.Cdf(cdf)

#%%
# Compute the p-value of the slope.
# NOTE(review): cdf[0] is presumably the fraction of resampled slopes at or
# below zero -- confirm against thinkstats2.Cdf indexing.
pvalue = cdf[0]
# Bare expression so an interactive cell displays the value.
pvalue