Esempio n. 1
0
def PlotResampledByDecade(resps,
                          iters=11,
                          predict_flag=False,
                          omit=None,
                          weighted=True):
    """Plots per-decade survival curves for repeated resamples of the data.

    On every iteration each DataFrame in `resps` is resampled, the results
    are concatenated into one frame, the rows are grouped by the 'decade'
    column, and the groups are handed to the by-decade helpers.

    resps: list of DataFrames
    iters: number of resamples to plot
    predict_flag: whether to also plot predictions
    omit: optional collection of decade values to exclude from the groups
    weighted: whether to resample with ResampleRowsWeighted instead of
        ResampleRows
    """
    for round_index in range(iters):
        # Choose the row resampler for this round.
        if weighted:
            resample = thinkstats2.ResampleRowsWeighted
        else:
            resample = thinkstats2.ResampleRows

        resampled = [resample(resp) for resp in resps]
        combined = pandas.concat(resampled, ignore_index=True)
        by_decade = combined.groupby('decade')

        if omit:
            # Replace the groupby object with a plain list of the
            # (decade, frame) pairs that are not omitted.
            kept = []
            for decade, frame in by_decade:
                if decade not in omit:
                    kept.append((decade, frame))
            by_decade = kept

        # TODO: refactor this to collect resampled estimates and
        # plot shaded areas
        if round_index == 0:
            # Labels are added only once, on the first resample.
            AddLabelsByDecade(by_decade, alpha=0.7)

        if not predict_flag:
            EstimateSurvivalByDecade(by_decade, alpha=0.2)
            continue

        PlotPredictionsByDecade(by_decade, alpha=0.1)
        EstimateSurvivalByDecade(by_decade, alpha=0.1)
def SamplingDistributions(dados_chuva, dados_vazao, iters=101):
    """Estimates sampling distributions of a least-squares fit by resampling.

    The two inputs are combined into one DataFrame (one column per input),
    its rows are resampled `iters` times, and on each resample the
    "COIMBRA_F" column is fitted against the "COIMBRA_P" column with
    thinkstats2.LeastSquares.

    dados_chuva: series whose label is "COIMBRA_P" (rainfall, judging by
        the parameter name)
    dados_vazao: series whose label is "COIMBRA_F" (streamflow, judging by
        the parameter name)
    iters: number of resamples to draw

    returns: pair of sequences (inters, slopes); both are empty tuples when
        iters <= 0
    """
    # Stack the inputs as rows, then transpose so each input is a column
    # labelled by the input's own name.
    dados = pd.DataFrame([dados_chuva, dados_vazao]).T

    t = []
    for _ in range(iters):
        sample = thinkstats2.ResampleRows(dados)
        chuva = sample["COIMBRA_P"]
        vazao = sample["COIMBRA_F"]
        t.append(thinkstats2.LeastSquares(chuva, vazao))

    if not t:
        # zip(*[]) yields nothing, so the two-name unpacking below would
        # raise ValueError; return an empty pair instead.
        return (), ()

    inters, slopes = zip(*t)
    return inters, slopes
Esempio n. 3
0
def SamplingDistributions(live, iters=101):
    """Estimates sampling distributions by resampling rows.

    Each iteration resamples the rows of `live` and fits totalwgt_lb
    against agepreg with thinkstats2.LeastSquares.

    live: DataFrame with agepreg and totalwgt_lb columns
    iters: number of times to run simulations

    returns: pair of sequences (inters, slopes); both are empty tuples when
        iters <= 0
    """
    t = []
    for _ in range(iters):
        sample = thinkstats2.ResampleRows(live)
        ages = sample.agepreg
        weights = sample.totalwgt_lb
        t.append(thinkstats2.LeastSquares(ages, weights))

    if not t:
        # zip(*[]) yields nothing, so the two-name unpacking below would
        # raise ValueError; return an empty pair instead.
        return (), ()

    inters, slopes = zip(*t)
    return inters, slopes
Esempio n. 4
0
def EstimateBirthWeight(live, iters=1001):
    """Estimates mean birth weight from resamples, unweighted then weighted.

    Prints the mean of live.totalwgt_lb, then summarizes the means of
    `iters` resamples drawn with thinkstats2.ResampleRows, followed by the
    means of `iters` resamples drawn with ResampleRowsWeighted.

    live: DataFrame
    iters: number of experiments to run
    """
    observed_mean = live.totalwgt_lb.mean()
    print('mean', observed_mean)

    # Unweighted resampling.
    unweighted_means = []
    for _ in range(iters):
        resampled = thinkstats2.ResampleRows(live)
        unweighted_means.append(resampled.totalwgt_lb.mean())
    Summarize(unweighted_means)

    # Weighted resampling.
    weighted_means = []
    for _ in range(iters):
        resampled = ResampleRowsWeighted(live)
        weighted_means.append(resampled.totalwgt_lb.mean())
    Summarize(weighted_means)
# Report the standard deviation labelled "w/o height".
# NOTE(review): `std_ys` is not defined in this excerpt; presumably it is
# computed earlier in the script -- confirm.
print("Standard deviation w/o height: {:.3f}".format(std_ys))

#%%
# Standard deviation of `res` (via thinkstats2.Std), reported as the
# spread (RMSE) of the prediction that uses height.
# NOTE(review): `res` is not defined in this excerpt; presumably the
# regression residuals -- confirm.
std_res = thinkstats2.Std(res)
print("Standard deviation w/ height: {:.3f}".format(std_res))

#%%
# How much the height information reduces the RMSE, expressed as the
# fraction 1 - std_res / std_ys.
print("Impact: {:.3f}".format(1 - (std_res / std_ys)))

#%%
# Resample the rows of `data` 100 times; on each resample fit
# log10(wtkg2) against htm3 by least squares and collect the estimates.
t = []
for _ in range(100):
    sample = thinkstats2.ResampleRows(data)
    estimates = thinkstats2.LeastSquares(sample.htm3, np.log10(sample.wtkg2))
    t.append(estimates)

# Split the collected estimates into two parallel tuples.
# NOTE(review): assumes each LeastSquares result is an (intercept, slope)
# pair -- confirm against thinkstats2.
inters, slopes = zip(*t)

#%%
# Plot the sampling distribution of slope.
cdf = thinkstats2.Cdf(slopes)
thinkplot.Cdf(cdf)

#%%
# Compute the p-value of the slope by indexing the Cdf at 0.
# NOTE(review): presumably cdf[0] is the fraction of resampled slopes
# that are <= 0 -- confirm the indexing semantics of thinkstats2.Cdf.
pvalue = cdf[0]
# Bare expression: shows the value when the cell is run interactively.
pvalue