예제 #1
0
def MakeFigures():
    """Generate the BRFSS height/weight scatter plots and the hexbin plot.

    The random seed is fixed at 17 before any sampling, and the assertions
    pin individual values at index 100 so that a change in the data or in
    the random sequence is detected immediately.
    """
    thinkstats2.RandomSeed(17)

    respondents = brfss.ReadBrfss(nrows=None)
    subset = SampleRows(respondents, 5000, replace=False)

    # Un-jittered values from the 5000-row sample.
    raw_h, raw_w = GetHeightWeight(subset)
    assert raw_h.values[100] == 175
    assert raw_w.values[100] == 86.36
    ScatterPlot('brfss_scatter1', raw_h, raw_w)

    # Same sample with jitter; plotted twice, the second time with alpha=0.1.
    jit_h, jit_w = GetHeightWeight(subset, hjitter=1.5, wjitter=1.1)
    assert int(jit_h.values[100]) == 173
    assert int(jit_w.values[100]) == 85
    ScatterPlot('brfss_scatter2', jit_h, jit_w)
    ScatterPlot('brfss_scatter3', jit_h, jit_w, alpha=0.1)

    # Hexbin of all records (not just the sample).
    all_h, all_w = GetHeightWeight(respondents, hjitter=1.3, wjitter=1.1)
    assert int(all_h.values[100]) == 171
    assert int(all_w.values[100]) == 55
    HexBin('brfss_scatter4', all_h, all_w)
예제 #2
0
def main(script):
    """Seed the RNG, load BRFSS and run Correlations on the complete rows.

    Rows missing 'htm3' or 'wtkg2' are dropped before Correlations is called.

    script: not referenced in the body
    """
    thinkstats2.RandomSeed(17)

    df = brfss.ReadBrfss(nrows=None)
    df = df.dropna(subset=['htm3', 'wtkg2'])
    Correlations(df)
    return

    # NOTE(review): the two calls below are unreachable because of the bare
    # `return` above -- confirm whether the early exit is intentional.
    MakeFigures(df)
    BinnedPercentiles(df)
예제 #3
0
파일: chap07.py 프로젝트: mecha2k/mystats
def main(script):
    """Run the correlation analyses on BRFSS rows with height and weight.

    Both the stdlib and NumPy random generators are seeded with 100, then
    Correlations, MakeFigures and BinnedPercentiles are called in that order
    on the rows that have both 'htm3' and 'wtkg2'.

    script: not referenced in the body
    """
    seed = 100
    random.seed(seed)
    np.random.seed(seed)

    complete = brfss.ReadBrfss(nrows=None).dropna(subset=["htm3", "wtkg2"])

    Correlations(complete)
    MakeFigures(complete)
    BinnedPercentiles(complete)
예제 #4
0
def ReadHeights(nrows=None):
    """Read the BRFSS dataset and collect the heights for each sex.

    Rows with a missing 'sex' or 'htm3' value are dropped before grouping.
    Nothing is written to disk; the result is only returned.

    nrows: number of rows to read (passed through to brfss.ReadBrfss)

    returns: dict that maps each 'sex' group key to that group's
             htm3 values
    """
    resp = brfss.ReadBrfss(nrows=nrows).dropna(subset=['sex', 'htm3'])
    return {name: group.htm3.values for name, group in resp.groupby('sex')}
예제 #5
0
def main(name, nrows=None):
    """Test the height/weight correlation on BRFSS data.

    name: not referenced in the body
    nrows: optional row limit; converted with int() when given, so a
           string is accepted as well. None reads the whole dataset.

    ComputeCorrelations is only run when no row limit was given.
    """
    thinkstats2.RandomSeed(17)

    if nrows is not None:
        nrows = int(nrows)

    df = brfss.ReadBrfss(nrows=nrows)

    # Keep only rows where both height and weight are present.
    columns = df[['htm3', 'wtkg2']].dropna()
    heights, weights = columns.htm3.values, columns.wtkg2.values

    TestCorrelation(heights, weights)
    # Identity check: `== None` is non-idiomatic and can be overridden by __eq__.
    if nrows is None:
        ComputeCorrelations(heights, weights)
예제 #6
0
def ComputeSkewnesses():
    """Plot estimated PDFs of birth weight and adult weight.

    For each variable the summary is printed via Summarize, grey vertical
    markers are drawn at the mean and the median, and the figure is saved
    ('density_totalwgt_kde' and 'density_wtkg2_kde').
    """
    def Annotate(x, top, text_x, caption, align):
        # Grey vertical line from 0 to `top` at x, with a caption placed
        # at one tenth of the line's height.
        thinkplot.Plot([x, x], [0, top], color='0.6', linewidth=1)
        thinkplot.Text(text_x, 0.1 * top, caption, horizontalalignment=align)

    # --- birth weight ---
    live, _, _ = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    top = 0.35
    Annotate(mean, top, mean - 0.15, 'mean', 'right')
    Annotate(median, top, median + 0.1, 'median', 'left')

    birth_pdf = thinkstats2.EstimatedPdf(birth_weights)
    thinkplot.Pdf(birth_pdf, label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde', xlabel='lbs', ylabel='PDF')

    # --- adult weight ---
    adult_weights = brfss.ReadBrfss(nrows=None).wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    top = 0.02499
    Annotate(mean, top, mean + 1, 'mean', 'left')
    Annotate(median, top, median - 1.5, 'median', 'right')

    adult_pdf = thinkstats2.EstimatedPdf(adult_weights)
    thinkplot.Pdf(adult_pdf, label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
예제 #7
0
def ComputeSkewnesses():
    """Plot estimated PDFs of birth weight and adult weight with markers.

    Each figure gets grey vertical lines at the mean and median returned by
    Summarize, a caption next to each line, and is saved under
    'density_totalwgt_kde' / 'density_wtkg2_kde'.
    """
    def DrawMarker(pos, height):
        myplots.Plot([pos, pos], [0, height], color="0.6", linewidth=1)

    # Birth weight from the live-birth frame.
    live, _firsts, _others = first.MakeFrames()
    birth = live.totalwgt_lb.dropna()
    print("Birth weight")
    avg, mid = Summarize(birth)

    height = 0.35
    markers = (
        (avg, avg - 0.15, "mean", "right"),
        (mid, mid + 0.1, "median", "left"),
    )
    for pos, text_x, caption, side in markers:
        DrawMarker(pos, height)
        myplots.Text(text_x, 0.1 * height, caption, horizontalalignment=side)

    myplots.Pdf(mystats.EstimatedPdf(birth), label="birth weight")
    myplots.Save(root="density_totalwgt_kde", xlabel="lbs", ylabel="PDF")

    # Adult weight from BRFSS.
    adult = brfss.ReadBrfss(nrows=None).wtkg2.dropna()
    print("Adult weight")
    avg, mid = Summarize(adult)

    height = 0.02499
    markers = (
        (avg, avg + 1, "mean", "left"),
        (mid, mid - 1.5, "median", "right"),
    )
    for pos, text_x, caption, side in markers:
        DrawMarker(pos, height)
        myplots.Text(text_x, 0.1 * height, caption, horizontalalignment=side)

    myplots.Pdf(mystats.EstimatedPdf(adult), label="adult weight")
    myplots.Save(root="density_wtkg2_kde",
                 xlabel="kg",
                 ylabel="PDF",
                 xlim=[0, 200])
예제 #8
0
# Axis labels for the birth-weight PDF. The keywords must be spelled
# `xlabel`/`ylabel`; the previous `xlable`/`ylable` spelling meant the
# labels were never applied.
thinkplot.Config(xlabel='birth weight (pounds)', ylabel='PDF')

#%%
# Summary statistics for the birth-weight data.
mean = RawMoment(data, 1)
print("mean: {:.2f} pounds".format(mean))
# NOTE(review): `Medinan` looks like a misspelling of `Median`; left as-is
# because its definition is outside this excerpt -- confirm.
medi = Medinan(data)
print("median: {:.2f} pounds".format(medi))
skewness = Skewness(data)
print("skewness: {:.2f} ".format(skewness))
pearson = PearsonMedianSkewness(data)
print("pearson's median skewness: {:.2f}".format(pearson))

#%%
# BRFSS
import brfss
df = brfss.ReadBrfss(nrows=None)
data = df.wtkg2.dropna()
pdf = thinkstats2.EstimatedPdf(data)
thinkplot.Pdf(pdf, label="adult weight")
thinkplot.Config(xlabel='weight (kg)', ylabel='PDF')

#%%
# Same plot again, with the x axis limited to [0, 200].
pdf = thinkstats2.EstimatedPdf(data)
thinkplot.Pdf(pdf, label="adult weight")
thinkplot.Config(xlabel='weight (kg)', ylabel='PDF', xlim=[0, 200])

#%%
# Summary statistics for the adult-weight data.
mean = RawMoment(data, 1)
print("mean: {:.1f} kg".format(mean))
# NOTE(review): see the note on `Medinan` above.
medi = Medinan(data)
print("median: {:.1f} kg".format(medi))
예제 #9
0
# Plot the xs/ys pair computed before this excerpt, labelled 'all live'.
thinkplot.Plot(xs, ys, label='all live')

# Add the normal probability curve for term_weights to the same figure.
xs, ys = thinkstats2.NormalProbability(term_weights)
thinkplot.Plot(xs, ys, label='full term')
thinkplot.Config(title='Normal probability plot',
                 xlabel='Standard deviations from mean',
                 ylabel='Birth weight (lbs)')

#%% [markdown]
# ## Lognormal model
#
# As an example of a lognormal distribution, we'll look at adult weights from the BRFSS.

#%%
# Load BRFSS and keep the non-missing wtkg2 values.
import brfss
df = brfss.ReadBrfss()
weights = df.wtkg2.dropna()

#%% [markdown]
# The following function estimates the parameters of a normal distribution and plots the data and a normal model.


#%%
def MakeNormalModel(weights):
    """Plots a CDF with a Normal model.

    weights: sequence
    """
    cdf = thinkstats2.Cdf(weights, label='weights')

    mean, var = thinkstats2.TrimmedMeanVar(weights)
예제 #10
0
파일: chap12.py 프로젝트: mecha2k/mystats
def main(script):
    """Seed both random generators with 100 and load the complete BRFSS rows.

    Rows missing 'htm3' or 'wtkg2' are dropped.

    script: not referenced in the visible body

    NOTE(review): `df` is not used after it is filtered, so this excerpt
    looks truncated -- confirm against the full chap12.py.
    """
    random.seed(100)
    np.random.seed(100)

    df = brfss.ReadBrfss(nrows=None)
    df = df.dropna(subset=["htm3", "wtkg2"])
예제 #11
0
class DiffMeansResample(h0.DiffMeansPermute):
    """Variant of h0.DiffMeansPermute whose RunModel samples with replacement."""

    def RunModel(self):
        """Simulate one data set by resampling from self.pool.

        Draws self.n values and then self.m values from self.pool, each
        with replacement.

        returns: tuple (group1, group2) of the two draws
        """
        sizes = (self.n, self.m)
        # The generator is consumed in order, so the n-sized draw still
        # happens before the m-sized draw.
        return tuple(
            np.random.choice(self.pool, size, replace=True) for size in sizes
        )


# Load the data sets used by the exercises below.
preg = nsfg.ReadFemPreg()
live = preg[preg["outcome"] == 1]
resp = nsfg.ReadFemResp()
bs = brfss.ReadBrfss()
income = hinc.ReadData()
log_intp_income = hinc2.InterpolateSample(income, log_upper=6.0)

# Q1. Think Stats Chapter 2 Exercise 4 (effect size of Cohen's d)
# The masks are built from `live` itself. Building them from `preg` only
# worked through implicit index alignment between the two frames.
first_wt = live.loc[live["birthord"] == 1, "totalwgt_lb"]
other_wt = live.loc[live["birthord"] != 1, "totalwgt_lb"]
ts.CohenEffectSize(first_wt, other_wt)

# Q2. Think Stats Chapter 3 Exercise 1 (actual vs. biased)
# d is the smallest gap between distinct numkdhh values; the histogram
# edges are offset by d/2 on each side of the observed range.
d = np.diff(np.unique(resp["numkdhh"])).min()
left_of_first_bin = resp["numkdhh"].min() - float(d) / 2
right_of_last_bin = resp["numkdhh"].max() + float(d) / 2
plt.clf()
plt.hist(resp["numkdhh"],
         bins=np.arange(left_of_first_bin, right_of_last_bin + d, d),