Beispiel #1
0
def main():
    """Print summary statistics of the log-income sample and plot its CDF.

    The sample comes from ``InterpolateSample`` with ``log_upper=6.0``.
    Median, both skewness measures and the mean are printed, followed by
    the CDF evaluated at the mean, and finally the CDF is drawn.
    """
    log_incomes = InterpolateSample(hinc.ReadData(), log_upper=6.0)
    income_cdf = thinkstats2.Cdf(log_incomes)

    # Statistics computed directly on the sample, printed in a fixed order.
    for label, statistic in (
        ("median", thinkstats2.Median),
        ("pearson's median skewness", thinkstats2.PearsonMedianSkewness),
        ("skewness", thinkstats2.Skewness),
    ):
        print(label, statistic(log_incomes))

    # The mean is taken from the CDF object rather than from the raw sample.
    cdf_mean = income_cdf.Mean()
    print("mean", cdf_mean)

    print(
        "the higher our log_upper, the more right-skewed (according to g_1) "
        "or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    print("proportion of the population with income < mean",
          income_cdf.Prob(cdf_mean))
    print(
        "the higher the upper bound, "
        "the greater the proprtion below the mean."
    )

    thinkplot.Cdf(income_cdf)
    axis_labels = dict(xlabel='household income', ylabel='CDF')
    thinkplot.Show(**axis_labels)
Beispiel #2
0
def main():
    """Plot the CDF of the interpolated log-income sample.

    The sample is built by ``InterpolateSample`` with ``log_upper=6.0``.
    """
    income_table = hinc.ReadData()
    cdf_of_logs = thinkstats2.Cdf(
        InterpolateSample(income_table, log_upper=6.0)
    )

    thinkplot.Cdf(cdf_of_logs)
    axis_labels = dict(xlabel='household income', ylabel='CDF')
    thinkplot.Show(**axis_labels)
Beispiel #3
0
def main():
    """Plot the log-income CDF, then the PDF of the back-transformed sample.

    After showing the CDF of the log sample, the sample is raised to the
    power of ten, summarised with ``density.Summarize``, and the CDF value
    at the mean is printed before the estimated PDF is drawn.
    """
    log_incomes = InterpolateSample(hinc.ReadData(), log_upper=6.0)

    thinkplot.Cdf(thinkstats2.Cdf(log_incomes))
    thinkplot.Show(xlabel='household income', ylabel='CDF')

    # Undo the base-10 log so the remaining steps work on the raw values.
    incomes = np.power(10, log_incomes)
    # Summarize returns a pair; only the first element is used below.
    mean_income, _median_income = density.Summarize(incomes)

    income_cdf = thinkstats2.Cdf(incomes)
    print('cdf[mean]', income_cdf[mean_income])

    thinkplot.Pdf(thinkstats2.EstimatedPdf(incomes))
    thinkplot.Show(xlabel='household income', ylabel='PDF')
Beispiel #4
0
def main():
    """Plot the log-income CDF and report statistics of the income sample.

    The log sample comes from ``InterpolateSample`` with ``log_upper=6.0``.
    Its CDF is shown first. The sample is then raised to the power of ten,
    and its median, mean, both skewness measures and the CDF value at the
    mean are printed before the estimated PDF is plotted and shown.
    """
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)
    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')

    # Undo the base-10 log to work with the raw values.
    sample = np.power(10, log_sample)
    mean = np.mean(sample)
    cdf = thinkstats2.Cdf(sample)

    # print() calls instead of Python 2 print statements, so the function
    # parses under Python 3; the emitted text is unchanged.
    print("Median:", np.median(sample))
    print("Mean:", mean)
    print("Skewness:", thinkstats2.Skewness(sample))
    print("Pearson's Skewness:", thinkstats2.PearsonMedianSkewness(sample))
    # NOTE(review): cdf[mean] is presumably a probability in [0, 1], not a
    # percentage, despite the label -- confirm against thinkstats2.Cdf.
    print("Percent of people with incomes <= mean:", cdf[mean])

    pdf = thinkstats2.EstimatedPdf(sample)
    thinkplot.Pdf(pdf)
    # The PDF was drawn but never displayed; show it like the CDF above.
    thinkplot.Show(xlabel='household income', ylabel='PDF')
Beispiel #5
0
import hinc
import hinc2
import thinkstats2
import thinkplot
import numpy as np

# Load the income table once at module level; describe_inc_dist reads this
# global rather than receiving the frame as an argument.
df = hinc.ReadData()


def describe_inc_dist(log_upper):
    """Print summary statistics of the income sample for one upper bound.

    The log sample is built from the module-level ``df`` by
    ``hinc2.InterpolateSample`` and raised to the power of ten before the
    statistics are computed.

    Args:
        log_upper: Value forwarded to ``InterpolateSample`` as its
            ``log_upper`` argument and echoed in the report.

    Returns:
        None. The mean, median, both skewness measures and the CDF value
        at the mean are written to stdout.
    """
    # Use the parameter itself; the loop variable of the calling script is
    # not in scope for other callers.
    log_sample = hinc2.InterpolateSample(df, log_upper=log_upper)
    incomes = np.power(10, log_sample)

    inc_mean = thinkstats2.Mean(incomes)
    inc_med = thinkstats2.Median(incomes)
    inc_skew = thinkstats2.Skewness(incomes)
    inc_pearskew = thinkstats2.PearsonMedianSkewness(incomes)
    print('log_upper = ', log_upper)
    print('Mean Income: ', inc_mean)
    print('Median Income: ', inc_med)
    print('Skewness: ', inc_skew)
    print('Pearson Median Skewness: ', inc_pearskew)

    # CDF value at the mean, as returned by Cdf.Prob.
    cdf = thinkstats2.Cdf(incomes)
    inc_below_mean = cdf.Prob(inc_mean)
    print('Pct. below mean: ', inc_below_mean)
    print('\n')


# Repeat the report for each assumed upper bound.
for j in (6.0, 7.0, 8.0):
    describe_inc_dist(j)
Beispiel #6
0
def main():
    """Read the income data and hand it to ``MakeFigures``."""
    income_table = hinc.ReadData()
    MakeFigures(income_table)
Beispiel #7
0
    df.loc[41, 'log_upper'] = log_upper
    
    # use the freq column to generate the right number of values in
    # each range
    arrays = []
    for _, row in df.iterrows():
        vals = np.linspace(row.log_lower, row.log_upper, row.freq)
        arrays.append(vals)

    # collect the arrays into a single sample
    log_sample = np.concatenate(arrays)
    return log_sample
    
# Load the income table.
import hinc
income_df = hinc.ReadData()

# Log-scale sample and its CDF.
log_sample = InterpolateSample(income_df, log_upper=6.0)
log_cdf = thinkstats2.Cdf(log_sample)
thinkplot.Cdf(log_cdf)
thinkplot.Config(xlabel='Household income (log $)', ylabel='CDF')
# RESULTS: plot

# Raise ten to the log values and plot the CDF of the result.
sample = np.power(10, log_sample)
cdf = thinkstats2.Cdf(sample)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Household income ($)', ylabel='CDF')
Beispiel #8
0
    def RunModel(self):
        """Simulate one pair of groups by resampling the pool.

        Draws ``self.n`` values and then ``self.m`` values from
        ``self.pool``, each with replacement.

        Returns:
            tuple: ``(group1, group2)`` holding the two resampled arrays.
        """
        return tuple(
            np.random.choice(self.pool, group_size, replace=True)
            for group_size in (self.n, self.m)
        )


# Load the data sets used by the exercises below.
preg = nsfg.ReadFemPreg()
# Rows of the pregnancy file whose outcome code is 1.
live = preg[preg["outcome"] == 1]
resp = nsfg.ReadFemResp()
bs = brfss.ReadBrfss()
income = hinc.ReadData()
log_intp_income = hinc2.InterpolateSample(income, log_upper=6.0)

# Q1. Think Stats Chapter 2 Exercise 4 (effect size of Cohen's d)
# Build the masks from `live` itself so they share its index; masks taken
# from `preg` only work through implicit index alignment.
first_wt = live.loc[live["birthord"] == 1, "totalwgt_lb"]
other_wt = live.loc[live["birthord"] != 1, "totalwgt_lb"]
# The effect size is computed here but neither stored nor printed.
ts.CohenEffectSize(first_wt, other_wt)

# Q2. Think Stats Chapter 3 Exercise 1 (actual vs. biased)
# d is the smallest gap between distinct numkdhh values; the outer bin
# edges sit half of that gap beyond the minimum and maximum.
d = np.diff(np.unique(resp["numkdhh"])).min()
left_of_first_bin = resp["numkdhh"].min() - float(d) / 2
right_of_last_bin = resp["numkdhh"].max() + float(d) / 2
plt.clf()
plt.hist(resp["numkdhh"],
         bins=np.arange(left_of_first_bin, right_of_last_bin + d, d),
         histtype="step",