Example #1
0
def main():
    """Prints summary statistics of the interpolated log sample and plots its CDF.

    Reads the data with hinc.ReadData(), builds a sample with
    InterpolateSample(df, log_upper=6.0), then prints the median, both
    skewness measures, the mean, and the CDF value at the mean, along with
    a few fixed observations about the effect of log_upper.
    """
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)
    # The mean is reported and then looked up in the CDF; compute it once.
    log_mean = log_cdf.Mean()

    print("median", thinkstats2.Median(log_sample))
    print("pearson's median skewness",
          thinkstats2.PearsonMedianSkewness(log_sample))
    print("skewness", thinkstats2.Skewness(log_sample))
    print("mean", log_mean)

    print(
        "the higher our log_upper, the more right-skewed (according to g_1) or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    print("proportion of the population with income < mean",
          log_cdf.Prob(log_mean))
    print(
        "the higher the upper bound, the greater the proportion below the mean."
    )

    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')
Example #2
0
def Summarize(data):
    """Prints summary statistics for data and returns its mean and median.

    data: object providing mean() and std(); it is also handed to the
          thinkstats2 Median, Skewness and PearsonMedianSkewness helpers

    returns: tuple of (mean, median)
    """
    center = data.mean()
    spread = data.std()
    middle = thinkstats2.Median(data)
    for label, value in (('mean', center), ('std', spread), ('median', middle)):
        print(label, value)

    # The skewness measures are computed only after the basic stats are shown.
    print('skewness', thinkstats2.Skewness(data))
    print('pearson skewness', thinkstats2.PearsonMedianSkewness(data))

    return center, middle
Example #3
0
def Summarize(data):
    """Reports mean, std, median and two skewness measures of data.

    data: pandas Series

    returns: tuple of (mean, median)
    """
    # Evaluated left to right: mean, std, median -- then printed in that order.
    basics = [
        ('mean', data.mean()),
        ('std', data.std()),
        ('median', thinkstats2.Median(data)),
    ]
    for name, value in basics:
        print(name, value)

    skew = thinkstats2.Skewness(data)
    print('skewness', skew)
    pearson_skew = thinkstats2.PearsonMedianSkewness(data)
    print('pearson skewness', pearson_skew)

    by_name = dict(basics)
    return by_name['mean'], by_name['median']
Example #4
0
def describe_inc_dist(log_upper):
    """Prints summary statistics of the income sample for a given log_upper.

    Builds a sample with hinc2.InterpolateSample, raises 10 to that sample to
    get incomes, and prints their mean, median, skewness, Pearson median
    skewness and the CDF value at the mean.

    log_upper: value forwarded to hinc2.InterpolateSample as its log_upper
               argument and echoed in the output

    Note: `df` is not a parameter; it is read from the enclosing (module)
    scope and must be defined before this function is called.
    """
    # Use the parameter itself; the previous body read an unrelated name `j`,
    # so the argument was ignored (or a NameError was raised).
    log_sample = hinc2.InterpolateSample(df, log_upper=log_upper)
    incomes = np.power(10, log_sample)

    inc_mean = thinkstats2.Mean(incomes)
    inc_med = thinkstats2.Median(incomes)
    inc_skew = thinkstats2.Skewness(incomes)
    inc_pearskew = thinkstats2.PearsonMedianSkewness(incomes)
    print('log_upper = ', log_upper)
    print('Mean Income: ', inc_mean)
    print('Median Income: ', inc_med)
    print('Skewness: ', inc_skew)
    print('Pearson Median Skewness: ', inc_pearskew)

    cdf = thinkstats2.Cdf(incomes)
    inc_below_mean = cdf.Prob(inc_mean)
    print('Pct. below mean: ', inc_below_mean)
    # Blank separator between successive reports.
    print('\n')
Example #5
0
def main():
    """Plots the CDF of the log sample, then summarizes the income sample.

    Reads the data with hinc.ReadData(), builds a sample with
    InterpolateSample(df, log_upper=6.0), shows its CDF, then raises 10 to
    that sample and prints the median, mean, both skewness measures and the
    CDF value at the mean. Finally an estimated PDF of the incomes is drawn
    with thinkplot.Pdf.
    """
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)
    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')

    sample = np.power(10, log_sample)
    mean = np.mean(sample)
    cdf = thinkstats2.Cdf(sample)
    # Each report is a single pre-formatted string passed to print(...):
    # that is valid syntax on Python 3 and yields the same "label value"
    # text the former Python 2 print statements produced.
    print("Median: %s" % (np.median(sample),))
    print("Mean: %s" % (mean,))
    print("Skewness: %s" % (thinkstats2.Skewness(sample),))
    print("Pearson's Skewness: %s"
          % (thinkstats2.PearsonMedianSkewness(sample),))
    print("Percent of people with incomes <= mean: %s" % (cdf[mean],))
    pdf = thinkstats2.EstimatedPdf(sample)
    # NOTE(review): no thinkplot.Show()/Config() follows in the visible code,
    # so whether this PDF is ever displayed depends on the caller -- confirm.
    thinkplot.Pdf(pdf)
Example #6
0
    lams = SimulateSample(lam, n, 1000)
    SampleDistrPLot(lams, n, lam)

# Configure the plot: axis labels, title, x-axis limited to [0, 4], and the
# legend switched on.
thinkplot.Config(xlabel='L estimate',
                 ylabel='CDF',
                 title='Sampling distribution',
                 xlim=[0, 4],
                 legend=True)

#--- Chapter6 Ex1
df = hinc.ReadData()
log_sample = hinc2.InterpolateSample(df, log_upper=6.0)
# Raise 10 to the interpolated values to move from the log scale to incomes.
sample = np.power(10, log_sample)
print('Mean = ', sample.mean())
print('Median =', thinkstats2.Median(sample))
print('Skewness =', thinkstats2.Skewness(sample))
print('Pearson Median Skewness =', thinkstats2.PearsonMedianSkewness(sample))
income_cdf = thinkstats2.Cdf(sample)
# CDF value at the mean, scaled to a percentage; labelled so the number is
# not printed bare. (Presumably Prob(x) is the share of values <= x -- confirm
# against thinkstats2.)
print('Percent below mean =', income_cdf.Prob(sample.mean()) * 100)


#--- Chapter8 Ex3
def SimulateGame(lam):
    """Counts simulated goals in one game of unit length.

    Repeatedly draws a waiting time from random.expovariate(lam) and adds it
    to the elapsed time; every draw that keeps the elapsed time at or below 1
    counts as one goal.

    lam: rate parameter passed to random.expovariate

    NOTE(review): no return statement is visible after the loop, so as shown
    the function returns None. The snippet may be truncated -- confirm whether
    `goals` is meant to be returned.
    """
    t = 0
    goals = 0
    while True:
        time_int = random.expovariate(lam)
        t += time_int
        # Stop once the next goal would fall after the end of the game.
        if t > 1:
            break
        goals += 1
Example #7
0
    greq = preg[preg.agepreg >= 30]
    less = preg[preg.agepreg < 30]

    assert len(greq) == 2635
    assert len(less) == 10606

    return greq, less


def MakePdfs(greq, less):
    """Shows estimated PDFs of totalwgt_lb for two frames, side by side.

    greq: frame drawn in the first subplot, labelled 'greater/equal to 30'
    less: frame drawn in the second subplot, labelled 'less than 30'

    Missing totalwgt_lb values are dropped before each estimate is built.
    """
    groups = ((greq, 'greater/equal to 30'), (less, 'less than 30'))

    # Build both density estimates up front, before any plotting call.
    panels = [
        (thinkstats2.EstimatedPdf(frame.totalwgt_lb.dropna()), caption)
        for frame, caption in groups
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (density, caption) in enumerate(panels, start=1):
        thinkplot.SubPlot(position)
        thinkplot.Pdf(density, label=caption)
        thinkplot.Config(xlabel='Birth weight (lbs)', ylabel='PDF')
    thinkplot.Show()


greq, less = MakeFrames()
MakePdfs(greq, less)
# Each report is a single pre-formatted string passed to print(...): valid on
# Python 3, and the same "label value" text the Python 2 print statements
# produced. Missing totalwgt_lb values are dropped before every statistic.
print("greater/equal to 30 skew: %s"
      % (thinkstats2.Skewness(greq.totalwgt_lb.dropna()),))
print("less than 30 skew: %s"
      % (thinkstats2.Skewness(less.totalwgt_lb.dropna()),))
print("greater/equal to 30 mean: %s"
      % (thinkstats2.Mean(greq.totalwgt_lb.dropna()),))
print("greater/equal to 30 median: %s"
      % (thinkstats2.Median(greq.totalwgt_lb.dropna()),))
print("less than 30 mean: %s"
      % (thinkstats2.Mean(less.totalwgt_lb.dropna()),))
print("less than 30 median: %s"
      % (thinkstats2.Median(less.totalwgt_lb.dropna()),))