コード例 #1
0
ファイル: hinc2.py プロジェクト: seppomerimaa/ThinkStats2
def main():
    """Print skewness statistics for an interpolated income sample and plot its CDF.

    Reads the data with hinc.ReadData, draws a sample with InterpolateSample
    using log_upper=6.0, prints the median, Pearson's median skewness,
    skewness and mean of that sample, prints the fraction of the sample
    below the mean, and finally shows the CDF plot.
    """
    income_table = hinc.ReadData()
    sample = InterpolateSample(income_table, log_upper=6.0)
    cdf = thinkstats2.Cdf(sample)

    median = thinkstats2.Median(sample)
    print("median", median)

    pearson_skew = thinkstats2.PearsonMedianSkewness(sample)
    print("pearson's median skewness", pearson_skew)

    skew = thinkstats2.Skewness(sample)
    print("skewness", skew)

    print("mean", cdf.Mean())

    # Observations recorded by the original author about varying log_upper.
    print(
        "the higher our log_upper, the more right-skewed (according to g_1) "
        "or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    # Fraction of the sample that lies below the sample mean.
    below_mean = cdf.Prob(cdf.Mean())
    print("proportion of the population with income < mean", below_mean)
    print(
        "the higher the upper bound, the greater the proprtion below the mean."
    )

    thinkplot.Cdf(cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')
コード例 #2
0
ファイル: ch6.py プロジェクト: smithb16/ThinkStats2
def PearsonMedianSkewness(xs):
    """Compute Pearson's median skewness coefficient of a sample.

    xs: sequence of values

    returns: 3 * (mean - median) / std, where mean is the first raw moment
        and std is the square root of the second central moment
    """
    middle = thinkstats2.Median(xs)
    first_moment = RawMoment(xs, 1)
    spread = math.sqrt(CentralMoment(xs, 2))
    return 3 * (first_moment - middle) / spread
コード例 #3
0
def Summarize(data):
    """Print summary statistics of data and return its mean and median.

    data: object providing mean() and std() methods that is also accepted
        by the thinkstats2 Median/Skewness/PearsonMedianSkewness helpers

    returns: (mean, median) tuple
    """
    center = data.mean()
    spread = data.std()
    middle = thinkstats2.Median(data)

    for label, value in (('mean', center), ('std', spread), ('median', middle)):
        print(label, value)

    skew = thinkstats2.Skewness(data)
    print('skewness', skew)
    pearson_skew = thinkstats2.PearsonMedianSkewness(data)
    print('pearson skewness', pearson_skew)

    return center, middle
コード例 #4
0
ファイル: density.py プロジェクト: kangxi5200/Think-Stats2
def Summarize(data):
    """Prints the mean, std, median and two skewness measures of a sample.

    data: pandas Series

    returns: tuple of (mean, median)
    """
    # Location and spread first, in the same order they are reported.
    basic_stats = [
        ('mean', data.mean()),
        ('std', data.std()),
        ('median', thinkstats2.Median(data)),
    ]
    for name, value in basic_stats:
        print(name, value)

    print('skewness', thinkstats2.Skewness(data))
    print('pearson skewness', thinkstats2.PearsonMedianSkewness(data))

    sample_mean = basic_stats[0][1]
    sample_median = basic_stats[2][1]
    return sample_mean, sample_median
コード例 #5
0
def Experiment1(n=6, m=1000):
    """Compare the sample mean and sample median as estimators of mu.

    Draws m samples of size n from a Gaussian with mu=0 and sigma=1,
    records the mean and the median of each sample, and prints the value
    of RMSE(estimates, mu) for both sets of estimates.

    n: sample size
    m: number of simulated samples
    """
    mu = 0
    sigma = 1

    means = []
    medians = []
    for _ in range(m):
        xs = [random.gauss(mu, sigma) for _ in range(n)]
        means.append(numpy.mean(xs))
        medians.append(thinkstats2.Median(xs))

    # print() with one pre-formatted string is valid on Python 3 and emits
    # the same text as the old `print label, value` statement on Python 2.
    print('rmse xbar {}'.format(RMSE(means, mu)))
    print('rmse median {}'.format(RMSE(medians, mu)))
コード例 #6
0
def Experiment3(n=7, m=1000):
    """Compare two estimators of the rate of an exponential distribution.

    Draws m samples of size n with random.expovariate(lam), lam=2. For each
    sample it computes L = 1 / mean and Lm = log(2) / median, then prints
    RMSE and MeanError of both sets of estimates relative to lam.

    n: sample size
    m: number of simulated samples
    """
    lam = 2

    means = []
    medians = []
    for _ in range(m):
        xs = [random.expovariate(lam) for _ in range(n)]
        L = 1 / numpy.mean(xs)
        Lm = math.log(2) / thinkstats2.Median(xs)
        means.append(L)
        medians.append(Lm)

    # print() with one pre-formatted string is valid on Python 3 and emits
    # the same text as the old `print label, value` statement on Python 2.
    print('rmse L {}'.format(RMSE(means, lam)))
    print('rmse Lm {}'.format(RMSE(medians, lam)))
    print('mean error L {}'.format(MeanError(means, lam)))
    print('mean error Lm {}'.format(MeanError(medians, lam)))
コード例 #7
0
def describe_inc_dist(log_upper):
    """Print summary statistics of the income distribution for one log_upper.

    Interpolates a sample with hinc2.InterpolateSample, converts it with
    10 ** value, and prints the mean, median, skewness, Pearson median
    skewness and the fraction of incomes below the mean.

    log_upper: value forwarded to hinc2.InterpolateSample as log_upper

    Note: reads the module-level name `df` as the data to interpolate.
    """
    # Use the parameter; the original body referenced a name `j` that is
    # not defined in this function, so the argument was silently ignored.
    log_sample = hinc2.InterpolateSample(df, log_upper=log_upper)
    incomes = np.power(10, log_sample)

    inc_mean = thinkstats2.Mean(incomes)
    inc_med = thinkstats2.Median(incomes)
    inc_skew = thinkstats2.Skewness(incomes)
    inc_pearskew = thinkstats2.PearsonMedianSkewness(incomes)
    print('log_upper = ', log_upper)
    print('Mean Income: ', inc_mean)
    print('Median Income: ', inc_med)
    print('Skewness: ', inc_skew)
    print('Pearson Median Skewness: ', inc_pearskew)

    # Fraction of the sample lying below the sample mean.
    cdf = thinkstats2.Cdf(incomes)
    inc_below_mean = cdf.Prob(inc_mean)
    print('Pct. below mean: ', inc_below_mean)
    print('\n')
コード例 #8
0
def SimulateSampleExpo(lam=2.0, n=10, iters=1000):
    """Simulate repeated exponential samples and estimate lam from each one.

    lam: float, rate of the exponential distribution (samples are drawn
        with scale 1.0 / lam)
    n: sample size
    iters: number of samples to draw

    return:
         Ls - estimates of lam computed as 1 / sample mean
         Lms - estimates of lam computed as log(2) / sample median
    """
    mean_based = []
    median_based = []
    for _ in range(iters):
        draws = np.random.exponential(1.0 / lam, n)
        mean_based.append(1 / np.mean(draws))
        median_based.append(np.log(2) / thinkstats2.Median(draws))

    return mean_based, median_based
コード例 #9
0
def Estimate3(n=7, iters=1000):
    """Evaluate mean-based and median-based estimators of an exponential rate.

    Draws `iters` samples of size `n` from an exponential distribution with
    lam=2, estimates lam from each sample as 1 / mean and as log(2) / median,
    and prints RMSE and MeanError of both sets of estimates.

    n: int sample size
    iters: int number of iterations

    return: None
    """
    lam = 2
    scale = 1.0 / lam

    mean_based = []
    median_based = []
    for _ in range(iters):
        draws = np.random.exponential(scale, n)
        mean_based.append(1 / np.mean(draws))
        median_based.append(np.log(2) / thinkstats2.Median(draws))

    rmse_mean = RMSE(mean_based, lam)
    print('RMSE(means, lam):\n', rmse_mean)
    rmse_median = RMSE(median_based, lam)
    print('RMSE(medians, lam):\n', rmse_median)
    bias_mean = MeanError(mean_based, lam)
    print('MeanError(means, lam):\n', bias_mean)
    bias_median = MeanError(median_based, lam)
    print('MeanError(medians, lam):\n', bias_median)
コード例 #10
0
# For every sample size in n_arr, simulate estimates and hand them to the
# plotting helper. `n_arr`, `lam`, SimulateSample and SampleDistrPLot are
# defined outside this snippet.
for n in n_arr:
    # Third argument is the literal 1000 (presumably the number of
    # simulated samples -- confirm against SimulateSample's signature).
    lams = SimulateSample(lam, n, 1000)
    SampleDistrPLot(lams, n, lam)

# Label the figure and restrict the x axis to [0, 4].
thinkplot.Config(xlabel='L estimate',
                 ylabel='CDF',
                 title='Sampling distribution',
                 xlim=[0, 4],
                 legend=True)

#--- Chapter6 Ex1
# Interpolate a sample with log_upper=6.0, convert it with 10 ** value, and
# print its mean, median and both skewness measures.
df = hinc.ReadData()
log_sample = hinc2.InterpolateSample(df, log_upper=6.0)
sample = np.power(10, log_sample)
print('Mean = ', sample.mean())
print('Median =', thinkstats2.Median(sample))
print('Skewness =', thinkstats2.Skewness(sample))
# NOTE(review): the label below is misspelled ('Skweness'); it is left
# unchanged here because it is program output.
print('Pearson Median Skweness =', thinkstats2.PearsonMedianSkewness(sample))
income_cdf = thinkstats2.Cdf(sample)
# Value of the CDF at the sample mean, scaled by 100 (a percentage).
print(income_cdf.Prob(sample.mean()) * 100)


#--- Chapter8 Ex3
def SimulateGame(lam):
    """Accumulate exponential inter-arrival times until the total exceeds 1.

    lam: rate passed to random.expovariate

    NOTE(review): in the lines visible here `goals` is initialised but never
    incremented and nothing is returned -- the snippet looks truncated, so
    confirm the rest of the body against the original file.
    """
    t = 0
    goals = 0
    while True:
        # Draw the time until the next event and advance the clock.
        time_int = random.expovariate(lam)
        t += time_int
        # Stop once the accumulated time passes 1.
        if t > 1:
            break
コード例 #11
0
ファイル: 9_HO1.py プロジェクト: fullern1/previouscode
    greq = preg[preg.agepreg >= 30]
    less = preg[preg.agepreg < 30]

    assert len(greq) == 2635
    assert len(less) == 10606

    return greq, less


def MakePdfs(greq, less):
    """Plot estimated PDFs of birth weight for two groups side by side.

    greq: frame with a totalwgt_lb column, plotted in the first subplot
        with the label 'greater/equal to 30'
    less: frame with a totalwgt_lb column, plotted in the second subplot
        with the label 'less than 30'

    Missing weights are dropped before each PDF is estimated.
    """
    older_pdf = thinkstats2.EstimatedPdf(greq.totalwgt_lb.dropna())
    younger_pdf = thinkstats2.EstimatedPdf(less.totalwgt_lb.dropna())

    # One row, two columns: each group gets its own panel with shared labels.
    thinkplot.PrePlot(rows=1, cols=2)
    panels = (
        (1, older_pdf, 'greater/equal to 30'),
        (2, younger_pdf, 'less than 30'),
    )
    for position, pdf, label in panels:
        thinkplot.SubPlot(position)
        thinkplot.Pdf(pdf, label=label)
        thinkplot.Config(xlabel='Birth weight (lbs)', ylabel='PDF')

    thinkplot.Show()


# Split the data into the two groups, plot their PDFs, then print the
# skewness, mean and median of birth weight for each group.
greq, less = MakeFrames()
MakePdfs(greq, less)

# Drop missing birth weights once per group instead of before every statistic.
greq_weights = greq.totalwgt_lb.dropna()
less_weights = less.totalwgt_lb.dropna()

# print() with one pre-formatted string is valid on Python 3 and emits the
# same text as the old `print label, value` statement on Python 2.
print("greater/equal to 30 skew: {}".format(thinkstats2.Skewness(greq_weights)))
print("less than 30 skew: {}".format(thinkstats2.Skewness(less_weights)))
print("greater/equal to 30 mean: {}".format(thinkstats2.Mean(greq_weights)))
print("greater/equal to 30 median: {}".format(thinkstats2.Median(greq_weights)))
print("less than 30 mean: {}".format(thinkstats2.Mean(less_weights)))
print("less than 30 median: {}".format(thinkstats2.Median(less_weights)))