コード例 #1
0
def Corr(xs, ys):
    """Computes the correlation of two sequences.

    The result is Cov(xs, ys) divided by the square root of the product
    of the two variances reported by thinkstats2.MeanVar.

    xs: sequence of values
    ys: sequence of values

    Returns: the correlation value
    """
    x_arr = np.asarray(xs)
    y_arr = np.asarray(ys)

    # MeanVar hands back (mean, variance); the means are reused by Cov
    mean_x, var_x = thinkstats2.MeanVar(x_arr)
    mean_y, var_y = thinkstats2.MeanVar(y_arr)

    covariance = Cov(x_arr, y_arr, mean_x, mean_y)
    scale = np.sqrt(var_x * var_y)
    return covariance / scale
コード例 #2
0
ファイル: heri15.py プロジェクト: afcarl/HeriReligion
def MakeErrorModel(model, ys, ts, n=100):
    """Makes a model that captures sample error and residual error.

    model: string representation of the regression model
    ys:    dependent variable
    ts:    explanatory variable
    n:     number of simulations to run

    Returns: tuple (fts, sample_error, total_error), where fts comes from
    the best fit and each error model is the pair of rows (low, high)
    produced by MakeStderr.
    """
    # estimate mean and stddev of the residuals
    residuals = Residuals(model, ys, ts)
    mu, var = thinkstats2.MeanVar(residuals)
    sig = math.sqrt(var)

    # make the best fit
    fts, fys = MakeFit(model, ys, ts)

    # resample residuals and generate hypothetical fits
    # NOTE(review): the last fitted value is dropped (fys[:-1]); presumably
    # MakeFit returns one more point than there are observations -- confirm.
    fits = []
    for _ in range(n):
        fake_ys = [fy + random.gauss(mu, sig) for fy in fys[:-1]]
        _fake_fts, fake_fys = MakeFit(model, fake_ys, ts)
        fits.append(fake_fys)

    # Transpose so each entry holds the simulated values at one position.
    # Materialize the result: on Python 3 zip() is a one-shot iterator, and
    # the columns are consumed twice below; without the list() the second
    # call would see no data and return empty rows.
    columns = list(zip(*fits))

    # sample error alone, then sample error combined with the residuals
    sample_error = MakeStderr(columns)
    total_error = MakeStderr(columns, mu, var)

    return fts, sample_error, total_error
コード例 #3
0
ファイル: agemodel.py プロジェクト: wu12345/ThinkStats2
def main(name, data_dir=''):
    """Runs the age/weight analysis and prints a summary.

    The print calls use a single pre-formatted string argument so the
    output is the same text under Python 2 and Python 3.

    name:     not used by this function
    data_dir: string passed to MakeTables
    """
    pool, firsts, others = MakeTables(data_dir)

    # one line per table: name, number of records, ages and weights
    for table in [pool, firsts, others]:
        print('%s %s %s %s' % (table.name, len(table.records),
                               len(table.ages), len(table.weights)))

    # compute differences in mean age and weight
    age_diff = DifferenceInMeans(firsts, others, 'ages')
    weight_diff = DifferenceInMeans(firsts, others, 'weights')

    # get ages and weights
    ages, weights = GetAgeWeight(pool)
    print('Mean, var weight %s' % (thinkstats2.MeanVar(weights),))

    # compute a least squares fit; only the slope is needed here
    _inter, slope, _r2 = ComputeLeastSquares(ages, weights)

    # see how much of the weight difference is explained by age
    weight_diff_explained = age_diff * slope
    print('Weight difference explained by age: %s' % (weight_diff_explained,))
    print('Fraction explained: %s' % (weight_diff_explained / weight_diff,))
    print('')

    # make a table of mean weight for 5-year age bins
    weight_dict = Partition(ages, weights)
    MakeLinePlot(weight_dict)

    # the correlations are slightly higher if we trim outliers
    # (the return value is unused; presumably the call is kept for what
    # ComputeLeastSquares itself reports -- confirm)
    ages, weights = GetAgeWeight(pool, low=4, high=12)
    ComputeLeastSquares(ages, weights)

    MakeFigures(pool, firsts, others)
コード例 #4
0
ファイル: heri15.py プロジェクト: afcarl/HeriReligion
def MakeStderr(columns, mu2=0, var2=0):
    """Computes an interval of mean +/- 2 standard deviations per column.

    columns: iterable of sequences, each passed to thinkstats2.MeanVar
    mu2:     offset added to every column mean
    var2:    variance added to every column variance

    Returns two rows: the low ends of the intervals and the high ends.
    """
    stats = [thinkstats2.MeanVar(ys) for ys in columns]

    min_fys = []
    max_fys = []
    for mu1, var1 in stats:
        center = mu1 + mu2
        half_width = 2 * math.sqrt(var1 + var2)
        min_fys.append(center - half_width)
        max_fys.append(center + half_width)

    return min_fys, max_fys
コード例 #5
0
def main():
    """Generates correlated pairs and prints summary statistics.

    Draws 1000 pairs from CorrelatedGenerator with rho = -0.8, applies the
    linear transform a * x + b to the x values, prints mean/variance,
    covariance and both correlation measures, then shows a scatter plot.

    The print calls use a single pre-formatted string argument so the
    output is the same text under Python 2 and Python 3.
    """
    random.seed(17)

    rho = -0.8
    res = CorrelatedGenerator(1000, rho)
    xs, ys = zip(*res)

    # linear transform of xs; with a=1, b=0 the values are only
    # multiplied by 1.0 and shifted by 0.0
    a = 1.0
    b = 0.0
    xs = [a * x + b for x in xs]

    print('mean, var of x %s' % (thinkstats2.MeanVar(xs),))
    print('mean, var of y %s' % (thinkstats2.MeanVar(ys),))
    print('covariance %s' % (thinkstats2.Cov(xs, ys),))
    print('Pearson corr %s' % (thinkstats2.Corr(xs, ys),))
    print('Spearman corr %s' % (thinkstats2.SpearmanCorr(xs, ys),))

    thinkplot.Scatter(xs, ys)
    thinkplot.Show()
コード例 #6
0
def main():
    """Fits a least-squares line to generated data and plots it.

    Draws 1000 pairs from SatIqData with rho = 0.8, prints summary
    statistics, the fitted intercept and slope, and the coefficient of
    determination, then plots the fitted line over a scatter of the data.

    The print calls use a single pre-formatted string argument so the
    output is the same text under Python 2 and Python 3.
    """
    random.seed(17)

    rho = 0.8
    xs, ys = SatIqData(1000, rho)
    print('mean, var of x %s' % (thinkstats2.MeanVar(xs),))
    print('mean, var of y %s' % (thinkstats2.MeanVar(ys),))
    print('Pearson corr %s' % (thinkstats2.Corr(xs, ys),))

    inter, slope = thinkstats2.LeastSquares(xs, ys)
    print('inter %s' % (inter,))
    print('slope %s' % (slope,))

    # fitted line, residuals, and R^2 computed from those residuals
    fxs, fys = thinkstats2.FitLine(xs, inter, slope)
    res = thinkstats2.Residuals(xs, ys, inter, slope)
    R2 = thinkstats2.CoefDetermination(ys, res)
    print('R2 %s' % (R2,))

    thinkplot.Plot(fxs, fys, color='gray', alpha=0.2)
    thinkplot.Scatter(xs, ys)
    thinkplot.Show()
コード例 #7
0
def main():
    """Runs three hypothesis tests on pregnancy length and saves a CDF plot for each.

    The data is the pair (firsts.prglngth.values, others.prglngth.values)
    taken from first.MakeFrames(). Each test prints its p-value from 1000
    iterations and is saved under the roots hypothesis1..hypothesis3.
    """
    thinkstats2.RandomSeed(17)

    # get the data
    live, firsts, others = first.MakeFrames()
    live_stats = thinkstats2.MeanVar(live.prglngth)
    print('(Mean, Var) of prglength for live births', live_stats)
    data = firsts.prglngth.values, others.prglngth.values

    def run_test(test_class, root, title, xlabel):
        # build the test, report its p-value, then plot and save its CDF
        ht = test_class(data)
        p_value = ht.PValue(iters=1000)
        print('p-value =', p_value)

        ht.PlotCdf()
        thinkplot.Save(root=root,
                       title=title,
                       xlabel=xlabel,
                       ylabel='CDF',
                       legend=False)

    # test the difference in means
    run_test(DiffMeansPermute, 'hypothesis1',
             'Permutation test', 'difference in means (weeks)')

    # test the difference in std
    run_test(DiffStdPermute, 'hypothesis2',
             'Permutation test', 'difference in std (weeks)')

    # test the difference in means by resampling
    # NOTE(review): this run is labelled as a resampling test of the
    # difference in means but uses DiffStdPermute again -- looks like a
    # copy-paste slip; confirm which test class was intended.
    run_test(DiffStdPermute, 'hypothesis3',
             'Resampling test', 'difference in means (weeks)')
コード例 #8
0
    xs = np.asarray(xs)
    ys = np.asarray(ys)

    if meanx is None:
        meanx = np.mean(xs)
    if meany is None:
        meany = np.mean(ys)

    cov = np.dot(xs-meanx, ys-meany) / len(xs)
    return cov

def Corr(xs, ys):  # Pearson's
    """Computes Pearson's correlation of two sequences.

    xs: sequence of values
    ys: sequence of values

    Returns: Cov(xs, ys) divided by the square root of the product of the
    variances reported by thinkstats2.MeanVar
    """
    x_arr, y_arr = np.asarray(xs), np.asarray(ys)

    # MeanVar hands back (mean, variance); the means are reused by Cov
    mean_x, var_x = thinkstats2.MeanVar(x_arr)
    mean_y, var_y = thinkstats2.MeanVar(y_arr)

    return Cov(x_arr, y_arr, mean_x, mean_y) / np.sqrt(var_x * var_y)
 
import pandas as pd

def SpearmanCorr(xs, ys):  # Spearman's
    """Computes the correlation of the ranks of two sequences.

    xs: sequence of values
    ys: sequence of values

    Returns: Corr applied to the pandas ranks of xs and ys
    """
    # rank each sequence (xs first, then ys) with pandas' default ranking
    ranked = [pd.Series(seq).rank() for seq in (xs, ys)]
    return Corr(*ranked)

# Report both correlation measures for ages vs. weights.
# NOTE(review): ages and weights are not defined in the visible part of
# this file; they must already exist at module level when this runs.
print('Corr', Corr(ages, weights))
print('SpearmanCorr', SpearmanCorr(ages, weights))
# RESULTS: Corr 0.0688339703541
コード例 #9
0
    def testMeanVar(self):
        """Checks thinkstats2.MeanVar on a small sample with one large value.

        The six values sum to 600, so the mean is 100; 48217 is the mean
        of the squared deviations from that mean (dividing by n).
        """
        sample = [1, 1, 1, 3, 3, 591]
        expected_mean, expected_var = 100.0, 48217.0

        mean, var = thinkstats2.MeanVar(sample)

        self.assertAlmostEqual(mean, expected_mean)
        self.assertAlmostEqual(var, expected_var)