def print_critics_users_diff(users_scores, critics_scores):
    """Print and plot how the mean user score compares with the critic mean.

    Prints both means, their difference, that difference as a percentage of
    the critic mean, and the value returned by ``ts2.CohenEffectSize``.
    Afterwards a two-bar chart of the means is shown via ``plt``.

    users_scores: score collection exposing ``.mean()``
    critics_scores: score collection exposing ``.mean()``
    """
    two_decimals = "%.2f"

    user_avg = users_scores.mean()
    critic_avg = critics_scores.mean()
    gap = user_avg - critic_avg

    print("Mean CRITIC score:", two_decimals % critic_avg)
    print("Mean USER score:", two_decimals % user_avg)
    print("\nUSER mean - CRITIC mean:", two_decimals % gap)

    # Gap expressed relative to the critic mean, in percent.
    print("Users rated albums", two_decimals % (gap * 100 / critic_avg),
          "percent higher than critics did.")

    effect = ts2.CohenEffectSize(users_scores, critics_scores)
    print("\nEffect size (Cohen's D) of", two_decimals % effect)

    # Bar chart: one bar per group, y axis fixed to the 60-90 window.
    labels = ("Users", "Critics")
    positions = np.arange(len(labels))
    plt.bar(positions, [user_avg, critic_avg], align='center', alpha=0.5)
    plt.xticks(positions, labels)
    plt.ylabel('Mean score')
    plt.title('Mean album review score (out of 100)')
    plt.ylim(60, 90)
    plt.show()
Esempio n. 2
0
def main(script):
    """Run the module self-checks, then report first-vs-other birth weights.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode is expected to return 39 for this histogram.
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # AllModes: the second field of the first entry is expected to be 4693.
    ranked = AllModes(length_hist)
    top_freq = ranked[0][1]
    assert top_freq == 4693, top_freq

    for val, count in ranked[:5]:
        print(val, count)

    # Mean birth weight of each group and the gap between them.
    first_avg = firsts.totalwgt_lb.mean()
    other_avg = others.totalwgt_lb.mean()
    report = "firsts = {} pounds, others = {} pounds, dif = {} pounds "
    print(report.format(first_avg, other_avg, first_avg - other_avg))

    print(thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb))

    print('%s: All tests passed.' % script)
Esempio n. 3
0
def Summarize(live, firsts, others):
    """Print pregnancy-length summary statistics for the three groups.

    live: DataFrame-like with a ``prglngth`` column (all live births)
    firsts: DataFrame-like with a ``prglngth`` column (first babies)
    others: DataFrame-like with a ``prglngth`` column (other babies)
    """
    all_len = live.prglngth
    live_avg, live_var, live_std = all_len.mean(), all_len.var(), all_len.std()

    print('Live mean', live_avg)
    print('Live variance', live_var)
    print('Live std', live_std)

    first_len = firsts.prglngth
    other_len = others.prglngth
    first_avg, other_avg = first_len.mean(), other_len.mean()
    first_var, other_var = first_len.var(), other_len.var()

    print('Mean')
    print('First babies', first_avg)
    print('Others', other_avg)

    print('Variance')
    print('First babies', first_var)
    print('Others', other_var)

    # Gap between the group means, in weeks, then rescaled.
    gap_weeks = first_avg - other_avg
    print('Difference in weeks', gap_weeks)
    print('Difference in hours', gap_weeks * 7 * 24)

    print('Difference relative to 39 weeks', gap_weeks / 39 * 100)

    print('Cohen d', thinkstats2.CohenEffectSize(first_len, other_len))
Esempio n. 4
0
def WeightDifference(live, firsts, others):
    """Print how birth weight differs between first babies and others.

    Reports each group's mean and variance of ``totalwgt_lb``, the gap
    between the means in pounds and in ounces, that gap as a percentage of
    the overall mean, and the value of ``thinkstats2.CohenEffectSize``.

    live: DataFrame of all live births
    firsts: DataFrame of first babies
    others: DataFrame of others
    """
    overall_avg = live.totalwgt_lb.mean()

    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb
    first_avg, other_avg = first_wgt.mean(), other_wgt.mean()
    first_var, other_var = first_wgt.var(), other_wgt.var()

    print('Mean')
    print('First babies', first_avg)
    print('Others', other_avg)

    print('Variance')
    print('First babies', first_var)
    print('Others', other_var)

    # Firsts minus others, in pounds; 16 ounces to the pound.
    gap_lb = first_avg - other_avg
    print('Difference in lbs', gap_lb)
    print('Difference in oz', gap_lb * 16)

    print('Difference relative to mean (%age points)',
          gap_lb / overall_avg * 100)

    print('Cohen d', thinkstats2.CohenEffectSize(first_wgt, other_wgt))
Esempio n. 5
0
def WeightDifference(live, firsts, others):
    """Print the first-vs-others weight gap and return the effect size.

    live: not used by this function
    firsts: DataFrame-like with a ``totalwgt_lb`` column (first babies)
    others: DataFrame-like with a ``totalwgt_lb`` column (other babies)

    returns: the value produced by ``thinkstats2.CohenEffectSize``
    """
    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb

    # Mean of firsts minus mean of others, in pounds.  The original author
    # noted that firsts came out lighter than others on their data.
    print('Difference in lbs', first_wgt.mean() - other_wgt.mean())

    effect = thinkstats2.CohenEffectSize(first_wgt, other_wgt)
    print('Cohen d', effect)
    return effect
Esempio n. 6
0
def wt_diff(live, firsts, others):
    """Print birth-weight statistics comparing first babies with others.

    live: DataFrame-like with a ``totalwgt_lb`` column (all live births)
    firsts: DataFrame-like with a ``totalwgt_lb`` column (first babies)
    others: DataFrame-like with a ``totalwgt_lb`` column (other babies)
    """
    mean0 = live.totalwgt_lb.mean()
    mean1 = firsts.totalwgt_lb.mean()
    mean2 = others.totalwgt_lb.mean()
    var1 = firsts.totalwgt_lb.var()
    var2 = others.totalwgt_lb.var()

    # The "mean: " line used to print the variances; each statistic is now
    # reported under its own label.
    print("mean: ", mean1, mean2)
    print("var: ", var1, var2)

    print("diff_lb: ", mean1 - mean2)
    # Gap as a percentage of the overall mean weight.
    print("diff_rel_mean: ", (mean1 - mean2) / mean0 * 100)

    d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)
    print("cohen's d: ", d)
Esempio n. 7
0
def WeightComparison():
    """Load the pregnancy data and print the birth-weight effect size.

    Rows with ``outcome == 1`` are kept, split on ``birthord == 1`` versus
    everything else, and the two ``totalwgt_lb`` columns are passed to
    ``thinkstats2.CohenEffectSize``.
    """
    pregnancies = nsfg.ReadFemPreg()
    live_births = pregnancies[pregnancies.outcome == 1]

    first_wgt = live_births[live_births.birthord == 1].totalwgt_lb
    other_wgt = live_births[live_births.birthord != 1].totalwgt_lb

    print(thinkstats2.CohenEffectSize(first_wgt, other_wgt))
    print(
        "Still a really small effect size, though not as small as difference in pregnancy length"
    )
Esempio n. 8
0
def WeightDifferences(first, other, live):
    """Print the mean weight of each group and return the effect size.

    first: DataFrame-like with a ``totalwgt_lb`` column (first babies)
    other: DataFrame-like with a ``totalwgt_lb`` column (other babies)
    live: DataFrame-like with a ``totalwgt_lb`` column (all live births)

    returns: ``thinkstats2.CohenEffectSize`` of first vs. other weights
    """
    print("Means:")

    # Compute and print each mean in turn, keeping the values for later.
    averages = []
    for label, frame in (("First mean:", first),
                         ("Other mean:", other),
                         ("Live mean:", live)):
        avg = frame.totalwgt_lb.mean()
        print(label, avg)
        averages.append(avg)
    first_mean, other_mean, live_mean = averages

    # Absolute gap between the groups as a percentage of the overall mean.
    relative_gap = abs(first_mean - other_mean) / live_mean
    print("Means differences:", relative_gap * 100)

    return thinkstats2.CohenEffectSize(first.totalwgt_lb, other.totalwgt_lb)
Esempio n. 9
0
def WeightDiff(firsts, others):
    """Print mean, variance, mean gap and effect size of birth weight.

    firsts: DataFrame-like with a ``totalwgt_lb`` column (first babies)
    others: DataFrame-like with a ``totalwgt_lb`` column (other babies)
    """
    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb

    first_avg, other_avg = first_wgt.mean(), other_wgt.mean()
    first_var, other_var = first_wgt.var(), other_wgt.var()

    print('Mean')
    print('First babies ', first_avg)
    print('Others ', other_avg)

    print('Variance')
    print('First babies ', first_var)
    print('Others ', other_var)

    print('Difference mean that First babies and others ',
          first_avg - other_avg)

    print('Cohen d ', thinkstats2.CohenEffectSize(first_wgt, other_wgt))
Esempio n. 10
0
def Summarize(greq, less):
    """Print pregnancy-length summary statistics for two groups.

    Each line is written with a single pre-formatted string passed to
    ``print(...)``.  That form produces the same text under Python 2 (where
    the old ``print`` statements worked) and Python 3 (where they were a
    SyntaxError).

    greq: DataFrame-like with a ``prglngth`` column; reported as '>= 30'
    less: DataFrame-like with a ``prglngth`` column; reported as '< 30'
    (what the threshold of 30 measures is decided by the caller that
    builds the two groups)
    """
    gMean = greq.prglngth.mean()
    gVar = greq.prglngth.var()

    print('>= 30 mean: %s' % (gMean,))
    print('>= 30 variance: %s' % (gVar,))

    lMean = less.prglngth.mean()
    lVar = less.prglngth.var()

    print('< 30 mean: %s' % (lMean,))
    print('< 30 variance: %s' % (lVar,))

    print('Difference between means: %s' % (abs(gMean - lMean),))

    d = thinkstats2.CohenEffectSize(greq.prglngth, less.prglngth)
    print('Cohen d: %s' % (d,))
Esempio n. 11
0
def main(script):
    """Report first-vs-other birth weights, then run the module self-checks.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb
    first_avg, other_avg = first_wgt.mean(), other_wgt.mean()
    first_var, other_var = first_wgt.var(), other_wgt.var()

    print('Mean Weight')
    print('First babies', first_avg)
    print('Others babies', other_avg)

    print('Variance in Weight')
    print('First babies', first_var)
    print('Others babies', other_var)

    print('Difference in lbs', first_avg - other_avg)

    print('Cohen d', thinkstats2.CohenEffectSize(first_wgt, other_wgt))

    # Mode is expected to return 39 for this histogram.
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # AllModes: the second field of the first entry is expected to be 4693.
    ranked = AllModes(length_hist)
    top_freq = ranked[0][1]
    assert top_freq == 4693, top_freq

    for val, count in ranked[:5]:
        print(val, count)

    print('%s: All tests passed.' % script)
Esempio n. 12
0
def CompareWeight(live, firsts, others):
    """Print mean, variance and effect size of birth weight by birth order.

    live: DataFrame-like with a ``totalwgt_lb`` column (all live births)
    firsts: DataFrame-like with a ``totalwgt_lb`` column (first babies)
    others: DataFrame-like with a ``totalwgt_lb`` column (other babies)
    """
    overall_avg = live.totalwgt_lb.mean()
    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb
    first_avg, other_avg = first_wgt.mean(), other_wgt.mean()

    print('Mean')
    print('Firsts', first_avg)
    print('Others', other_avg)

    # Firsts minus others, in pounds and as a percentage of the overall mean.
    gap_lb = first_avg - other_avg
    print('Difference in lbs', gap_lb)
    print('Difference in %', gap_lb / overall_avg * 100)

    first_var, other_var = first_wgt.var(), other_wgt.var()

    print('Variance')
    print('Firsts', first_var)
    print('Others', other_var)

    print('Cohen effect', thinkstats2.CohenEffectSize(first_wgt, other_wgt))
Esempio n. 13
0
def light_heavy(firsts, others):
    """Print both groups' mean weights and their effect size.

    firsts: DataFrame-like with a ``totalwgt_lb`` column
    others: DataFrame-like with a ``totalwgt_lb`` column

    returns: always the empty string
    """
    first_wgt, other_wgt = firsts.totalwgt_lb, others.totalwgt_lb
    print(first_wgt.mean(), other_wgt.mean())
    effect = thinkstats2.CohenEffectSize(first_wgt, other_wgt)
    print(effect)
    return ""
Esempio n. 14
0
import math, thinkstats2, nsfg
# Load the pregnancy table, keep rows with outcome == 1, and split them on
# birth order (birthord == 1 versus everything else).
preg = nsfg.ReadFemPreg()
live = preg[preg.outcome == 1]
firsts = live[live.birthord == 1]
others = live[live.birthord != 1]

# Per-group statistics of total birth weight (pounds), plus group sizes.
mean_f, mean_o = firsts.totalwgt_lb.mean(), others.totalwgt_lb.mean()
var_f, var_o = firsts.totalwgt_lb.var(), others.totalwgt_lb.var()
n_f, n_o = len(firsts), len(others)

d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)

# Firsts minus others, in pounds; reused for the ounce and percent lines.
diff_lb = mean_f - mean_o

print('First babies average weight', mean_f)
print('Other babies average weight', mean_o)

print('Difference in weight between first and other babies, in oz:',
      diff_lb * 16)

# Percentage is taken relative to the mean of the "others" group.
print('Difference in weight between first and other babies, % :',
      diff_lb * 100 / mean_o)

print('First babies variance', var_f)
print('Other babies variance', var_o)

print('Cohen d for weight', d)
Esempio n. 15
0
        self.n, self.m = len(group1), len(group2)
        self.pool = np.hstack((group1, group2))

    def RunModel(self):
        """Shuffle the pooled data in place and split it into two groups.

        returns: 2-tuple of the first ``self.n`` items of the shuffled pool
                 and the remaining items
        """
        pooled = self.pool
        np.random.shuffle(pooled)  # in-place: self.pool itself is reordered
        cut = self.n
        return pooled[:cut], pooled[cut:]


# Load the two groups and pair up their weekend-alcohol columns.
male, female = MakeFrames()
alcwknd = male.alcwknd, female.alcwknd

# Helpers defined elsewhere in the file; they are called for their side
# effects only (their return values are not used here).
MakeHists(male, female)
MakePmfs(male, female)
MakeStep(male, female)
MakeCdfs(male, female)

# Hypothesis test on the paired columns; the p-value is reported at the end.
ht = DiffMeansPermute(alcwknd)
pvalue = ht.PValue()
ht.PlotCdf()
thinkplot.Config(xlabel='Difference in Means',
                 ylabel='CDF',
                 title='Weekend Alcohol Consumption')
thinkplot.Show()

# The report used Python 2 ``print`` statements, which are a SyntaxError on
# Python 3.  Each line is now one pre-formatted string passed to print(),
# which emits the same text (including the space before each embedded
# newline) on both interpreters.
print('Weekend Alcohol Consumption:\nMen:\nMean: %s \nVariance: %s' %
      (male.alcwknd.mean(), male.alcwknd.var()))
print('\nWomen:\nMean: %s \nVariance: %s' %
      (female.alcwknd.mean(), female.alcwknd.var()))
print('\nDifference in means: %s' %
      (abs(male.alcwknd.mean() - female.alcwknd.mean()),))
print('\nCohen d: %s' %
      (thinkstats2.CohenEffectSize(male.alcwknd, female.alcwknd),))
print('\np-value: %s' % (pvalue,))
Esempio n. 16
0
    print('Others', var2)

    print('Difference in lbs', mean1 - mean2)
    print('Difference in oz', (mean1 - mean2) * 16)

    print('Difference relative to mean (%age points)', 
          (mean1 - mean2) / mean0 * 100)

    d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)
    print('Cohen d', d)


# In[319]:


# Effect size of total birth weight, first babies versus others.
# `firsts` and `others` are expected to be defined by earlier cells.
d = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)
print('Cohen d', d)


# In[325]:


# Mean total birth weight for all live births and for each group.
# mean0 is not used in the lines visible here; presumably a later cell
# reads it (TODO confirm).
mean0 = live.totalwgt_lb.mean()
mean1 = firsts.totalwgt_lb.mean()
mean2 = others.totalwgt_lb.mean()

print('Mean')
print('First babies', mean1)
print('Others', mean2)

Esempio n. 17
0
        group2 = np.random.choice(self.pool, self.m, replace=True)
        data = group1, group2
        return data


# Load every data set used by the exercises below.
preg = nsfg.ReadFemPreg()
live = preg[preg["outcome"] == 1]
resp = nsfg.ReadFemResp()
bs = brfss.ReadBrfss()
income = hinc.ReadData()
log_intp_income = hinc2.InterpolateSample(income, log_upper=6.0)

# Q1. Think Stats Chapter 2 Exercise 4 (effect size of Cohen's d)
# The row masks are built from `live` itself.  The previous version indexed
# `live` with masks computed on `preg`, which only worked because pandas
# aligned the longer mask to live's index.
first_wt = live.loc[live["birthord"] == 1, "totalwgt_lb"]
other_wt = live.loc[live["birthord"] != 1, "totalwgt_lb"]
# NOTE(review): the result is discarded, so it is only visible when this
# line runs in an interactive session.
ts.CohenEffectSize(first_wt, other_wt)

# Q2. Think Stats Chapter 3 Exercise 1 (actual vs. biased)
# d is the smallest gap between distinct values; bins are centred on the
# values by extending half a gap beyond the minimum and maximum.
d = np.diff(np.unique(resp["numkdhh"])).min()
left_of_first_bin = resp["numkdhh"].min() - float(d) / 2
right_of_last_bin = resp["numkdhh"].max() + float(d) / 2
plt.clf()
# `normed` was removed from matplotlib's hist(); `density` is its
# replacement.
plt.hist(resp["numkdhh"],
         bins=np.arange(left_of_first_bin, right_of_last_bin + d, d),
         histtype="step",
         density=True,
         label="Actual")
plt.hist(resp["numkdhh"],
         bins=np.arange(left_of_first_bin, right_of_last_bin + d, d),
         histtype="step",
         weights=resp["numkdhh"],
Esempio n. 18
0
def PregnancyLengthDifferences(first, other):
    """Return the effect size of pregnancy length between two groups.

    first: DataFrame-like with a ``prglngth`` column
    other: DataFrame-like with a ``prglngth`` column

    returns: the value produced by ``thinkstats2.CohenEffectSize``
    """
    first_len = first.prglngth
    other_len = other.prglngth
    return thinkstats2.CohenEffectSize(first_len, other_len)
Esempio n. 19
0
        new_pmf[val] *= val
    new_pmf.Normalize()
    return new_pmf


# Data sources for both exercises.
preg = nsfg.ReadFemPreg()
resp = nsfg.ReadFemResp()
live, firsts, others = first.MakeFrames()

#--- Chapter2 Ex4
# Total birth weight per group with missing values dropped.
wgt_live = live.totalwgt_lb.dropna()
wgt_first = firsts.totalwgt_lb.dropna()
wgt_other = others.totalwgt_lb.dropna()

# Gap between the group means as a percentage of the overall mean.
mean_diff = 100 * (wgt_first.mean() - wgt_other.mean()) / wgt_live.mean()

wgt_cohend = thinkstats2.CohenEffectSize(wgt_first, wgt_other)
plen_cohend = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth)

print("Difference in relative mean:", mean_diff)
print("Cohen's d for total weight in lbs:", wgt_cohend)
print("Cohen's d for pregnancy length in weeks:", plen_cohend)

#--- Chapter3 Ex1
# PMF of resp.numkdhh and the PMF returned by BiasPmf for it, plotted together.
actual_pmf = thinkstats2.Pmf(resp.numkdhh, label="actual")
biased_pmf = BiasPmf(actual_pmf, label="biased")

thinkplot.PrePlot(2)
actual_hist = thinkplot.Pmf(actual_pmf)
biased_hist = thinkplot.Pmf(biased_pmf)
thinkplot.Show(xlabel="#kids in household", ylabel="PMF")

print("Actual Mean:", actual_pmf.Mean())
print("Biased Mean:", biased_pmf.Mean())
Esempio n. 20
0
def preg_length_comparison(live, firsts, others):
    """Print the pregnancy-length effect size between firsts and others.

    live: not used by this function
    firsts: mapping/DataFrame with a 'prglngth' entry
    others: mapping/DataFrame with a 'prglngth' entry

    returns: None (the original returned the result of ``print``)
    """
    first_len = firsts['prglngth']
    other_len = others['prglngth']
    # Delegates the computation to thinkstats2.
    effect = thinkstats2.CohenEffectSize(first_len, other_len)
    print('Preg Length Cohen Effect Size:', effect)
    return None
Esempio n. 21
0
def WeightDifference(firsts, others):
    """Print the gap between mean weights, then the effect size.

    firsts: DataFrame-like with a ``totalwgt_lb`` column
    others: DataFrame-like with a ``totalwgt_lb`` column
    """
    first_wgt = firsts.totalwgt_lb
    other_wgt = others.totalwgt_lb

    gap_lb = first_wgt.mean() - other_wgt.mean()
    print(gap_lb)

    effect = thinkstats2.CohenEffectSize(first_wgt, other_wgt)
    print(effect)
Esempio n. 22
0
def weight_comparison(live, firsts, others):
    """Print the birth-weight effect size between firsts and others.

    live: not used by this function
    firsts: mapping/DataFrame with a 'totalwgt_lb' entry
    others: mapping/DataFrame with a 'totalwgt_lb' entry

    returns: None (the original returned the result of ``print``)
    """
    first_wgt = firsts['totalwgt_lb']
    other_wgt = others['totalwgt_lb']
    # Delegates the computation to thinkstats2.
    effect = thinkstats2.CohenEffectSize(first_wgt, other_wgt)
    print('Weight Cohen Effect Size:', effect)
    return None
Esempio n. 23
0
def COHEN(Frame1, Frame2):
    """Return the difference of means scaled by a combined spread.

    The spread is the square root of the unweighted average of the two
    squared standard deviations, so group sizes do not enter the result.

    Frame1: object exposing ``.mean()`` and ``.std()``
    Frame2: object exposing ``.mean()`` and ``.std()``
    """
    mean_gap = Frame1.mean() - Frame2.mean()
    avg_squared_std = (Frame1.std() ** 2 + Frame2.std() ** 2) / 2
    return mean_gap / avg_squared_std ** .5


import thinkstats2, first

# Fetch the three frames (all live births, first babies, other babies).
live, firsts, others = first.MakeFrames()

# Total birth weight column of each frame.
TotalW, FirstW, OtherW = (live.totalwgt_lb, firsts.totalwgt_lb,
                          others.totalwgt_lb)

# Hand-written effect size alongside the thinkstats2 one.
MyCohen = COHEN(FirstW, OtherW)
NotMyCohen = thinkstats2.CohenEffectSize(FirstW, OtherW)

# Mean pregnancy length of each group.
FirstPMean, OtherPMean = firsts.prglngth.mean(), others.prglngth.mean()

print("the mean weight of first babies is " + str(FirstW.mean()) + " lbs")
print("the mean weight of other babies is " + str(OtherW.mean()) + " lbs")
print("the Cohen's d between the two sets is " + str(MyCohen))
print("the mean pregnancy length of first babies was " + str(FirstPMean)
      + " weeks")
print("the mean pregnancy length of other babies was " + str(OtherPMean)
      + " weeks")
print("the difference between the pregnancy lengths was "
      + str(FirstPMean - OtherPMean))