def test_weightstats_3(self): x1_2d, x2_2d = self.x1_2d, self.x2_2d w1, w2 = self.w1, self.w2 d1w_2d = DescrStatsW(x1_2d, weights=w1) d2w_2d = DescrStatsW(x2_2d, weights=w2) x1r_2d = d1w_2d.asrepeats() x2r_2d = d2w_2d.asrepeats() assert_almost_equal(x2r_2d.mean(0), d2w_2d.mean, 14) assert_almost_equal(x2r_2d.var(0), d2w_2d.var, 14) assert_almost_equal(x2r_2d.std(0), d2w_2d.std, 14) assert_almost_equal(np.cov(x2r_2d.T, bias=1), d2w_2d.cov, 14) assert_almost_equal(np.corrcoef(x2r_2d.T), d2w_2d.corrcoef, 14) # print d1w_2d.ttest_mean(3) # #scipy.stats.ttest is also vectorized # print stats.ttest_1samp(x1r_2d, 3) t, p, d = d1w_2d.ttest_mean(3) assert_almost_equal([t, p], stats.ttest_1samp(x1r_2d, 3), 11) #print [stats.ttest_1samp(xi, 3) for xi in x1r_2d.T] cm = CompareMeans(d1w_2d, d2w_2d) ressm = cm.ttest_ind() resss = stats.ttest_ind(x1r_2d, x2r_2d) assert_almost_equal(ressm[:2], resss, 14)
def test_weightstats_3(self): x1_2d, x2_2d = self.x1_2d, self.x2_2d w1, w2 = self.w1, self.w2 d1w_2d = DescrStatsW(x1_2d, weights=w1) d2w_2d = DescrStatsW(x2_2d, weights=w2) x1r_2d = d1w_2d.asrepeats() x2r_2d = d2w_2d.asrepeats() assert_almost_equal(x2r_2d.mean(0), d2w_2d.mean, 14) assert_almost_equal(x2r_2d.var(0), d2w_2d.var, 14) assert_almost_equal(x2r_2d.std(0), d2w_2d.std, 14) assert_almost_equal(np.cov(x2r_2d.T, bias=1), d2w_2d.cov, 14) assert_almost_equal(np.corrcoef(x2r_2d.T), d2w_2d.corrcoef, 14) # print d1w_2d.ttest_mean(3) # #scipy.stats.ttest is also vectorized # print stats.ttest_1samp(x1r_2d, 3) t, p, d = d1w_2d.ttest_mean(3) assert_almost_equal([t, p], stats.ttest_1samp(x1r_2d, 3), 11) # print [stats.ttest_1samp(xi, 3) for xi in x1r_2d.T] cm = CompareMeans(d1w_2d, d2w_2d) ressm = cm.ttest_ind() resss = stats.ttest_ind(x1r_2d, x2r_2d) assert_almost_equal(ressm[:2], resss, 14)
def test_ttest_2sample(self): x1, x2 = self.x1, self.x2 x1r, x2r = self.x1r, self.x2r w1, w2 = self.w1, self.w2 #Note: stats.ttest_ind handles 2d/nd arguments res_sp = stats.ttest_ind(x1r, x2r) assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2], res_sp, 14) #check correct ttest independent of user ddof cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=1)) assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14) cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1), DescrStatsW(x2, weights=w2, ddof=2)) assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14) cm0 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=0)) cm1 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=1)) cm2 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1), DescrStatsW(x2, weights=w2, ddof=2)) res0 = cm0.ttest_ind(usevar='unequal') res1 = cm1.ttest_ind(usevar='unequal') res2 = cm2.ttest_ind(usevar='unequal') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14) #check confint independent of user ddof res0 = cm0.tconfint_diff(usevar='pooled') res1 = cm1.tconfint_diff(usevar='pooled') res2 = cm2.tconfint_diff(usevar='pooled') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14) res0 = cm0.tconfint_diff(usevar='unequal') res1 = cm1.tconfint_diff(usevar='unequal') res2 = cm2.tconfint_diff(usevar='unequal') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14)
def test_ttest_2sample(self): x1, x2 = self.x1, self.x2 x1r, x2r = self.x1r, self.x2r w1, w2 = self.w1, self.w2 #Note: stats.ttest_ind handles 2d/nd arguments res_sp = stats.ttest_ind(x1r, x2r) assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2], res_sp, 14) #check correct ttest independent of user ddof cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=1)) assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14) cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1), DescrStatsW(x2, weights=w2, ddof=2)) assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14) cm0 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=0)) cm1 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=1)) cm2 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1), DescrStatsW(x2, weights=w2, ddof=2)) res0 = cm0.ttest_ind(usevar='separate') res1 = cm1.ttest_ind(usevar='separate') res2 = cm2.ttest_ind(usevar='separate') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14) #check confint independent of user ddof res0 = cm0.confint_diff(usevar='pooled') res1 = cm1.confint_diff(usevar='pooled') res2 = cm2.confint_diff(usevar='pooled') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14) res0 = cm0.confint_diff(usevar='separate') res1 = cm1.confint_diff(usevar='separate') res2 = cm2.confint_diff(usevar='separate') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14)
a = 0.05 # 有意水準(デフォルト) = 1 - 信頼係数 alt = 'two-sided' # 両側検定(デフォルト) # 左片側検定なら'smaller' # 右片側検定なら'larger' d = DescrStatsW(np.array(X) - np.array(Y)) # 対標本の場合 d.ttest_mean(alternative=alt)[1] # p値 #> 0.0006415571512322235 d.tconfint_mean(alpha=a, alternative=alt) # 信頼区間 #> (-3.9955246743198867, -1.3644753256801117) c = CompareMeans(DescrStatsW(X), DescrStatsW(Y)) # 対標本でない場合 ve = 'pooled' # 等分散を仮定する(デフォルト).仮定しないなら'unequal'. c.ttest_ind(alternative=alt, usevar=ve)[1] # p値 #> 0.000978530937238609 c.tconfint_diff(alpha=a, alternative=alt, usevar=ve) # 信頼区間 #> (-4.170905570517185, -1.1890944294828283) ### 4.4.4 独立性の検定(カイ2乗検定) import pandas as pd my_url = ('https://raw.githubusercontent.com/taroyabuki' '/fromzero/master/data/smoker.csv') my_data = pd.read_csv(my_url) my_data.head() #> alive smoker #> 0 Yes No