def test_confint(testdata):
    """Check ``confidence_interval`` against intervals computed with statsmodels.

    The reference intervals compare group ``'B'`` against control group
    ``'A'``: a t-interval is expected for ``'kpi1'`` and a z-interval for
    ``'kpi2'``.

    Parameters
    ----------
    testdata : fixture
        Indexed as ``testdata[kpi][group]``; presumably a pandas object with
        two-level columns -- confirm against the fixture definition.
    """

    def reference(kpi):
        # Group 'B' is passed first and the control 'A' second.
        return CompareMeans(DescrStatsW(testdata[kpi]['B']),
                            DescrStatsW(testdata[kpi]['A']))

    actual = confidence_interval(testdata, control_label='A')

    t_interval = reference('kpi1').tconfint_diff()
    z_interval = reference('kpi2').zconfint_diff()

    assert actual['B']['kpi1'] == t_interval
    assert actual['B']['kpi2'] == z_interval
def mean_diff_confint_ind(sample1, sample2, alpha=0.05):
    """Confidence interval for the difference in means of two independent samples.

    Parameters
    ----------
    sample1 : array_like
        First sample.
    sample2 : array_like
        Second sample.
    alpha : float in (0, 1)
        The confidence level is computed as ``1 - alpha``.

    Returns
    -------
    lower, upper : floats
        Lower and upper bounds of the confidence interval.
    """
    first_stats = DescrStatsW(sample1)
    second_stats = DescrStatsW(sample2)
    comparison = CompareMeans(first_stats, second_stats)
    return comparison.tconfint_diff(alpha=alpha)
def test_ttest_2sample(self):
    """Two-sample t-test and confidence intervals must not depend on user ddof.

    Checks that the weighted ``ttest_ind`` agrees with ``stats.ttest_ind``
    on ``self.x1r`` / ``self.x2r`` (presumably the unweighted equivalents of
    the weighted samples -- confirm against the fixture setup), and that
    ``CompareMeans`` results are the same for every ``ddof`` combination
    passed to ``DescrStatsW``.
    """
    x1, x2 = self.x1, self.x2
    w1, w2 = self.w1, self.w2

    def compare(ddof1, ddof2):
        # Weighted two-sample comparison with the given user ddof values.
        return CompareMeans(DescrStatsW(x1, weights=w1, ddof=ddof1),
                            DescrStatsW(x2, weights=w2, ddof=ddof2))

    # Note: stats.ttest_ind handles 2d/nd arguments
    reference = stats.ttest_ind(self.x1r, self.x2r)
    assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
                        reference, 14)

    # check correct ttest independent of user ddof
    for ddof1, ddof2 in ((0, 1), (1, 2)):
        assert_almost_equal(compare(ddof1, ddof2).ttest_ind()[:2],
                            reference, 14)

    comparisons = [compare(0, 0), compare(0, 1), compare(1, 2)]

    def assert_same_for_all_ddof(compute):
        # Evaluate every comparison first, then check each against ddof=(0, 0).
        results = [compute(cm) for cm in comparisons]
        baseline = results[0]
        for other in results[1:]:
            assert_almost_equal(other, baseline, 14)

    assert_same_for_all_ddof(lambda cm: cm.ttest_ind(usevar='unequal'))

    # check confint independent of user ddof
    assert_same_for_all_ddof(lambda cm: cm.tconfint_diff(usevar='pooled'))
    assert_same_for_all_ddof(lambda cm: cm.tconfint_diff(usevar='unequal'))
def test_ttest_2sample(self):
    """Two-sample t-test and confidence intervals must not depend on user ddof.

    Checks that the weighted ``ttest_ind`` agrees with ``stats.ttest_ind``
    on ``self.x1r`` / ``self.x2r`` (presumably the unweighted equivalents of
    the weighted samples -- confirm against the fixture setup), and that
    ``CompareMeans`` results are the same for every ``ddof`` combination
    passed to ``DescrStatsW``.
    """
    x1, x2 = self.x1, self.x2
    w1, w2 = self.w1, self.w2

    def compare(ddof1, ddof2):
        # Weighted two-sample comparison with the given user ddof values.
        return CompareMeans(DescrStatsW(x1, weights=w1, ddof=ddof1),
                            DescrStatsW(x2, weights=w2, ddof=ddof2))

    # Note: stats.ttest_ind handles 2d/nd arguments
    reference = stats.ttest_ind(self.x1r, self.x2r)
    assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
                        reference, 14)

    # check correct ttest independent of user ddof
    for ddof1, ddof2 in ((0, 1), (1, 2)):
        assert_almost_equal(compare(ddof1, ddof2).ttest_ind()[:2],
                            reference, 14)

    comparisons = [compare(0, 0), compare(0, 1), compare(1, 2)]

    def assert_same_for_all_ddof(compute):
        # Evaluate every comparison first, then check each against ddof=(0, 0).
        results = [compute(cm) for cm in comparisons]
        baseline = results[0]
        for other in results[1:]:
            assert_almost_equal(other, baseline, 14)

    assert_same_for_all_ddof(lambda cm: cm.ttest_ind(usevar='unequal'))

    # check confint independent of user ddof
    assert_same_for_all_ddof(lambda cm: cm.tconfint_diff(usevar='pooled'))
    assert_same_for_all_ddof(lambda cm: cm.tconfint_diff(usevar='unequal'))
# 右片側検定なら'larger' d = DescrStatsW(np.array(X) - np.array(Y)) # 対標本の場合 d.ttest_mean(alternative=alt)[1] # p値 #> 0.0006415571512322235 d.tconfint_mean(alpha=a, alternative=alt) # 信頼区間 #> (-3.9955246743198867, -1.3644753256801117) c = CompareMeans(DescrStatsW(X), DescrStatsW(Y)) # 対標本でない場合 ve = 'pooled' # 等分散を仮定する(デフォルト).仮定しないなら'unequal'. c.ttest_ind(alternative=alt, usevar=ve)[1] # p値 #> 0.000978530937238609 c.tconfint_diff(alpha=a, alternative=alt, usevar=ve) # 信頼区間 #> (-4.170905570517185, -1.1890944294828283) ### 4.4.4 独立性の検定(カイ2乗検定) import pandas as pd my_url = ('https://raw.githubusercontent.com/taroyabuki' '/fromzero/master/data/smoker.csv') my_data = pd.read_csv(my_url) my_data.head() #> alive smoker #> 0 Yes No #> 1 Yes No #> 2 Yes No #> 3 Yes No
def fn(control, test):
    """Return a confidence interval for the mean difference of two samples.

    ``test`` is passed to ``CompareMeans`` first and ``control`` second, so
    the interval presumably describes ``test - control`` (statsmodels'
    first-minus-second convention -- confirm). A z-interval is returned when
    ``_is_proportion(control, test)`` is truthy, a t-interval otherwise.
    Both are computed with the library's default arguments.
    """
    comparison = CompareMeans(DescrStatsW(test), DescrStatsW(control))
    # Pick the bound method first, then call it once.
    interval = (comparison.zconfint_diff
                if _is_proportion(control, test)
                else comparison.tconfint_diff)
    return interval()
def mean_diff_confint_ind(sample1, sample2, alpha=0.05):
    """Confidence interval for the difference in means of two independent samples.

    Parameters
    ----------
    sample1 : array_like
        First sample.
    sample2 : array_like
        Second sample.
    alpha : float in (0, 1), optional
        Significance level; the confidence level is ``1 - alpha``. The
        default of 0.05 is the same value ``tconfint_diff`` uses when no
        ``alpha`` is given, so two-argument calls are unaffected.

    Returns
    -------
    lower, upper : floats
        Lower and upper bounds of the confidence interval.
    """
    cm = CompareMeans(DescrStatsW(sample1), DescrStatsW(sample2))
    # Forward alpha explicitly so callers can request other confidence levels.
    return cm.tconfint_diff(alpha=alpha)