import numpy as np
import statsmodels.stats.weightstats as smws
from numpy.testing import assert_, assert_almost_equal

# `clinic` and the `ttest_clinic_indep_1_*` expected-results objects are
# module-level fixtures of the surrounding test module, as is the
# `assert_almost_equal_inf` helper (which tolerates infinite interval bounds).


def test_ttest():
    x1, x2 = clinic[:15, 2], clinic[15:, 2]
    all_tests = []
    t1 = smws.ttest_ind(x1, x2, alternative='larger', usevar='unequal')
    all_tests.append((t1, ttest_clinic_indep_1_g))
    t2 = smws.ttest_ind(x1, x2, alternative='smaller', usevar='unequal')
    all_tests.append((t2, ttest_clinic_indep_1_l))
    t3 = smws.ttest_ind(x1, x2, alternative='smaller', usevar='unequal',
                        value=1)
    all_tests.append((t3, ttest_clinic_indep_1_l_mu))

    for res1, res2 in all_tests:
        assert_almost_equal(res1[0], res2.statistic, decimal=13)
        assert_almost_equal(res1[1], res2.p_value, decimal=13)
        # assert_almost_equal(res1[2], res2.df, decimal=13)

    cm = smws.CompareMeans(smws.DescrStatsW(x1), smws.DescrStatsW(x2))
    ci = cm.tconfint_diff(alternative='two-sided', usevar='unequal')
    assert_almost_equal(ci, ttest_clinic_indep_1_two_mu.conf_int, decimal=13)

    ci = cm.tconfint_diff(alternative='two-sided', usevar='pooled')
    assert_almost_equal(ci, ttest_clinic_indep_1_two_mu_pooled.conf_int,
                        decimal=13)

    ci = cm.tconfint_diff(alternative='smaller', usevar='unequal')
    assert_almost_equal_inf(ci, ttest_clinic_indep_1_l.conf_int, decimal=13)

    ci = cm.tconfint_diff(alternative='larger', usevar='unequal')
    assert_almost_equal_inf(ci, ttest_clinic_indep_1_g.conf_int, decimal=13)

    # test get_compare
    cm = smws.CompareMeans(smws.DescrStatsW(x1), smws.DescrStatsW(x2))
    cm1 = cm.d1.get_compare(cm.d2)
    cm2 = cm.d1.get_compare(x2)
    cm3 = cm.d1.get_compare(np.hstack((x2, x2)))
    # all three reuse the same d1, no copying
    assert_(cm.d1 is cm1.d1)
    assert_(cm.d1 is cm2.d1)
    assert_(cm.d1 is cm3.d1)
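# A minimal, self-contained sketch of the API exercised above, run on
# synthetic data (the arrays and seed here are illustrative assumptions,
# not the fixtures used by the test).
import numpy as np
import statsmodels.stats.weightstats as smws

rng = np.random.default_rng(0)
x1 = rng.normal(loc=5.0, scale=1.0, size=15)
x2 = rng.normal(loc=4.5, scale=1.5, size=15)

# ttest_ind returns (statistic, p-value, degrees of freedom).
tstat, pval, df = smws.ttest_ind(x1, x2, alternative='larger',
                                 usevar='unequal')

# CompareMeans wraps two DescrStatsW instances; tconfint_diff gives a
# confidence interval for the difference in means.
cm = smws.CompareMeans(smws.DescrStatsW(x1), smws.DescrStatsW(x2))
low, upp = cm.tconfint_diff(alternative='two-sided', usevar='unequal')
print(tstat, pval, df, (low, upp))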
import pandas as pd
import scipy.stats as ss
import statsmodels.stats.weightstats as ws


def t_test_two_samp(a, b, alpha, alternative='two-sided'):
    """Two-sample Welch t-test with confidence interval, as a pd.Series."""
    diff = a.mean() - b.mean()
    # equal_var=False (Welch) keeps the test statistic consistent with the
    # unequal-variance confidence interval and dof_satt() below.
    res = ss.ttest_ind(a, b, equal_var=False)
    means = ws.CompareMeans(ws.DescrStatsW(a), ws.DescrStatsW(b))
    confint = means.tconfint_diff(alpha=alpha, alternative=alternative,
                                  usevar='unequal')
    degfree = means.dof_satt()  # Satterthwaite degrees of freedom
    # Note: the CI labels assume the default alpha=0.05.
    index = ['DegFreedom', 'Difference', 'Statistic', 'PValue',
             'Low95CI', 'High95CI']
    return pd.Series([degfree, diff, res[0], res[1], confint[0], confint[1]],
                     index=index)
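# Hypothetical usage of t_test_two_samp, on synthetic data (the arrays,
# seed, and group sizes are assumptions for illustration only).
import numpy as np

rng = np.random.default_rng(42)
group_a = rng.normal(loc=10.0, scale=2.0, size=30)
group_b = rng.normal(loc=11.0, scale=2.5, size=35)
print(t_test_two_samp(group_a, group_b, alpha=0.05))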
import matplotlib.pyplot as plt
import scipy.stats as stats
import statsmodels.stats.weightstats as ws
from statsmodels.stats.power import tt_ind_solve_power

# `df` is assumed to be the Boston housing DataFrame loaded earlier, with
# MEDV (median home value) and CHAS (Charles River dummy) columns.
plt.ylabel('Median value of owner-occupied homes in $1000s')
plt.show()

print('\n\nPart 2\n---------------------------')
print("The null hypothesis asks how often we could get a result at least "
      "this extreme by chance alone.")
print("Conventionally, we reject the null hypothesis when such a result "
      "would occur by chance less than 5% of the time.")

dfchas1 = df.MEDV[df.CHAS == 1]
dfchas0 = df.MEDV[df.CHAS == 0]
# equal_var=False (Welch) keeps the test consistent with the
# unequal-variance confidence interval below.
ttest, pval = stats.ttest_ind(dfchas1, dfchas0, equal_var=False)
print('P-val:', pval, 't-test value:', ttest)

means = ws.CompareMeans(ws.DescrStatsW(dfchas1), ws.DescrStatsW(dfchas0))
confint = means.tconfint_diff(alpha=0.05, alternative='two-sided',
                              usevar='unequal')
print('Confidence interval:', confint[0], confint[1])

ratio = len(dfchas0) / len(dfchas1)
gsize = tt_ind_solve_power(effect_size=0.6, nobs1=None, alpha=0.05,
                           power=0.8, ratio=ratio, alternative='two-sided')
print("Assume an effect size (Cohen's d) of 0.6. If you want 80% power, "
      "what group size is necessary?", gsize)
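# Sanity check (a sketch added here, not part of the original script):
# plugging the solved group size back into tt_ind_solve_power with
# power=None should recover roughly the requested 80% power.
achieved = tt_ind_solve_power(effect_size=0.6, nobs1=gsize, alpha=0.05,
                              power=None, ratio=ratio,
                              alternative='two-sided')
print('Achieved power at the solved group size:', achieved)  # ~0.8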