Example #1
0
def test_ttest():
    """Check independent-sample t-tests and confidence intervals.

    The two samples are column 2 of ``clinic``, split at row 15.  Results of
    ``smws.ttest_ind`` and ``CompareMeans.tconfint_diff`` are compared to the
    module-level ``ttest_clinic_indep_*`` reference objects, and
    ``DescrStatsW.get_compare`` is checked to reuse the first sample's
    statistics object instead of copying it.
    """
    sample1, sample2 = clinic[:15, 2], clinic[15:, 2]

    # (extra keyword arguments for ttest_ind, reference result)
    ttest_cases = [
        ({'alternative': 'larger'}, ttest_clinic_indep_1_g),
        ({'alternative': 'smaller'}, ttest_clinic_indep_1_l),
        ({'alternative': 'smaller', 'value': 1}, ttest_clinic_indep_1_l_mu),
    ]
    # Run every test first, then compare, so all calls happen before any
    # assertion is evaluated.
    outcomes = [
        (smws.ttest_ind(sample1, sample2, usevar='unequal', **extra), expected)
        for extra, expected in ttest_cases
    ]
    for actual, expected in outcomes:
        # Only statistic and p-value are compared; the degrees of freedom
        # (actual[2] vs. expected.df) are not checked here.
        assert_almost_equal(actual[0], expected.statistic, decimal=13)
        assert_almost_equal(actual[1], expected.p_value, decimal=13)

    compare = smws.CompareMeans(smws.DescrStatsW(sample1),
                                smws.DescrStatsW(sample2))
    # (alternative, usevar, reference result, comparison function);
    # one-sided intervals go through the inf-aware comparison helper.
    confint_cases = [
        ('two-sided', 'unequal', ttest_clinic_indep_1_two_mu,
         assert_almost_equal),
        ('two-sided', 'pooled', ttest_clinic_indep_1_two_mu_pooled,
         assert_almost_equal),
        ('smaller', 'unequal', ttest_clinic_indep_1_l,
         assert_almost_equal_inf),
        ('larger', 'unequal', ttest_clinic_indep_1_g,
         assert_almost_equal_inf),
    ]
    for alternative, usevar, expected, check in confint_cases:
        interval = compare.tconfint_diff(alternative=alternative,
                                         usevar=usevar)
        check(interval, expected.conf_int, decimal=13)

    # get_compare: accepts a DescrStatsW instance or raw data as the second
    # sample, and must always reuse the same d1 object (no copying).
    compare = smws.CompareMeans(smws.DescrStatsW(sample1),
                                smws.DescrStatsW(sample2))
    others = (compare.d2, sample2, np.hstack((sample2, sample2)))
    derived = [compare.d1.get_compare(other) for other in others]
    for result in derived:
        assert_(compare.d1 is result.d1)
Example #2
0
def t_test_two_samp(a, b, alpha, alternative='two-sided'):
    """Summarise a two-sample Welch (unequal-variance) t-test as a Series.

    All reported quantities rest on the same unequal-variance assumption:
    the test statistic, the p-value, the Satterthwaite degrees of freedom
    and the confidence interval for the difference in means.

    Parameters
    ----------
    a, b : array-like
        The two samples; each must provide a ``mean()`` method.
    alpha : float
        Significance level for the confidence interval (coverage is
        ``1 - alpha``).
    alternative : str, optional
        Alternative hypothesis, forwarded to both the t-test and the
        confidence interval (statsmodels spelling: ``'two-sided'``,
        ``'larger'`` or ``'smaller'``).

    Returns
    -------
    pandas.Series
        Entries ``DegFreedom``, ``Difference``, ``Statistic``, ``PValue``,
        ``Low95CI`` and ``High95CI``.  The interval labels are fixed names;
        the actual coverage is determined by ``alpha``.
    """
    diff = a.mean() - b.mean()

    means = ws.CompareMeans(ws.DescrStatsW(a), ws.DescrStatsW(b))

    # Take statistic and p-value from the same CompareMeans object with
    # usevar='unequal' so they match the degrees of freedom and the interval
    # below, and so `alternative` is honoured by the test as well.
    statistic, p_value, _ = means.ttest_ind(alternative=alternative,
                                            usevar='unequal')
    confint = means.tconfint_diff(alpha=alpha,
                                  alternative=alternative,
                                  usevar='unequal')
    degfree = means.dof_satt()

    index = [
        'DegFreedom', 'Difference', 'Statistic', 'PValue', 'Low95CI',
        'High95CI'
    ]
    return pd.Series(
        [degfree, diff, statistic, p_value, confint[0], confint[1]],
        index=index)
Example #3
0
plt.ylabel('Median value of owner-occupied homes in $1000s')
plt.show()

print('\n\nPart 2\n---------------------------')
print(
    "The null hypothesis examines the data to find how often we could get the same data randomly"
)
print(
    "Normally, we reject the null hypothesis if by random we could only get that result less than 5% of the time."
)

# Split MEDV into the two groups defined by the CHAS indicator.
dfchas1 = df.MEDV[df.CHAS == 1]
dfchas0 = df.MEDV[df.CHAS == 0]

# Welch's t-test (equal_var=False): the p-value then rests on the same
# unequal-variance assumption as the confidence interval computed below.
# NOTE(review): assumes `stats` is scipy.stats -- confirm against the imports.
ttest, pval = stats.ttest_ind(dfchas1, dfchas0, equal_var=False)
print('P-val: ', pval, 'ttest value', ttest)

# Confidence interval for the difference in group means (unequal variances).
means = ws.CompareMeans(ws.DescrStatsW(dfchas1), ws.DescrStatsW(dfchas0))
confint = means.tconfint_diff(alpha=0.05,
                              alternative='two-sided',
                              usevar='unequal')
print('Confidence interval:', confint[0], confint[1])

# Solve for the size of the first group (nobs1=None) given effect size,
# alpha and power; `ratio` is the second group's size relative to the first.
ratio = len(dfchas0) / len(dfchas1)
gsize = tt_ind_solve_power(effect_size=0.6,
                           nobs1=None,
                           alpha=0.05,
                           power=0.8,
                           ratio=ratio,
                           alternative='two-sided')
print(
    'Assume an effect size (Cohen’s d) of 0.6. If you want 80% power, what group size is necessary?',
    gsize)