예제 #1
0
def test_ordinal_association():
    """Check ``Table.test_ordinal_association`` against stored references.

    Every entry of ``tables`` is tested twice with ``shift_zeros=False``:
    first with consecutive integer scores 1, 2, ... on both margins
    (compared with the ``lbl_*`` columns of ``r_results``), then with the
    column scores squared (compared with the ``lbl2_*`` columns).
    """
    default_tol = {}
    loose_tol = {"rtol": 1e-5, "atol": 1e-5}

    for idx, counts in enumerate(tables):
        row_scores = np.arange(1, counts.shape[0] + 1)
        col_scores = np.arange(1, counts.shape[1] + 1)

        # (column scores, reference column names, tolerance of chi2 check)
        scenarios = [
            (col_scores,
             ("lbl_stat", "lbl_expval", "lbl_var", "lbl_chi2", "lbl_pvalue"),
             loose_tol),
            (np.square(col_scores),
             ("lbl2_stat", "lbl2_expval", "lbl2_var", "lbl2_chi2",
              "lbl2_pvalue"),
             default_tol),
        ]

        for scores, names, chi2_tol in scenarios:
            stat_col, mean_col, var_col, chi2_col, pval_col = names

            tab = ctab.Table(counts, shift_zeros=False)
            outcome = tab.test_ordinal_association(row_scores, scores)

            assert_allclose(outcome.statistic, r_results.loc[idx, stat_col])
            assert_allclose(outcome.null_mean, r_results.loc[idx, mean_col])
            # The null SD is squared before comparing with the *_var column.
            assert_allclose(outcome.null_sd ** 2,
                            r_results.loc[idx, var_col])
            # The z-score is squared before comparing with the *_chi2 column.
            assert_allclose(outcome.zscore ** 2,
                            r_results.loc[idx, chi2_col], **chi2_tol)
            assert_allclose(outcome.pvalue,
                            r_results.loc[idx, pval_col], **loose_tol)
예제 #2
0
def test_shifting():
    """Check how ``Table`` treats zero cells with and without shifting.

    With ``shift_zeros=False`` the stored table must equal the input; with
    ``shift_zeros=True`` (also the behaviour when the argument is omitted,
    as the last two checks expect) zero cells are expected to read 0.5.
    """
    shape = (3, 4)
    all_zero = np.zeros(shape, dtype=np.float64)
    all_half = np.full(shape, 0.5)

    # Keep this order: the unshifted table is checked before a shifting
    # Table is built from the same array.
    unshifted = ctab.Table(all_zero, shift_zeros=False)
    assert_equal(unshifted.table, all_zero)
    shifted = ctab.Table(all_zero, shift_zeros=True)
    assert_equal(shifted.table, all_half)

    zero_diag = np.array([[0, 1, 2],
                          [3, 0, 4],
                          [5, 6, 0]], dtype=np.float64)
    half_diag = np.array([[0.5, 1, 2],
                          [3, 0.5, 4],
                          [5, 6, 0.5]], dtype=np.float64)

    # Default construction first, then the explicit shift_zeros=True.
    for kwargs in ({}, {"shift_zeros": True}):
        assert_equal(ctab.Table(zero_diag, **kwargs).table, half_diag)
예제 #3
0
def test_local_odds():
    """Check ``local_oddsratios`` on a 3x3 table holding the counts 1..9.

    Two entries are compared with hand-computed values, and the log of the
    odds ratios is compared with ``local_log_oddsratios``.
    """
    counts = np.asarray([[1, 2, 3],
                         [4, 5, 6],
                         [7, 8, 9]])
    tab = ctab.Table(counts)
    tol = {"atol": 1e-5, "rtol": 1e-5}

    odds = tab.local_oddsratios

    # 5/8 == (1 * 5) / (2 * 4), from the top-left 2x2 block of the table.
    assert_allclose(odds[0, 0], 5.0 / 8)
    # 12/15 == (2 * 6) / (3 * 5), from the top-right 2x2 block.
    assert_allclose(odds[0, 1], 12.0 / 15, **tol)

    log_odds = np.log(odds)
    assert_allclose(log_odds, tab.local_log_oddsratios, **tol)
예제 #4
0
def test_cumulative_odds():
    """Check ``cumulative_oddsratios`` on a 3x3 table holding counts 1..9.

    Two entries are compared with hand-computed values, and the log of the
    odds ratios is compared with ``cumulative_log_oddsratios``.
    """
    counts = np.asarray([[1, 2, 3],
                         [4, 5, 6],
                         [7, 8, 9]])
    tab = ctab.Table(counts)
    tol = {"atol": 1e-5, "rtol": 1e-5}

    odds = tab.cumulative_oddsratios

    # Cut after row 0 / column 0: blocks sum to 1, 5, 11 and 28.
    assert_allclose(odds[0, 0], 28.0 / (5 * 11))
    # Cut after row 0 / column 1: blocks sum to 3, 3, 24 and 15.
    assert_allclose(odds[0, 1], float(3 * 15) / (3 * 24), **tol)

    log_odds = np.log(odds)
    assert_allclose(log_odds, tab.cumulative_log_oddsratios, **tol)
예제 #5
0
def test_chi2_association():
    """Compare ``test_nominal_association`` with scipy's chi2_contingency.

    A seeded random 4x4 table of counts in [10, 30) is analysed by both
    implementations; the statistic and p-value must agree.
    """
    from scipy.stats import chi2_contingency

    # Fixed seed so the same table is generated on every run.
    np.random.seed(8743)
    counts = np.random.randint(10, 30, size=(4, 4))

    reference = chi2_contingency(counts)
    ref_statistic = reference[0]
    ref_pvalue = reference[1]

    outcome = ctab.Table(counts).test_nominal_association()

    assert_allclose(outcome.statistic, ref_statistic)
    assert_allclose(outcome.pvalue, ref_pvalue)
예제 #6
0
def test_resids():
    """Check fitted values, chi2 contributions and residuals of a 2x4 table.

    Fitted values and chi2 contributions are compared with figures taken
    from SAS; the Pearson and standardized residuals are regression values.
    All comparisons use ``atol=rtol=1e-4``.
    """
    tol = {"atol": 1e-4, "rtol": 1e-4}

    # CHD x serum data
    counts = [[12, 8, 31, 41],
              [307, 246, 439, 245]]

    # Reference values produced by SAS
    expected_fit = [[22.083, 17.583, 32.536, 19.798],
                    [296.92, 236.42, 437.46, 266.2]]
    expected_chi2 = [[4.6037, 5.223, 0.0725, 22.704],
                     [0.3424, 0.3885, 0.0054, 1.6886]]

    # Regression-test values (not externally validated)
    expected_pearson = np.array([
        [-2.14562121, -2.28538719, -0.26923882, 4.7649169],
        [0.58514314, 0.62325942, 0.07342547, -1.29946443],
    ])
    expected_standardized = np.array([
        [-2.55112945, -2.6338782, -0.34712127, 5.5751083],
        [2.55112945, 2.6338782, 0.34712127, -5.5751083],
    ])

    fitted = ctab.Table(counts)
    assert_allclose(fitted.fittedvalues, expected_fit, **tol)
    assert_allclose(fitted.chi2_contribs, expected_chi2, **tol)
    assert_allclose(fitted.resid_pearson, expected_pearson, **tol)
    assert_allclose(fitted.standardized_resids, expected_standardized, **tol)
# Report which attribute is being analysed and its list of values.
# `attr`, `attr_list`, `df` and `ct` are defined outside this fragment.
print('Attribute ', attr, 'Value List: ', attr_list)
print('=' * 80)

#
# Compute the chi-square test on the cross-tabulation of the attribute
# (rows) against column 'A21' (columns).
#

ct0 = pd.crosstab(columns=df['A21'], index=df[attr])
chi2_test = st.chi2_contingency(ct0)
# Element 3 of the chi2_contingency result is the expected-frequency array.
print('Expected Frequencies', chi2_test[3])
print('-' * 80)

#
# Compute the per-cell chi-square contributions.
# NOTE(review): `sct` is presumably statsmodels' contingency_tables module;
# confirm against the imports.
#
table = sct.Table(ct0)
print('chi-square contributions: ', table.chi2_contribs)

#
# Create a new frame with the expected values if the attribute and default
# were independent, labelled with the same index and columns as ct0.
#
df0 = pd.DataFrame(chi2_test[3], columns=ct0.columns, index=ct0.index)

#
# Append the expected values of Good and Bad outcomes to `ct`, then rename
# all of its columns.
# NOTE(review): this assumes the columns of ct0 are ordered (Bad, Good) and
# that `ct` has exactly five columns after the assignment, otherwise the
# rename below fails on a length mismatch -- confirm where `ct` is built.
#

ct[['Expected_Bads', 'Expected_Goods']] = df0
columns = ['Bads', 'Goods', 'Total', 'Expected_Bads', 'Expected_Goods']
ct.columns = columns