def test_SquareTable_from_data():
    """SquareTable summaries agree across three ways of supplying the data.

    Two columns of 100 seeded random integers drawn from [0, 5) are
    cross-tabulated. The table is then passed to ``SquareTable`` as a
    DataFrame, as raw paired observations via ``from_data``, and as a plain
    ndarray; all three summaries must render to the same text.
    """
    np.random.seed(434)
    # Draw v1 before v2 so the seeded random stream is consumed in a fixed
    # order.
    first = np.random.randint(0, 5, 100)
    second = np.random.randint(0, 5, 100)
    df = pd.DataFrame({"v1": first, "v2": second})
    table = pd.crosstab(df["v1"], df["v2"])

    from_frame = ctab.SquareTable(table)
    from_raw = ctab.SquareTable.from_data(df)
    from_array = ctab.SquareTable(np.asarray(table))

    text_frame = from_frame.summary().as_text()
    text_raw = from_raw.summary().as_text()
    text_array = from_array.summary().as_text()

    assert_equal(text_frame, text_raw)
    assert_equal(text_raw, text_array)
Ejemplo n.º 2
0
def core(tsx, tsy):
    """
    Test two paired categorical variables for symmetry and build a report.

    The two inputs are cross-tabulated. A 2x2 table is tested with
    ``contingency_tables.mcnemar``; any other shape is tested with the
    Bowker symmetry test of ``contingency_tables.SquareTable``.

    Parameters
    ----------
    tsx : array-like (typically a pandas.Series)
        Categorical (nominal) data; its levels form the table rows.
    tsy : array-like (typically a pandas.Series)
        Categorical (nominal) data; its levels form the table columns.

    Returns
    -------
    pandas.DataFrame
        One row per level of ``tsx`` plus a '总计' (total) row. Each cell
        holds the text ``"observed (expected)"``; the '总计' row and column
        hold the marginal totals. The frame is indexed by
        ``['检验方法', '卡方统计量', 'p-值', '类别']``, i.e. the test name,
        the test statistic, the p-value and the row category.

    Notes
    -----
    NOTE(review): ``mcnemar`` is called with its default arguments; if those
    select the exact binomial variant, ``statistic`` is not a chi-square
    value even though it is reported under '卡方统计量' -- confirm against
    the statsmodels documentation.
    """

    crosstab = pd.crosstab(tsx, tsy)
    crosstab2 = pd.crosstab(tsx, tsy, margins=True)
    crosstab2 = crosstab2.rename(columns={'All': '总计'}, index={'All': '总计'})

    if crosstab.shape == (2, 2):
        res = contingency_tables.mcnemar(crosstab)
        method = 'mcnemar'

    else:
        res = contingency_tables.SquareTable(crosstab).symmetry(
            method="bowker")
        method = 'bowker'

    chi2 = res.statistic
    p = res.pvalue
    expected = stats.contingency.expected_freq(crosstab)

    # Label the expected counts with the crosstab's own (sorted) labels.
    # ``unique()`` returns levels in order of appearance and keeps NaN, so
    # using it here attached expected values to the wrong cells (or raised a
    # shape error) whenever the data was not already in sorted order.
    dfe = pd.DataFrame(expected, columns=crosstab.columns,
                       index=crosstab.index).round(3)
    dfte = crosstab.astype(str) + ' (' + dfe.astype(str) + ')'
    dfte['总计'] = crosstab2['总计']
    dfte.loc['总计'] = crosstab2.loc['总计']
    dfte['检验方法'] = method
    dfte['卡方统计量'] = chi2
    dfte['p-值'] = p
    dfte.index.name = '类别'
    return dfte.reset_index().set_index(['检验方法', '卡方统计量', 'p-值', '类别'])
Ejemplo n.º 3
0
def test_SquareTable_from_data():
    """SquareTable is consistent across input forms and reports its shape.

    Two columns of 100 seeded random integers drawn from [0, 5) are
    cross-tabulated. The table is passed to ``SquareTable`` as a DataFrame,
    as raw paired observations via ``from_data``, and as a plain ndarray;
    all three summaries must render to the same text. The string form and
    the top-left cell of the DataFrame-based instance are also pinned.
    """
    np.random.seed(434)
    # Draw v1 before v2 so the seeded random stream is consumed in a fixed
    # order.
    first = np.random.randint(0, 5, 100)
    second = np.random.randint(0, 5, 100)
    df = pd.DataFrame({"v1": first, "v2": second})
    table = pd.crosstab(df["v1"], df["v2"])

    from_frame = ctab.SquareTable(table)
    from_raw = ctab.SquareTable.from_data(df)
    from_array = ctab.SquareTable(np.asarray(table))

    text_frame = from_frame.summary().as_text()
    text_raw = from_raw.summary().as_text()
    text_array = from_array.summary().as_text()

    assert_equal(text_frame, text_raw)
    assert_equal(text_raw, text_array)

    # The string form starts with a fixed header naming the table size.
    header_ok = str(from_frame).startswith(
        'A 5x5 contingency table with counts:')
    assert_equal(header_ok, True)
    assert_equal(from_frame.table[0, 0], 8.)
Ejemplo n.º 4
0
def test_SquareTable_nonsquare():
    """SquareTable pads a table whose row and column labels differ.

    The input has row labels {0, 1, 3} and column labels {0, 2, 3}. The
    expected result is a 4x4 float table over labels 0..3, with zeros in the
    row for label 2 and the column for label 1.
    """
    counts = [[1, 0, 3], [2, 1, 4], [3, 0, 5]]
    frame = pd.DataFrame(counts, index=[0, 1, 3], columns=[0, 2, 3])

    squared = ctab.SquareTable(frame, shift_zeros=False)

    expected = np.asarray(
        [
            [1, 0, 0, 3],
            [2, 0, 1, 4],
            [0, 0, 0, 0],
            [3, 0, 0, 5],
        ],
        dtype=np.float64,
    )

    assert_equal(expected, squared.table)