def test_SquareTable_from_data():
    """Check that three ways of building a SquareTable give the same summary.

    The table is built from a crosstab DataFrame, from the raw two-column
    data via ``from_data``, and from the crosstab converted to a plain array.
    """
    # NOTE(review): a function with this same name is defined again later in
    # this source; if both live in one module the later definition replaces
    # this one and this body never runs -- confirm and dedupe.
    n_obs = 100
    np.random.seed(434)
    # Draw order matters for reproducibility: "v1" first, then "v2".
    first = np.random.randint(0, 5, n_obs)
    second = np.random.randint(0, 5, n_obs)
    frame = pd.DataFrame({"v1": first, "v2": second})

    counts = pd.crosstab(frame["v1"], frame["v2"])
    from_crosstab = ctab.SquareTable(counts)
    from_raw = ctab.SquareTable.from_data(frame)
    from_array = ctab.SquareTable(np.asarray(counts))

    comparisons = (
        (from_crosstab, from_raw),
        (from_raw, from_array),
    )
    for left, right in comparisons:
        assert_equal(left.summary().as_text(), right.summary().as_text())
def core(tsx, tsy):
    """Run a symmetry test on two categorical series and tabulate the result.

    A contingency table of ``tsx`` against ``tsy`` is built. A 2x2 table is
    tested with ``contingency_tables.mcnemar``; any other shape is tested with
    ``SquareTable(...).symmetry(method="bowker")``.

    Parameters
    ----------
    tsx : pandas.Series
        Categorical (nominal) data; forms the rows of the table.
    tsy : pandas.Series
        Categorical (nominal) data; forms the columns of the table.

    Returns
    -------
    pandas.DataFrame
        One row per ``tsx`` category plus a totals row. Each cell holds
        ``"observed (expected)"`` as text, and a totals column is appended.
        The index is the MultiIndex (test method, statistic, p-value,
        category).
    """
    crosstab = pd.crosstab(tsx, tsy)
    crosstab2 = pd.crosstab(tsx, tsy, margins=True)
    crosstab2 = crosstab2.rename(columns={'All': '总计'}, index={'All': '总计'})

    if crosstab.shape == (2, 2):
        # NOTE(review): with statsmodels' default ``exact=True`` the reported
        # statistic is presumably min(n1, n2) rather than a chi-square value,
        # although it is stored under the chi-square column -- confirm.
        res = contingency_tables.mcnemar(crosstab)
        method = 'mcnemar'
    else:
        res = contingency_tables.SquareTable(crosstab).symmetry(
            method="bowker")
        method = 'bowker'
    chi2 = res.statistic
    p = res.pvalue

    expected = stats.contingency.expected_freq(crosstab)
    # The expected values are laid out exactly like ``crosstab``, so they must
    # carry crosstab's own labels. ``Series.unique()`` returns labels in order
    # of appearance (and keeps NaN), which mislabels the values and breaks the
    # label-aligned concatenation below.
    dfe = pd.DataFrame(expected, columns=crosstab.columns,
                       index=crosstab.index).round(3)

    dfte = crosstab.astype(str) + ' (' + dfe.astype(str) + ')'
    dfte['总计'] = crosstab2['总计']
    dfte.loc['总计'] = crosstab2.loc['总计']
    dfte['检验方法'] = method
    dfte['卡方统计量'] = chi2
    dfte['p-值'] = p
    dfte.index.name = '类别'
    return dfte.reset_index().set_index(['检验方法', '卡方统计量', 'p-值', '类别'])
def test_SquareTable_from_data():
    """Check SquareTable construction paths, its string form and one cell.

    The table is built from a crosstab DataFrame, from the raw two-column
    data via ``from_data``, and from the crosstab converted to a plain array.
    All three must give the same summary text. The string form and the
    ``[0, 0]`` cell of the DataFrame-built table are then pinned to the values
    this seed produces.
    """
    sample_size = 100
    np.random.seed(434)
    # Draw order matters for reproducibility: "v1" first, then "v2".
    col_v1 = np.random.randint(0, 5, sample_size)
    col_v2 = np.random.randint(0, 5, sample_size)
    data = pd.DataFrame({"v1": col_v1, "v2": col_v2})

    xtab = pd.crosstab(data["v1"], data["v2"])
    tbl_frame = ctab.SquareTable(xtab)
    tbl_data = ctab.SquareTable.from_data(data)
    tbl_array = ctab.SquareTable(np.asarray(xtab))

    for lhs, rhs in ((tbl_frame, tbl_data), (tbl_data, tbl_array)):
        assert_equal(lhs.summary().as_text(), rhs.summary().as_text())

    description = str(tbl_frame)
    has_header = description.startswith('A 5x5 contingency table with counts:')
    assert_equal(has_header, True)

    top_left = tbl_frame.table[0, 0]
    assert_equal(top_left, 8.)
def test_SquareTable_nonsquare():
    """Check the table SquareTable stores for mismatched row/column labels.

    The input frame has row labels ``[0, 1, 3]`` and column labels
    ``[0, 2, 3]``. The expected stored table is 4x4, with an all-zero row and
    an all-zero column in the positions whose label occurs on only one axis.
    """
    counts = [
        [1, 0, 3],
        [2, 1, 4],
        [3, 0, 5],
    ]
    labelled = pd.DataFrame(counts, index=[0, 1, 3], columns=[0, 2, 3])

    # shift_zeros=False so the zero cells are compared as-is.
    square = ctab.SquareTable(labelled, shift_zeros=False)

    expected = np.asarray(
        [
            [1, 0, 0, 3],
            [2, 0, 1, 4],
            [0, 0, 0, 0],
            [3, 0, 0, 5],
        ],
        dtype=np.float64,
    )
    assert_equal(expected, square.table)