def test_cochransq():
    """Check cochrans_q against published values and against mcnemar."""
    # Example from the dataplot docs, Conover p. 253
    # http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/cochran.htm
    rows = [
        (1, 1, 1),
        (1, 1, 1),
        (0, 1, 0),
        (1, 1, 0),
        (0, 0, 0),
        (1, 1, 1),
        (1, 1, 1),
        (1, 1, 0),
        (0, 0, 1),
        (0, 1, 0),
        (1, 1, 1),
        (1, 1, 1),
    ]
    x = np.array(rows)

    # Reference statistic and p-value, in the order cochrans_q is compared.
    expected = [2.8, 0.246597]
    observed = cochrans_q(x)
    assert_almost_equal(observed, expected)

    # With only two samples, Cochran's Q has to agree with mcnemar when the
    # latter is called with exact=False and correction=False.
    first_two = x[:, :2]
    a, b = first_two.T
    mcnemar_result = mcnemar(a, b, exact=False, correction=False)
    assert_almost_equal(mcnemar_result, cochrans_q(first_two))
def test_cochransq():
    """Verify cochrans_q on a 12 x 3 binary table and its 2-sample case."""
    # Example from the dataplot docs, Conover p. 253
    # http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/cochran.htm
    x = np.array([
        [1, 1, 1], [1, 1, 1], [0, 1, 0], [1, 1, 0],
        [0, 0, 0], [1, 1, 1], [1, 1, 1], [1, 1, 0],
        [0, 0, 1], [0, 1, 0], [1, 1, 1], [1, 1, 1],
    ])

    q_expected, p_expected = 2.8, 0.246597
    assert_almost_equal(cochrans_q(x), [q_expected, p_expected])

    # Equivalence of mcnemar and cochrans_q for 2 samples: restrict the
    # table to its first two columns and feed each column to mcnemar.
    two_columns = x[:, :2]
    a, b = two_columns.T
    assert_almost_equal(
        mcnemar(a, b, exact=False, correction=False),
        cochrans_q(two_columns),
    )
def test_cochransq3():
    """Compare cochrans_q with SAS reference values on weighted data."""
    # Another example compared to SAS.  The source data are in frequency
    # weight format: one row per distinct response pattern over the three
    # variables (A, B, C) together with how often it occurred.  The original
    # F/U labels map onto the 0/1 coding of ``cases`` row by row:
    #   A B C  count        A B C  count
    #   F F F      6        F U F      2
    #   U F F      2        U U F      6
    #   F F U     16        F U U      4
    #   U F U      4        U U U      6
    # (A structured array holding these labels used to be built here but was
    # never read, so it has been dropped.)
    cases = np.array([
        [0, 0, 0],
        [1, 0, 0],
        [0, 0, 1],
        [1, 0, 1],
        [0, 1, 0],
        [1, 1, 0],
        [0, 1, 1],
        [1, 1, 1],
    ])
    count = np.array([6, 2, 16, 4, 2, 6, 4, 6])

    # Expand the weighted patterns into one row per observation.
    data = np.repeat(cases, count, axis=0)

    res = cochrans_q(data)
    # Reference values are only given to 4 decimals, hence the absolute
    # tolerance.
    assert_allclose(res, [8.4706, 0.0145], atol=5e-5)
def cochranQ():
    """Demonstrate Cochran's Q test on a small success/failure data set.

    Twelve subjects each attempt three tasks, and every attempt is coded
    1 for success or 0 for failure.  Subject 1, for example, succeeded at
    task 2 but failed tasks 1 and 3.  The question is whether performance
    differs between the three tasks.

    The function prints the Q statistic and p-value, followed by a note
    when p is below 0.05.  It returns nothing.
    """
    # One row per task, one column per subject.
    outcomes_by_task = np.array(
        [
            [0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0],
            [1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1],
            [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        ]
    )

    # A DataFrame with named columns states directly what the values mean;
    # transposing puts subjects in rows and tasks in columns.
    task_labels = ["Task1", "Task2", "Task3"]
    df = pd.DataFrame(outcomes_by_task.T, columns=task_labels)

    # --- >>> START stats <<< ---
    q_stat, p_value = cochrans_q(df)
    # --- >>> STOP stats <<< ---

    banner = "\nCOCHRAN'S Q -----------------------------------------------------"
    summary = "Q = {0:5.3f}, p = {1:5.3f}".format(q_stat, p_value)
    print(banner)
    print(summary)
    if p_value < 0.05:
        print("There is a significant difference between the three tasks.")
def _label_rows(table, label):
    """Prefix every row of ``table`` with ``label`` as an outer index level.

    The existing (unnamed) index is moved into an ``'index'`` column, the
    rows are then all labelled ``label``, and both are combined into a
    two-level row index ``(label, original index value)``.
    """
    labelled = table.reset_index()
    # One label per row, however many distinct values were counted.
    labelled.index = [label] * len(labelled)
    # The second reset_index names the new column 'level_0' because an
    # 'index' column already exists.
    return labelled.reset_index().set_index(['level_0', 'index'])


def core(x):
    """Tabulate value frequencies per column and run Cochran's Q test.

    Parameters
    ----------
    x : pd.DataFrame
        One row per observation and one column per variable.
        NOTE(review): presumably binary-coded, as Cochran's Q requires;
        confirm against the caller.

    Returns
    -------
    dict
        ``'频数分析结果'`` (frequency analysis): a DataFrame with one row per
        column of ``x`` and two-level columns ``(频数 | 百分比, value)``,
        i.e. counts and counts divided by the number of rows.
        ``'CochranQ检验结果'`` (test result): a one-row DataFrame indexed by
        the sample size (``样本量``) holding the Q statistic, ``p`` and
        ``df`` (number of columns minus one).
    """
    n = len(x)

    # Rows: columns of x; columns: the distinct values found in them.
    freq = x.apply(lambda col: col.value_counts()).T
    # Share of all n rows (a fraction, not multiplied by 100).
    perc = freq.apply(lambda col: col / n)

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # labelled tables in the same way and works on every pandas version.
    f = pd.concat([
        _label_rows(freq.T, '频数'),
        _label_rows(perc.T, '百分比'),
    ]).T
    f.columns.names = [None, None]

    # Assumes cochrans_q returns a (statistic, p-value) pair.
    z, p = cochrans_q(x)
    df = pd.Series({
        '样本量': n,
        'CochransQ 统计量': z,
        'p': p,
        'df': x.shape[1] - 1
    })
    df = df.to_frame().T.set_index('样本量')

    res = {'频数分析结果': f, 'CochranQ检验结果': df}
    return res
def test_cochransq2():
    """Check cochrans_q on a 12 x 4 binary table from a web example."""
    # From an example found on the web; verifies Q = 13.286.
    rows = [
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [1, 1, 1, 1],
        [1, 0, 0, 1],
        [0, 1, 0, 1],
        [1, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [1, 0, 0, 1],
        [0, 0, 1, 1],
    ]
    data = np.array(rows, dtype=int)

    expected = [13.2857143, 0.00405776]
    res = cochrans_q(data)
    assert_allclose(res, expected, rtol=1e-6)
def cochranQ():
    """Run Cochran's Q test on an example of 12 subjects and 3 tasks.

    Each subject attempts every task; the outcome is stored as 0 (failure)
    or 1 (success).  In the example data subject 1 passed task 2 only.
    The test asks whether the three tasks differ in how often they are
    passed.

    Results are printed (Q, p, and a remark if p < 0.05); nothing is
    returned.
    """
    task_names = ['Task1', 'Task2', 'Task3']
    # Rows are tasks, columns are subjects.
    tasks = np.array([
        [0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0],
        [1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1],
        [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
    ])

    # A DataFrame is preferred here because the column names say directly
    # what the values mean.
    per_subject = tasks.T
    df = pd.DataFrame(per_subject, columns=task_names)

    # --- >>> START stats <<< ---
    result = cochrans_q(df)
    (statistic, p_val) = result
    # --- >>> STOP stats <<< ---

    print("\nCOCHRAN'S Q -----------------------------------------------------")
    print('Q = {0:5.3f}, p = {1:5.3f}'.format(statistic, p_val))
    if p_val < 0.05:
        print("There is a significant difference between the three tasks.")
def test_cochransq3():
    """Check cochrans_q against SAS output for frequency-weighted input."""
    # Another example compared to SAS, given in frequency weight format:
    # each distinct (A, B, C) response pattern is listed once with its
    # number of occurrences.  The F/U labels of the source table correspond
    # to the 0/1 rows of ``cases`` in the same order:
    #   ("F", "F", "F",  6) -> [0, 0, 0]
    #   ("U", "F", "F",  2) -> [1, 0, 0]
    #   ("F", "F", "U", 16) -> [0, 0, 1]
    #   ("U", "F", "U",  4) -> [1, 0, 1]
    #   ("F", "U", "F",  2) -> [0, 1, 0]
    #   ("U", "U", "F",  6) -> [1, 1, 0]
    #   ("F", "U", "U",  4) -> [0, 1, 1]
    #   ("U", "U", "U",  6) -> [1, 1, 1]
    # The labelled table is kept as a comment only: the structured array that
    # used to hold it was dead code, never passed to anything.
    cases = np.array(
        [
            [0, 0, 0],
            [1, 0, 0],
            [0, 0, 1],
            [1, 0, 1],
            [0, 1, 0],
            [1, 1, 0],
            [0, 1, 1],
            [1, 1, 1],
        ]
    )
    count = np.array([6, 2, 16, 4, 2, 6, 4, 6])

    # Repeat each pattern ``count`` times along the rows to obtain one row
    # per individual observation.
    data = np.repeat(cases, count, axis=0)

    res = cochrans_q(data)
    # The SAS values are rounded to four decimals, so compare absolutely.
    assert_allclose(res, [8.4706, 0.0145], atol=5e-5)
def cochran_q_test(pdf, var_names):
    """Run Cochran's Q test on selected columns and format the outcome.

    :param pdf: table indexed by column label
        (presumably a pandas DataFrame - confirm against the caller)
    :param var_names: sequence of column labels in ``pdf`` to be compared
    :return: string made of a heading passed through ``_`` (presumably the
        translation hook) followed by the HTML-marked-up result
        ``Q(df, N = n) = q, p``, where df is ``len(var_names) - 1`` and n is
        the length of the first selected column
    """
    q, p = cochrans_q(pdf[var_names])

    heading = _("Result of Cochran's Q test")
    degrees_of_freedom = len(var_names) - 1
    n_cases = len(pdf[var_names[0]])
    # Only this part carries format specifiers; the heading is prepended
    # afterwards, unformatted.
    details = ': <i>Q</i>(%d, <i>N</i> = %d) = %0.3g, %s\n' % (
        degrees_of_freedom,
        n_cases,
        q,
        cs_util.print_p(p),
    )
    return heading + details