def cramerphi_chi2(data0, data1):
    """Return the chi^2 statistic of two paired samples and their level counts.

    Each sample is treated as categorical: its distinct values are the
    levels.  The two samples are cross-tabulated into a contingency
    table, which is handed to ``stats.chi2_contingency``.

    Parameters:
        data0: sequence of hashable, mutually orderable values.
        data1: sequence of the same length as ``data0``; element ``i``
            is paired with ``data0[i]``.

    Returns:
        A tuple ``(chi2, n0, n1)`` where ``n0`` and ``n1`` are the
        numbers of distinct values in ``data0`` and ``data1``, and
        ``chi2`` is whatever ``stats.chi2_contingency`` returns for the
        ``n0``-by-``n1`` table of pair counts.  ``chi2`` is NaN when the
        inputs are empty (then ``n0 == n1 == 0``) or when either sample
        has only a single distinct value.

    Raises:
        AssertionError: if the two samples differ in length.
    """
    n = len(data0)
    assert n == len(data1)
    if n == 0:
        return float('NaN'), 0, 0

    # Recode every value as the rank of that value among the sample's
    # sorted distinct values, so the codes are exactly 0..n_levels-1 and
    # can be used directly as row/column indices of the table.
    index0 = {x: i for i, x in enumerate(sorted(set(data0)))}
    index1 = {x: i for i, x in enumerate(sorted(set(data1)))}
    codes0 = numpy.array([index0[d] for d in data0])
    codes1 = numpy.array([index1[d] for d in data1])

    # Every key of the index comes from the data, so the number of keys
    # is the number of levels; no need to re-derive it from the codes.
    n0 = len(index0)
    n1 = len(index1)
    if min(n0, n1) == 1:
        # No variation in at least one column, so no notion of
        # correlation.
        return float('NaN'), n0, n1

    # Count the occurrences of each (level0, level1) pair in one pass.
    # numpy.add.at is unbuffered, so repeated index pairs accumulate
    # instead of overwriting one another as ct[codes0, codes1] += 1 would.
    ct = numpy.zeros((n0, n1), dtype=int)
    numpy.add.at(ct, (codes0, codes1), 1)

    # Compute observed chi^2 statistic.
    chi2 = stats.chi2_contingency(ct)
    return chi2, n0, n1
def test_chi2_contingency():
    """Check stats.chi2_contingency on a 1x1 table and on a 2x3 table."""
    # A table with a single cell must give a statistic of exactly zero.
    single_cell = [[42]]
    assert stats.chi2_contingency(single_cell) == 0.

    # For this 2x3 table the statistic must agree with the reference
    # value 7.66, with relerr(reference, observed) below 0.01.
    table = [[4,2,3], [3,16,2]]
    observed = stats.chi2_contingency(table)
    assert relerr(7.66, observed) < 0.01