Example #1
0
def cramerphi_chi2(data0, data1):
    """Cross-tabulate two categorical columns and compute their chi^2.

    Every distinct value in a column is treated as one level.  Levels are
    numbered in sorted order, a contingency table of co-occurrence counts
    is built, and the table is passed to ``stats.chi2_contingency``.

    Parameters:
        data0: sequence of hashable, sortable values.
        data1: sequence of hashable, sortable values with the same
            length as ``data0``.

    Returns:
        A tuple ``(chi2, n0, n1)``.  ``n0`` and ``n1`` are the numbers of
        distinct values in ``data0`` and ``data1``; ``chi2`` is whatever
        ``stats.chi2_contingency`` returns for the contingency table.
        ``chi2`` is NaN when the input is empty (then ``n0 == n1 == 0``)
        or when either column has a single distinct value.

    Raises:
        AssertionError: if the columns differ in length (only while
            assertions are enabled).
    """
    n = len(data0)
    assert n == len(data1)
    if n == 0:
        return float('NaN'), 0, 0
    # Number the distinct values of each column 0..k-1 in sorted order so
    # they can be used directly as row/column indices of the table.
    index0 = {x: i for i, x in enumerate(sorted(set(data0)))}
    index1 = {x: i for i, x in enumerate(sorted(set(data1)))}
    n0 = len(index0)
    n1 = len(index1)
    if min(n0, n1) == 1:
        # No variation in at least one column, so no notion of
        # correlation.
        return float('NaN'), n0, n1
    rows = numpy.array([index0[d] for d in data0])
    cols = numpy.array([index1[d] for d in data1])
    # Tally every (row, col) pair in a single pass.  ufunc.at accumulates
    # repeated index pairs, which a fancy-indexed `+=` would not do.
    ct = numpy.zeros((n0, n1), dtype=int)
    numpy.add.at(ct, (rows, cols), 1)
    # Compute observed chi^2 statistic.
    chi2 = stats.chi2_contingency(ct)
    return chi2, n0, n1
Example #2
0
def cramerphi_chi2(data0, data1):
    """Cross-tabulate two categorical columns and compute their chi^2.

    Every distinct value in a column is treated as one level.  Levels are
    numbered in sorted order, a contingency table of co-occurrence counts
    is built, and the table is passed to ``stats.chi2_contingency``.

    Parameters:
        data0: sequence of hashable, sortable values.
        data1: sequence of hashable, sortable values with the same
            length as ``data0``.

    Returns:
        A tuple ``(chi2, n0, n1)``.  ``n0`` and ``n1`` are the numbers of
        distinct values in ``data0`` and ``data1``; ``chi2`` is whatever
        ``stats.chi2_contingency`` returns for the contingency table.
        ``chi2`` is NaN when the input is empty (then ``n0 == n1 == 0``)
        or when either column has a single distinct value.

    Raises:
        AssertionError: if the columns differ in length (only while
            assertions are enabled).
    """
    n = len(data0)
    assert n == len(data1)
    if n == 0:
        return float('NaN'), 0, 0
    # Number the distinct values of each column 0..k-1 in sorted order so
    # they can be used directly as row/column indices of the table.
    index0 = {x: i for i, x in enumerate(sorted(set(data0)))}
    index1 = {x: i for i, x in enumerate(sorted(set(data1)))}
    n0 = len(index0)
    n1 = len(index1)
    if min(n0, n1) == 1:
        # No variation in at least one column, so no notion of
        # correlation.
        return float('NaN'), n0, n1
    rows = numpy.array([index0[d] for d in data0])
    cols = numpy.array([index1[d] for d in data1])
    # Tally every (row, col) pair in a single pass.  ufunc.at accumulates
    # repeated index pairs, which a fancy-indexed `+=` would not do.
    ct = numpy.zeros((n0, n1), dtype=int)
    numpy.add.at(ct, (rows, cols), 1)
    # Compute observed chi^2 statistic.
    chi2 = stats.chi2_contingency(ct)
    return chi2, n0, n1
Example #3
0
def test_chi2_contingency():
    """Check ``stats.chi2_contingency`` on a 1x1 and a 2x3 table."""
    # A table holding a single cell must yield exactly zero.
    single_cell = [[42]]
    assert stats.chi2_contingency(single_cell) == 0.
    # For this 2x3 table, relerr against the reference value 7.66 must
    # stay below 0.01.
    table = [[4,2,3], [3,16,2]]
    observed = stats.chi2_contingency(table)
    assert relerr(7.66, observed) < 0.01