Ejemplo n.º 1
0
def test_margins():
    """Check ``margins`` against hand-computed sums for 1-D, 2-D and 3-D input."""
    # 1-D input: a single margin that equals the input itself.
    only = margins(np.array([1]))
    assert_equal(len(only), 1)
    assert_array_equal(only[0], np.array([1]))

    # 1x1 table: both margins keep the table's dimensionality.
    row_m, col_m = margins(np.array([[1]]))
    assert_array_equal(row_m, np.array([[1]]))
    assert_array_equal(col_m, np.array([[1]]))

    # 2x6 table filled with 0..11.
    table = np.arange(12).reshape(2, 6)
    row_m, col_m = margins(table)
    assert_array_equal(row_m, np.array([[15], [51]]))
    assert_array_equal(col_m, np.array([[6, 8, 10, 12, 14, 16]]))

    # 2x3x4 cube filled with 0..23: one margin per axis.
    cube = np.arange(24).reshape(2, 3, 4)
    axis0_m, axis1_m, axis2_m = margins(cube)
    assert_array_equal(axis0_m, np.array([[[66]], [[210]]]))
    assert_array_equal(axis1_m, np.array([[[60], [92], [124]]]))
    assert_array_equal(axis2_m, np.array([[[60, 66, 72, 78]]]))
def test_margins():
    """Exercise ``margins`` on arrays of rank 1, 2 and 3 with known sums."""
    # Rank 1: the list of margins has one entry, equal to the input.
    result = margins(np.array([1]))
    assert_equal(len(result), 1)
    assert_array_equal(result[0], np.array([1]))

    # Rank 2, single cell.
    m0, m1 = margins(np.array([[1]]))
    for got, want in ((m0, [[1]]), (m1, [[1]])):
        assert_array_equal(got, np.array(want))

    # Rank 2, 2x6 grid of 0..11.
    m0, m1 = margins(np.arange(12).reshape(2, 6))
    for got, want in ((m0, [[15], [51]]), (m1, [[6, 8, 10, 12, 14, 16]])):
        assert_array_equal(got, np.array(want))

    # Rank 3, 2x3x4 grid of 0..23.
    m0, m1, m2 = margins(np.arange(24).reshape(2, 3, 4))
    checks = (
        (m0, [[[66]], [[210]]]),
        (m1, [[[60], [92], [124]]]),
        (m2, [[[60, 66, 72, 78]]]),
    )
    for got, want in checks:
        assert_array_equal(got, np.array(want))
Ejemplo n.º 3
0
def compute_mi(cov_xy=0.5, n_bins=100):
    """Estimate mutual information from a histogram of 2-D samples.

    Arguments:
        cov_xy (float): Off-diagonal element of the 2x2 covariance
            matrix; the diagonal entries are fixed at 1
        n_bins (int): Value forwarded as ``bins`` to
            ``np.histogramdd`` when binning the samples

    Returns:
        The sum over all bins of P(X,Y) * log(P(X,Y) / (P(X) * P(Y))).
        The same value is printed before returning.
    """
    # NOTE(review): sample() is defined outside this block; presumably it
    # draws points from a 2-D Gaussian with this covariance -- confirm.
    points = sample(cov=[[1, cov_xy], [cov_xy, 1]])

    # Bin the samples and normalise the counts into a joint distribution.
    p_xy, _ = np.histogramdd(points, bins=n_bins)
    p_xy /= p_xy.sum()

    # Floor probabilities at machine epsilon so the logarithm stays finite.
    tiny = np.finfo(float).eps
    p_xy = np.maximum(p_xy, tiny)

    # Product of the marginals, floored the same way.
    p_x, p_y = margins(p_xy)
    p_x_p_y = np.maximum(p_x * p_y, tiny)

    mi = np.sum(p_xy * np.log(p_xy / p_x_p_y))
    print("Computed MI: %0.6f" % mi)
    return mi
Ejemplo n.º 4
0
def marginal(dist, dim):
    """
    Return the marginal of ``dist`` for axis ``dim`` with length-1 axes removed.

    Note:
        Every marginal is computed through ``margins`` and only entry
        ``dim`` is kept, so this does more work than strictly necessary.

    """
    # margins() yields one array per axis; squeeze drops the singleton axes.
    return np.squeeze(margins(dist)[dim])
Ejemplo n.º 5
0
def stdres(observed, expected):
    """Return the standardized residuals of a two-way table.

    Parameters
    ----------
    observed : ndarray
        2-D array of observed frequencies (two margins are unpacked).
    expected : ndarray
        Expected frequencies, broadcastable against ``observed``.

    Returns
    -------
    ndarray
        ``(observed - expected) / sqrt(v)`` where
        ``v = csum * rsum * (n - rsum) * (n - csum) / n**3``, ``rsum`` and
        ``csum`` being the margins of ``observed`` and ``n`` its total.
    """
    # Work entirely in floating point.  With integer counts both
    #     csum * rsum * (n - rsum) * (n - csum)
    # and n**3 can silently wrap around int64 (the cube already does so
    # once n exceeds roughly 2.09e6).
    n = float(observed.sum())
    rsum, csum = margins(observed)
    rsum = rsum.astype(np.float64)
    csum = csum.astype(np.float64)
    v = csum * rsum * (n - rsum) * (n - csum) / n**3
    return (observed - expected) / np.sqrt(v)
Ejemplo n.º 6
0
def compute_mi(cov_xy=0.9, n_bins=100):
    """Estimate mutual information from binned 2-D samples.

    ``cov_xy`` is the off-diagonal entry of a 2x2 covariance matrix with
    unit diagonal, and ``n_bins`` is forwarded as ``bins`` to
    ``np.histogramdd``.  The result is printed and returned.
    """
    # NOTE(review): sample() lives outside this block; presumably it draws
    # 2-D points with the given covariance -- confirm.
    draws = sample(cov=[[1, cov_xy], [cov_xy, 1]])

    # Normalised histogram acts as the joint distribution.
    hist, _ = np.histogramdd(draws, bins=n_bins)
    hist /= hist.sum()

    # Floor at machine epsilon so log() never sees zero.
    floor = np.finfo(float).eps
    p_joint = np.maximum(hist, floor)

    p_rows, p_cols = margins(p_joint)
    p_indep = np.maximum(p_rows * p_cols, floor)

    # Sum of P(X,Y) * log(P(X,Y) / (P(X) * P(Y))) over all bins.
    mi = np.sum(p_joint * np.log(p_joint / p_indep))
    print("Computed MI:", mi)
    return mi
def std_res(observed, expected):
    """
    Compute the standardized Pearson residuals of a contingency table.

    :param observed:    a 2-D numpy array (documented by the author as 2-by-n) containing the observed frequencies
    :param expected:    a numpy array of the same shape containing the expected frequencies under the null hypothesis
    :return res:        the standardized Pearson's residuals indicating which cells in the observed data show
                        the stronger deviation from the expected frequencies
    """
    # Convert to floating point before multiplying: with integer counts the
    # product csum * rsum * (n - rsum) * (n - csum) and the cube n**3 can
    # both silently wrap around int64 for large tables.
    n = float(observed.sum())
    rsum, csum = contingency.margins(observed)
    rsum = rsum.astype(np.float64)
    csum = csum.astype(np.float64)

    v = csum * rsum * (n - rsum) * (n - csum) / n**3
    res = (observed - expected) / np.sqrt(v)

    return res
def compute_mi(cov_xy=0.5, n_bins=100):
    """Estimate mutual information between the two coordinates of a sample.

    Args:
        cov_xy: Off-diagonal entry of the 2x2 covariance matrix (unit
            diagonal) handed to ``sample``.
        n_bins: Forwarded as ``bins`` to ``np.histogramdd``.

    Returns:
        Sum over bins of P(X,Y) * log(P(X,Y) / (P(X) * P(Y))); the value is
        also printed with six decimals.
    """
    covariance = [[1, cov_xy], [cov_xy, 1]]
    # NOTE(review): sample() is not visible here; presumably it returns
    # 2-D draws for this covariance -- confirm.
    observations = sample(cov=covariance)

    # Histogram the draws, then turn counts into probabilities.
    density, _ = np.histogramdd(observations, bins=n_bins)
    density /= density.sum()

    # Replace anything below machine epsilon so the log stays finite.
    smallest = np.finfo(float).eps
    density = np.where(density < smallest, smallest, density)

    # Marginals and their (floored) outer product.
    marg_x, marg_y = margins(density)
    outer = marg_x * marg_y
    outer = np.where(outer < smallest, smallest, outer)

    terms = density * np.log(density / outer)
    mi = terms.sum()
    print("Computed MI: %0.6f" % mi)
    return mi
Ejemplo n.º 9
0
def uncertainty(j):
    """
    Compute the mutual information ``H(X) + H(Y) - H(X, Y)`` of a joint distribution.

    The Shannon entropies of the two marginal distributions and of the joint
    distribution are combined into a measure of (possibly nonlinear)
    dependence between X and Y.

    Args:
        j: The joint distribution of X and Y as a 2-D array-like of
            probabilities.  It is used as given (no normalisation is
            applied), so it is expected to sum to 1.

    Returns:
        The mutual information in nats as a scalar: 0 for independent
        variables, larger for stronger dependence.
    """
    joint = np.asarray(j, dtype=float)
    # margins() returns one array per axis; for a 2-D table these are the
    # marginal distributions of X and Y.
    x, y = margins(joint)

    def entropy(p):
        # Cells with zero probability contribute nothing (0 * log 0 := 0);
        # dropping them avoids NaN from log(0).
        p = p[p > 0]
        return -np.sum(p * np.log(p))

    # The joint entropy must come from the joint table itself; using the
    # product of the marginals would make the result zero by construction.
    return entropy(x) + entropy(y) - entropy(joint)
Ejemplo n.º 10
0
def marginal(dist, dim):
    """Return the marginal of ``dist`` for axis ``dim``, squeezed of singleton axes."""
    # All marginals are computed and only one is kept, which can be
    # wasteful for high-dimensional inputs.
    every_marginal = margins(dist)
    chosen = every_marginal[dim]
    return np.squeeze(chosen)
Ejemplo n.º 11
0
def stdres(observed, expected):
    """Return the standardized residuals of a two-way table.

    Parameters
    ----------
    observed : ndarray
        2-D array of observed frequencies (two margins are unpacked).
    expected : ndarray
        Expected frequencies, broadcastable against ``observed``.

    Returns
    -------
    ndarray
        ``(observed - expected) / sqrt(v)`` with
        ``v = csum * rsum * (n - rsum) * (n - csum) / n**3``.
    """
    # With integer counts the product below and n**3 can silently wrap
    # around int64, so do the whole computation in floating point.
    n = float(observed.sum())
    rsum, csum = margins(observed)
    rsum = rsum.astype(np.float64)
    csum = csum.astype(np.float64)
    v = csum * rsum * (n - rsum) * (n - csum) / n**3
    return (observed - expected) / np.sqrt(v)