def test_compute_mi_cd(): # To test define a joint distribution as follows: # p(x, y) = p(x) p(y | x) # X ~ Bernoulli(p) # (Y | x = 0) ~ Uniform(-1, 1) # (Y | x = 1) ~ Uniform(0, 2) # Use the following formula for mutual information: # I(X; Y) = H(Y) - H(Y | X) # Two entropies can be computed by hand: # H(Y) = -(1-p)/2 * ln((1-p)/2) - p/2*log(p/2) - 1/2*log(1/2) # H(Y | X) = ln(2) # Now we need to implement sampling from out distribution, which is # done easily using conditional distribution logic. n_samples = 1000 rng = check_random_state(0) for p in [0.3, 0.5, 0.7]: x = rng.uniform(size=n_samples) > p y = np.empty(n_samples) mask = x == 0 y[mask] = rng.uniform(-1, 1, size=np.sum(mask)) y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)) I_theory = -0.5 * ((1 - p) * np.log(0.5 * (1 - p)) + p * np.log(0.5 * p) + np.log(0.5)) - np.log(2) # Assert the same tolerance. for n_neighbors in [3, 5, 7]: I_computed = _compute_mi(x, y, True, False, n_neighbors) assert_almost_equal(I_computed, I_theory, 1)
def test_compute_mi_cc(): # For two continuous variables a good approach is to test on bivariate # normal distribution, where mutual information is known. # Mean of the distribution, irrelevant for mutual information. mean = np.zeros(2) # Setup covariance matrix with correlation coeff. equal 0.5. sigma_1 = 1 sigma_2 = 10 corr = 0.5 cov = np.array([[sigma_1**2, corr * sigma_1 * sigma_2], [corr * sigma_1 * sigma_2, sigma_2**2]]) # True theoretical mutual information. I_theory = (np.log(sigma_1) + np.log(sigma_2) - 0.5 * np.log(np.linalg.det(cov))) rng = check_random_state(0) Z = rng.multivariate_normal(mean, cov, size=1000) x, y = Z[:, 0], Z[:, 1] # Theory and computed values won't be very close, assert that the # first figures after decimal point match. for n_neighbors in [3, 5, 7]: I_computed = _compute_mi(x, y, False, False, n_neighbors) assert_almost_equal(I_computed, I_theory, 1)
def test_compute_mi_cc(global_dtype): # For two continuous variables a good approach is to test on bivariate # normal distribution, where mutual information is known. # Mean of the distribution, irrelevant for mutual information. mean = np.zeros(2) # Setup covariance matrix with correlation coeff. equal 0.5. sigma_1 = 1 sigma_2 = 10 corr = 0.5 cov = np.array([ [sigma_1**2, corr * sigma_1 * sigma_2], [corr * sigma_1 * sigma_2, sigma_2**2], ]) # True theoretical mutual information. I_theory = np.log(sigma_1) + np.log(sigma_2) - 0.5 * np.log( np.linalg.det(cov)) rng = check_random_state(0) Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype, copy=False) x, y = Z[:, 0], Z[:, 1] # Theory and computed values won't be very close # We here check with a large relative tolerance for n_neighbors in [3, 5, 7]: I_computed = _compute_mi(x, y, False, False, n_neighbors) assert_allclose(I_computed, I_theory, rtol=1e-1)
def test_compute_mi_cd_unique_label(): # Test that adding unique label doesn't change MI. n_samples = 100 x = np.random.uniform(size=n_samples) > 0.5 y = np.empty(n_samples) mask = x == 0 y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)) y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)) mi_1 = _compute_mi(x, y, True, False) x = np.hstack((x, 2)) y = np.hstack((y, 10)) mi_2 = _compute_mi(x, y, True, False) assert mi_1 == mi_2
def test_compute_mi_cd_unique_label(global_dtype): # Test that adding unique label doesn't change MI. n_samples = 100 x = np.random.uniform(size=n_samples) > 0.5 y = np.empty(n_samples, global_dtype) mask = x == 0 y[mask] = np.random.uniform(-1, 1, size=np.sum(mask)) y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask)) mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False) x = np.hstack((x, 2)) y = np.hstack((y, 10)) mi_2 = _compute_mi(x, y, x_discrete=True, y_discrete=False) assert_allclose(mi_1, mi_2)
def test_compute_mi_dd(): # In discrete case computations are straightforward and can be done # by hand on given vectors. x = np.array([0, 1, 1, 0, 0]) y = np.array([1, 0, 0, 0, 1]) H_x = H_y = -(3 / 5) * np.log(3 / 5) - (2 / 5) * np.log(2 / 5) H_xy = -1 / 5 * np.log(1 / 5) - 2 / 5 * np.log(2 / 5) - 2 / 5 * np.log(2 / 5) I_xy = H_x + H_y - H_xy assert_allclose(_compute_mi(x, y, x_discrete=True, y_discrete=True), I_xy)