def test_gaussian_with_condition_equal_to_y(self) -> None:
    """Conditioning on Y itself: MI(X; Y | Y) should be zero."""
    generator = np.random.default_rng(4)
    covariance = np.array([[1, 0.6], [0.6, 1]])
    samples = generator.multivariate_normal([0, 0], covariance, size=314)
    first = samples[:, 0]
    second = samples[:, 1]

    result = _estimate_conditional_mi(first, second, second, k=4)

    self.assertAlmostEqual(result, 0.0, delta=0.001)
def test_three_gaussians(self) -> None:
    """CMI of a three-dimensional Gaussian matches the analytic value.

    First example in doi:10.1103/PhysRevLett.99.204101 — the analytic
    conditional MI of a trivariate Gaussian is known in closed form as a
    combination of determinants of submatrices of the covariance. The
    covariance matrix here is not particularly interesting.
    """
    generator = np.random.default_rng(5)
    covariance = np.array([[1, 1, 1], [1, 4, 1], [1, 1, 9]])
    samples = generator.multivariate_normal([0, 0, 0], covariance, size=1000)

    result = _estimate_conditional_mi(samples[:, 0], samples[:, 1], samples[:, 2])

    analytic = 0.5 * (log(8) + log(35) - log(9) - log(24))
    self.assertAlmostEqual(result, analytic, delta=0.015)
def test_gaussian_with_independent_condition(self) -> None:
    """An independent condition leaves the estimate equal to ordinary MI."""
    # (correlation, sample size, neighbor count, tolerance)
    parameters = [
        (0.5, 200, 3, 0.03),
        (0.75, 400, 3, 0.01),
        (-0.9, 4000, 5, 0.03),
    ]
    for rho, n, k, delta in parameters:
        with self.subTest(rho=rho, n=n, k=k):
            generator = np.random.default_rng(0)
            covariance = np.array([[1, rho], [rho, 1]])
            # Draw the bivariate sample first, then the independent
            # condition, to keep the RNG stream order fixed.
            samples = generator.multivariate_normal([0, 0], covariance, size=n)
            first = samples[:, 0]
            second = samples[:, 1]
            condition = generator.uniform(0, 1, size=n)

            result = _estimate_conditional_mi(first, second, condition, k=k)

            # Analytic MI of a bivariate Gaussian with correlation rho.
            analytic = -0.5 * log(1 - rho**2)
            self.assertAlmostEqual(result, analytic, delta=delta)
def test_four_gaussians(self) -> None:
    """CMI with a two-dimensional condition matches the reference value.

    The covariance matrix is defined by transforming a standard normal
    vector (u1, u2, u3, u4) as:
        x  = u1,
        y  = u2 + u3 + 2*u4,
        z1 = 2*u1 + u3,
        z2 = u1 + u4.
    Unconditionally x and y are independent, but conditionally they aren't.
    """
    generator = np.random.default_rng(25)
    covariance = np.array(
        [[1, 0, 2, 1],
         [0, 6, 1, 2],
         [2, 1, 5, 2],
         [1, 2, 2, 2]]
    )
    # Estimation accuracy requires normalized data and a fairly
    # large sample size.
    samples = generator.multivariate_normal([0, 0, 0, 0], covariance, size=8000)
    samples = samples / np.sqrt(np.var(samples, axis=0))

    result = _estimate_conditional_mi(samples[:, 0], samples[:, 1], samples[:, 2:])

    self.assertAlmostEqual(result, 0.64964, delta=0.04)