Exemple #1
0
    def test_lag_too_small(self) -> None:
        # A lag of -4 on four observations must be rejected with the
        # "lag too large" message.
        x, y = [1, 2, 3, 4], [5, 6, 7, 8]

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, lag=-4)

        message = str(raised.exception)
        self.assertEqual(message, TOO_LARGE_LAG_MSG)
Exemple #2
0
    def test_x_with_too_large_dimension(self) -> None:
        # A three-dimensional x array must be rejected.
        y = np.zeros(10)
        x = np.zeros((10, 2, 3))

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x)

        message = str(raised.exception)
        self.assertEqual(message, X_WRONG_DIMENSION_MSG)
Exemple #3
0
    def test_cond_lag_leaves_no_y_observations(self) -> None:
        # The x lag alone is fine; the condition lag of 4 on four
        # observations is what must trigger the error.
        x, y = [1, 2, 3, 4], [5, 6, 7, 8]

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, lag=1, cond=y, cond_lag=4)

        message = str(raised.exception)
        self.assertEqual(message, TOO_LARGE_LAG_MSG)
Exemple #4
0
    def test_lag_leaves_too_few_observations(self) -> None:
        # 30 observations with lags -5 and 10 and k=15 must be rejected
        # with the "k too large" message.
        n_obs = 30
        x, y = np.zeros(n_obs), np.zeros(n_obs)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, lag=[-5, 10], k=15)

        message = str(raised.exception)
        self.assertEqual(message, K_TOO_LARGE_MSG)
Exemple #5
0
    def test_mask_with_wrong_length(self) -> None:
        # The mask has two entries while the data has four.
        x, y = [1, 2, 3, 4], [5, 6, 7, 8]
        short_mask = [False, True]

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, mask=short_mask)

        message = str(raised.exception)
        self.assertEqual(message, INVALID_MASK_LENGTH_MSG)
Exemple #6
0
    def test_inputs_of_different_length(self) -> None:
        # The two inputs have 10 and 20 observations respectively.
        x, y = np.zeros(10), np.zeros(20)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(x, y)

        message = str(raised.exception)
        self.assertEqual(message, X_Y_DIFFERENT_LENGTH_MSG)
Exemple #7
0
    def test_inputs_shorter_than_k(self) -> None:
        # Only three observations are available, but k=5 is requested.
        x, y = np.zeros(3), np.zeros(3)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(x, y, k=5)

        message = str(raised.exception)
        self.assertEqual(message, K_TOO_LARGE_MSG)
Exemple #8
0
    def test_y_with_wrong_dimension(self) -> None:
        # A two-dimensional y array must be rejected.
        y = np.zeros((10, 2))
        x = np.zeros(10)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x)

        message = str(raised.exception)
        self.assertEqual(message, Y_WRONG_DIMENSION_MSG)
Exemple #9
0
    def test_x_and_cond_different_length(self) -> None:
        # The same 10-element array is used for both main inputs;
        # the condition has 20 elements.
        values = np.zeros(10)
        longer_cond = np.zeros(20)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(values, values, cond=longer_cond)

        message = str(raised.exception)
        self.assertEqual(message, X_COND_DIFFERENT_LENGTH_MSG)
Exemple #10
0
    def test_unmasked_nans_in_discrete_y_are_rejected(self) -> None:
        # A single NaN is placed in y and no mask is given, so the
        # discrete-y call must fail.
        n_obs = 100
        x, y = np.zeros(n_obs), np.zeros(n_obs)
        y[25] = np.nan

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, discrete_y=True)

        message = str(raised.exception)
        self.assertEqual(message, NANS_LEFT_MSG)
Exemple #11
0
    def test_mask_leaves_no_observations(self) -> None:
        # Every mask entry is False.
        n_obs = 30
        x, y = np.zeros(n_obs), np.zeros(n_obs)
        mask = np.zeros(n_obs, dtype=bool)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, mask=mask)

        message = str(raised.exception)
        self.assertEqual(message, K_TOO_LARGE_MSG)
Exemple #12
0
    def test_mask_with_wrong_dimension(self) -> None:
        # The mask is two-dimensional while the data is one-dimensional.
        x, y = np.zeros(10), np.zeros(10)
        mask_2d = np.zeros((10, 2), dtype=bool)

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, mask=mask_2d)

        message = str(raised.exception)
        self.assertEqual(message, MASK_WRONG_DIMENSION_MSG)
Exemple #13
0
    def test_cond_and_mask_as_list(self) -> None:
        # All inputs are plain Python lists; the only NaN (last x value)
        # sits at the position where the mask is False.
        y = [2, 4, 6, 8, 10, 12]
        x = [1, 2, 3, 4, 5, math.nan]
        cond = [1, 1, 2, 3, 5, 8]
        mask = [True] * 5 + [False]

        # The numeric result is meaningless for this data. The call only
        # has to type-check and finish without raising.
        estimate_mi(y, x, cond=cond, mask=mask)
Exemple #14
0
    def test_k_must_be_positive(self) -> None:
        # Both a negative and a zero k must be rejected.
        x, y = np.zeros(30), np.zeros(30)

        for bad_k in (-2, 0):
            with self.subTest(k=bad_k):
                with self.assertRaises(ValueError) as raised:
                    estimate_mi(x, y, k=bad_k)

                message = str(raised.exception)
                self.assertEqual(message, K_NEGATIVE_MSG)
Exemple #15
0
    def test_mask_with_mixed_element_types(self) -> None:
        # A mask containing integers would subset the data in a way that
        # is hard to reason about, so a TypeError is expected instead.
        x, y = [1, 2, 3, 4], [5, 6, 7, 8]
        mixed_mask = [True, 2, 1, 0]

        with self.assertRaises(TypeError) as raised:
            estimate_mi(y, x, mask=mixed_mask)

        message = str(raised.exception)
        self.assertEqual(message, INVALID_MASK_TYPE_MSG)
Exemple #16
0
    def test_mask_and_lag_leave_too_few_observations(self) -> None:
        # The first 15 of 30 observations are masked out, and a lag of
        # -5 is applied on top of that while k=10 is requested.
        n_obs = 30
        x, y = np.zeros(n_obs), np.zeros(n_obs)
        mask = np.arange(n_obs) >= 15

        with self.assertRaises(ValueError) as raised:
            estimate_mi(y, x, lag=-5, mask=mask, k=10)

        message = str(raised.exception)
        self.assertEqual(message, K_TOO_LARGE_MSG)
Exemple #17
0
    def test_unconditional_mi_independence(self) -> None:
        # Two uncorrelated standard normal variables.
        rng = np.random.default_rng(0)
        identity_cov = [[1, 0], [0, 1]]
        sample = rng.multivariate_normal([0, 0], identity_cov, size=20_000)
        first, second = sample[:, 0], sample[:, 1]

        mi_small_k = estimate_mi(first, second, k=3)
        mi_large_k = estimate_mi(first, second, k=100)

        # The k=100 estimate is expected to sit closer to zero than the
        # k=3 estimate when the variables are independent.
        self.assertAlmostEqual(mi_large_k, 0.0, delta=0.004)
        self.assertGreater(mi_small_k - mi_large_k, 0.002)
Exemple #18
0
    def test_discrete_y(self) -> None:
        # Same construction as the two_disjoint_uniforms algorithm test:
        # y is 0 or 2, and x is uniform on [y, y + 1).
        rng = np.random.default_rng(51)
        labels = rng.choice([0, 2], size=800)
        values = rng.uniform(labels, labels + 1)

        result = estimate_mi(labels, values, discrete_y=True)
        self.assertAlmostEqual(result, math.log(2), delta=0.02)

        # Swapping the arguments should produce a UserWarning.
        with self.assertWarns(UserWarning):
            _ = estimate_mi(values, labels, discrete_y=True)
Exemple #19
0
    def test_conditional_mi_independence(self) -> None:
        # The condition is x itself: once x is known, x and x + y share
        # no further information, so the conditional MI should be zero.
        rng = np.random.default_rng(0)
        x = rng.normal(0.0, 1.0, size=20_000)
        y = rng.normal(0.0, 1.0, size=20_000)
        total = x + y

        mi_small_k = estimate_mi(x, total, cond=x, k=3)
        mi_large_k = estimate_mi(x, total, cond=x, k=100)

        # As in the unconditional case, the large k is expected to do
        # better at detecting independence.
        self.assertAlmostEqual(mi_large_k, 0.0, delta=0.005)
        self.assertGreater(abs(mi_small_k - mi_large_k), 0.05)
Exemple #20
0
    def test_preprocess(self) -> None:
        # Correlated Gaussians (correlation 0.6) rescaled to wildly
        # different magnitudes; without preprocessing the estimate is
        # expected to be off.
        rng = np.random.default_rng(2020_07_16)
        cov = np.asarray([[1, 0.6], [0.6, 1]])
        sample = rng.multivariate_normal([0, 0], cov, size=800)
        x = 1e3 * sample[:, 0]
        y = 1e-3 * sample[:, 1]

        raw_estimate = estimate_mi(y, x, preprocess=False, normalize=True)
        scaled_estimate = estimate_mi(y, x, preprocess=True, normalize=True)

        self.assertNotAlmostEqual(raw_estimate, 0.6, delta=0.1)
        self.assertAlmostEqual(scaled_estimate, 0.6, delta=0.03)
Exemple #21
0
    def test_unconditional_mi_bias(self) -> None:
        # Strongly correlated bivariate normal (correlation 0.8).
        rng = np.random.default_rng(0)
        cov = [[1, 0.8], [0.8, 1]]
        sample = rng.multivariate_normal([0, 0], cov, size=20_000)
        first, second = sample[:, 0], sample[:, 1]

        mi_small_k = estimate_mi(first, second, k=3)
        mi_large_k = estimate_mi(first, second, k=100)

        # The small k should be close to the reference value, and the
        # large k should be measurably further away from it.
        expected = -0.5 * log(1 - 0.8**2)
        small_k_error = abs(mi_small_k - expected)
        large_k_error = abs(mi_large_k - expected)
        self.assertAlmostEqual(mi_small_k, expected, delta=0.005)
        self.assertGreater(large_k_error, small_k_error + 0.005)
Exemple #22
0
    def test_conditional_mi_with_multidimensional_cond(self) -> None:
        # X, Y and Z are standard normal and W is their sum, hence
        # I(X;W) < I(X;W | Y) < I(X;W | Y,Z).
        rng = np.random.default_rng(16)
        x = rng.normal(size=600)
        y = rng.normal(size=600)
        z = rng.normal(size=600)
        w = x + y + z

        # Two condition variables, one per column
        both_conditions = np.asarray([y, z]).T

        mi_given_y = estimate_mi(w, x, cond=y)
        mi_given_y_and_z = estimate_mi(w, x, cond=both_conditions)

        self.assertEqual(mi_given_y_and_z.shape, (1, 1))
        self.assertAlmostEqual(mi_given_y.item(), 0.33, delta=0.03)
        self.assertGreater(mi_given_y_and_z.item(), 1.0)
Exemple #23
0
    def test_pandas_data_frame(self) -> None:
        # Reads the same CSV file as test_array_from_file(), but through
        # pandas so that the inputs are a Series and a DataFrame.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        frame = pd.read_csv(os.path.join(test_dir, "example_data.csv"))
        tolerance = 0.04

        actual = estimate_mi(frame["y"],
                             frame[["x1", "x2", "x3"]],
                             lag=[0, 1, 3],
                             k=5)  # type: pd.DataFrame

        # The result is a data frame indexed by lag, with one column
        # per x variable.
        self.assertIsInstance(actual, pd.DataFrame)

        # x1: only the lag 1 entry shows a dependency on y
        self.assertAlmostEqual(actual.loc[0, "x1"], 0.0, delta=tolerance)
        self.assertGreater(actual.loc[1, "x1"], 0.4)
        self.assertAlmostEqual(actual.loc[3, "x1"], 0.0, delta=tolerance)

        # x2: no dependency on y at any lag
        for lag in (0, 1, 3):
            self.assertAlmostEqual(actual.loc[lag, "x2"], 0.0,
                                   delta=tolerance)

        # x3: only the lag 3 entry shows a dependency on y (via abs(x3))
        self.assertAlmostEqual(actual.loc[0, "x3"], 0.0, delta=tolerance)
        self.assertAlmostEqual(actual.loc[1, "x3"], 0.0, delta=tolerance)
        self.assertGreater(actual.loc[3, "x3"], 0.15)
Exemple #24
0
    def test_conditional_mi_with_mask_and_lags(self) -> None:
        # Same distribution as
        # TestEstimateConditionalMi.test_three_gaussians(), except that
        # only every fifth observation is kept and the Z values are
        # stored at a different offset than X and Y.
        rng = np.random.default_rng(12)
        cov = np.array([[1, 1, 1], [1, 4, 1], [1, 1, 9]])
        sample = rng.multivariate_normal([0, 0, 0], cov, size=2000)

        positions = np.arange(2000)
        mask = positions % 5 == 0
        z_slots = positions % 5 == 3

        # Everything outside the kept positions stays zero.
        x = np.zeros(2000)
        y = np.zeros(2000)
        z = np.zeros(2000)
        x[mask] = sample[mask, 0]
        y[mask] = sample[mask, 1]
        # NOTE(review): z[3] receives sample[0, 2], z[8] receives
        # sample[5, 2], and so on. If cond_lag=2 pairs y[t] with z[t-2],
        # then y[5] is paired with the Z value of sample 0 rather than
        # sample 5 -- confirm that this pairing is intended.
        z[z_slots] = sample[mask, 2]

        # Preprocessing is turned off because the condition selected by
        # cond_lag=1 consists of zeros only.
        actual = estimate_mi(y,
                             x,
                             lag=[0, -1],
                             cond=z,
                             cond_lag=[2, 1],
                             mask=mask,
                             preprocess=False)
        expected = 0.5 * (math.log(8) + math.log(35) - math.log(9) -
                          math.log(24))

        self.assertAlmostEqual(actual[0, 0], expected, delta=0.03)
        self.assertAlmostEqual(actual[1, 0], 0.0, delta=0.01)
Exemple #25
0
    def test_array_from_file(self) -> None:
        # Realistic workflow: load a CSV file into a plain array and
        # estimate MI for several variables and lags at once.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        csv_path = os.path.join(test_dir, "example_data.csv")
        table = np.loadtxt(csv_path, delimiter=",", skiprows=1)
        y = table[:, 0]
        x_columns = table[:, 1:4]
        tolerance = 0.04

        # The same expectations must hold for every thread setting.
        for threads in [None, 1, 2]:
            with self.subTest(max_threads=threads):
                actual = estimate_mi(y,
                                     x_columns,
                                     lag=[0, 1, 3],
                                     k=5,
                                     max_threads=threads)

                # Array input gives a plain ndarray back
                self.assertIsInstance(actual, np.ndarray)

                # First x column: only the lag 1 row shows a dependency
                self.assertAlmostEqual(actual[0, 0], 0.0, delta=tolerance)
                self.assertGreater(actual[1, 0], 0.5)
                self.assertAlmostEqual(actual[2, 0], 0.0, delta=tolerance)

                # Second x column: no dependency on y at any lag
                for row in range(3):
                    self.assertAlmostEqual(actual[row, 1], 0.0,
                                           delta=tolerance)

                # Third x column: only the lag 3 row shows a dependency
                self.assertAlmostEqual(actual[0, 2], 0.0, delta=tolerance)
                self.assertAlmostEqual(actual[1, 2], 0.0, delta=tolerance)
                self.assertGreater(actual[2, 2], 0.15)
Exemple #26
0
    def test_autocorrelation(self) -> None:
        # Autocorrelation of the temperature series, conditioned on the
        # day of year and restricted to observations at hour 13.
        temperature = self.data["Temperature"]
        afternoon_mask = (self.data.index.hour == 13)
        one_day, ten_days = -24, -10 * 24

        result = estimate_mi(temperature,
                             temperature,
                             lag=[0, one_day, ten_days],
                             cond=self.data["DayOfYear"],
                             mask=afternoon_mask,
                             normalize=True)  # type: pd.DataFrame

        # Three lags and one variable give a 3x1 data frame
        self.assertEqual(result.shape, (3, 1))
        self.assertIsInstance(result, pd.DataFrame)

        # A series compared with itself at lag 0: coefficient of 1
        self.assertAlmostEqual(result.loc[0, "Temperature"], 1, delta=0.01)

        # One day apart the dependency is still strong
        self.assertAlmostEqual(result.loc[one_day, "Temperature"],
                               0.69,
                               delta=0.01)

        # Ten days apart the dependency has nearly vanished
        self.assertAlmostEqual(result.loc[ten_days, "Temperature"],
                               0,
                               delta=0.01)
Exemple #27
0
    def test_normalization(self) -> None:
        # With normalize=True the estimate for a bivariate normal with
        # correlation 0.6 should come out near 0.6.
        rng = np.random.default_rng(17)
        cov = np.asarray([[1, 0.6], [0.6, 1]])
        sample = rng.multivariate_normal([0, 0], cov, size=1000)
        first, second = sample[:, 0], sample[:, 1]

        normalized_mi = estimate_mi(first, second, normalize=True)

        self.assertAlmostEqual(normalized_mi, 0.6, delta=0.02)
Exemple #28
0
    def test_conditional_mi_with_separate_lags(self) -> None:
        # Columns 2 onward of the generated data act as the condition,
        # and each of the two condition lags is given separately.
        data, expected = self._create_4d_data()
        y, x, cond = data[:, 1], data[:, 0], data[:, 2:]

        actual = estimate_mi(y, x, cond=cond, cond_lag=[[1, -1]])

        self.assertAlmostEqual(actual, expected, delta=0.08)
Exemple #29
0
    def test_mask_as_list(self) -> None:
        # x counts upward with every even-indexed entry replaced by NaN,
        # y counts downward, and the mask alternates True/False.
        x = list(range(300))  # type: List[float]
        x[::2] = [math.nan] * 150

        y = list(range(300, 0, -1))
        mask = [True, False] * 150

        result = estimate_mi(y, x, lag=1, mask=mask)
        self.assertGreater(result, 3)
Exemple #30
0
    def test_drop_nan(self) -> None:
        # NaNs are written to the start of one column and the end of the
        # other; with drop_nan=True the normalized estimate should still
        # come out near the correlation of 0.8.
        rng = np.random.default_rng(2020_07_28)
        cov = np.array([[1, 0.8], [0.8, 1]])
        sample = rng.multivariate_normal([0, 0], cov, size=1000)
        sample[:50, 0] = np.nan
        sample[950:, 1] = np.nan

        result = estimate_mi(sample[:, 1],
                             sample[:, 0],
                             normalize=True,
                             drop_nan=True)
        self.assertAlmostEqual(result, 0.8, delta=0.02)