Exemple #1
0
    def test_cond_different_length(self) -> None:
        """A ``cond`` array with fewer rows than the data raises ValueError.

        The data has 10 observations while ``cond`` has only 9; the error
        message must equal ``X_COND_DIFFERENT_LENGTH_MSG``.
        """
        observations = np.zeros((10, 2))
        short_cond = np.zeros(9)

        with self.assertRaises(ValueError) as caught:
            pairwise_mi(observations, cond=short_cond)

        message = str(caught.exception)
        self.assertEqual(message, X_COND_DIFFERENT_LENGTH_MSG)
Exemple #2
0
    def test_pairwise_mi(self) -> None:
        """Compare unconditional and DayOfYear-conditioned pairwise MI.

        Uses three columns of ``self.data`` restricted to rows whose index
        hour is 13, with normalization enabled, and checks shape, type,
        symmetry and a few expected magnitudes of the result.
        """
        variables = ["Temperature", "WindDir", "DayOfYear"]
        at_hour_13 = (self.data.index.hour == 13)

        plain: pd.DataFrame = pairwise_mi(
            self.data[variables], mask=at_hour_13, normalize=True)
        given_doy: pd.DataFrame = pairwise_mi(
            self.data[variables], mask=at_hour_13, normalize=True,
            cond=self.data["DayOfYear"])

        # Both results are 3x3 data frames
        both = (plain, given_doy)
        for frame in both:
            self.assertEqual(frame.shape, (3, 3))
        for frame in both:
            self.assertIsInstance(frame, pd.DataFrame)

        # The unconditional matrix is symmetric
        temp_vs_doy = plain.loc["Temperature", "DayOfYear"]
        self.assertEqual(temp_vs_doy, plain.loc["DayOfYear", "Temperature"])

        # Temperature depends strongly on the day of year
        self.assertAlmostEqual(temp_vs_doy, 0.9, delta=0.03)

        # Once DayOfYear is the conditioning variable, its own
        # correlation with temperature vanishes
        self.assertLess(given_doy.loc["Temperature", "DayOfYear"], 0.02)

        # Conditioning on DayOfYear strengthens the temperature / wind
        # direction correlation by more than 0.1
        wind_given_doy = given_doy.loc["Temperature", "WindDir"]
        wind_plain = plain.loc["Temperature", "WindDir"]
        self.assertGreater(wind_given_doy, wind_plain + 0.1)
Exemple #3
0
    def test_preprocess(self) -> None:
        """Preprocessing rescues estimation when ``cond`` has tiny variance.

        ``cond`` is the first data column scaled by 1e-5. Without
        preprocessing the estimate is expected to be off (not within 0.2
        of zero); with preprocessing it should fall below 0.03.
        """
        sample = self.generate_normal(2020_07_16)
        tiny_cond = sample[:, 0] * 1e-5

        raw_result = pairwise_mi(sample, cond=tiny_cond, preprocess=False)
        rescaled_result = pairwise_mi(sample, cond=tiny_cond, preprocess=True)

        raw_mi = raw_result[0, 1]
        rescaled_mi = rescaled_result[0, 1]
        self.assertNotAlmostEqual(raw_mi, 0.0, delta=0.2)
        self.assertLess(rescaled_mi, 0.03)
Exemple #4
0
    def test_k_larger_than_observations(self) -> None:
        """``k`` must be smaller than the number of usable observations.

        Two cases are checked: ``k`` equal to the full row count without a
        mask, and ``k`` equal to the number of rows left by a mask. Both
        must raise ValueError with ``K_TOO_LARGE_MSG``.
        """
        data = np.arange(20).reshape((10, 2))

        # Mask that keeps only the last five of the ten rows
        keep_last_half = np.arange(10) >= 5

        cases = [
            {"k": 10},                          # without mask
            {"k": 5, "mask": keep_last_half},   # with mask
        ]
        for kwargs in cases:
            with self.assertRaises(ValueError) as caught:
                pairwise_mi(data, **kwargs)
            self.assertEqual(str(caught.exception), K_TOO_LARGE_MSG)
Exemple #5
0
    def test_normalization(self) -> None:
        """Normalized pairwise MI of a two-variable sample.

        With ``normalize=True`` the result is expected to be a 2x2 matrix
        whose diagonal is NaN and whose two off-diagonal entries are both
        close to 0.6.
        """
        data = self.generate_normal(104)

        result = pairwise_mi(data, normalize=True)

        self.assertEqual(result.shape, (2, 2))
        self.assertTrue(np.isnan(result[0, 0]))
        self.assertTrue(np.isnan(result[1, 1]))
        # Check both off-diagonal entries; the second assertion previously
        # repeated [0, 1], leaving the symmetric entry [1, 0] untested.
        self.assertAlmostEqual(result[0, 1], 0.6, delta=0.05)
        self.assertAlmostEqual(result[1, 0], 0.6, delta=0.05)
Exemple #6
0
    def test_preprocess_two_cond_vars(self) -> None:
        """Preprocessing must rescale each ``cond`` column separately.

        ``cond`` has two columns of very different scale (a data column
        times 1e-5 and uniform noise times 1e6). If normalization were done
        over the whole ``cond`` array rather than per column, the
        small-scale column would stay degenerate and this check would fail.
        """
        seed = 2020_07_27
        sample = self.generate_normal(seed)

        small_column = sample[:, 0] * 1e-5
        noise_rng = np.random.default_rng(seed)
        large_column = noise_rng.uniform(size=len(sample)) * 1e6
        two_column_cond = np.column_stack((small_column, large_column))

        scaled = pairwise_mi(sample, cond=two_column_cond, preprocess=True)

        self.assertLess(scaled[0, 1], 0.03)
Exemple #7
0
    def test_conditioning(self) -> None:
        """Conditioning on one of the two variables removes the dependence.

        The second data column is used as ``cond``; the 2x2 result must have
        a NaN diagonal and off-diagonal values below 0.03.
        """
        sample = self.generate_normal(102)
        second_column = sample[:, 1]

        mi = pairwise_mi(sample, cond=second_column)

        self.assertEqual(mi.shape, (2, 2))
        for diag in range(2):
            self.assertTrue(np.isnan(mi[diag, diag]))
        for row, col in ((0, 1), (1, 0)):
            self.assertLess(mi[row, col], 0.03)
Exemple #8
0
    def test_ndarray(self) -> None:
        """Pairwise MI of a plain array matches the analytical value.

        The reference value is ``-0.5 * log(1 - 0.6**2)``; both off-diagonal
        entries must be within 0.03 of it and the diagonal must be NaN.
        """
        sample = self.generate_normal(100)
        analytical_mi = -0.5 * math.log(1 - 0.6**2)

        mi = pairwise_mi(sample)

        self.assertEqual(mi.shape, (2, 2))
        for diag in range(2):
            self.assertTrue(np.isnan(mi[diag, diag]))
        for row, col in ((0, 1), (1, 0)):
            self.assertAlmostEqual(mi[row, col], analytical_mi, delta=0.03)
Exemple #9
0
    def test_mask_removes_nans(self) -> None:
        """NaNs in the data are tolerated only when a mask excludes them.

        NaNs are written into rows 0-9 of the first column and rows 5-14 of
        the second. Without a mask the call must raise ValueError with
        ``NANS_LEFT_MSG``; with a mask that drops rows 0-14 the estimate
        must match ``-0.5 * log(1 - 0.6**2)`` within 0.02.
        """
        data = self.generate_normal(102)
        expected = -0.5 * math.log(1 - 0.6**2)
        data[0:10, 0] = np.nan
        data[5:15, 1] = np.nan

        # Without mask, the estimation should fail
        with self.assertRaises(ValueError) as cm:
            pairwise_mi(data)
        self.assertEqual(str(cm.exception), NANS_LEFT_MSG)

        # With mask, the estimation succeeds.
        # The mask length is derived from the data instead of being
        # hard-coded, so it stays correct if the sample size ever changes.
        mask = np.full(len(data), True)
        mask[0:15] = False
        result = pairwise_mi(data, mask=mask)

        self.assertEqual(result.shape, (2, 2))
        self.assertTrue(np.isnan(result[0, 0]))
        self.assertTrue(np.isnan(result[1, 1]))
        self.assertAlmostEqual(result[0, 1], expected, delta=0.02)
        self.assertAlmostEqual(result[1, 0], expected, delta=0.02)
Exemple #10
0
    def test_drop_nan(self) -> None:
        """``drop_nan=True`` discards rows with NaNs in the data or ``cond``.

        A bivariate normal sample with covariance 0.8 gets NaNs in both
        columns, and the uniform ``cond`` gets NaNs in its first column.
        The normalized estimate must still be within 0.02 of 0.8.
        """
        rng = np.random.default_rng(2020_07_28)
        covariance = np.array([[1, 0.8], [0.8, 1]])

        sample = rng.multivariate_normal([0, 0], cov=covariance, size=1000)
        sample[:50, 0] = np.nan
        sample[950:, 1] = np.nan

        noise = rng.uniform(size=sample.shape)
        for gap in (slice(100, 120), slice(900, 960)):
            noise[gap, 0] = np.nan

        estimate = pairwise_mi(sample, cond=noise, normalize=True, drop_nan=True)
        self.assertAlmostEqual(estimate[0, 1], 0.8, delta=0.02)
Exemple #11
0
    def test_callback(self) -> None:
        """The callback is invoked exactly once per variable pair ``i < j``.

        Runs with two sample sizes; per the original author's note, the
        larger one is meant to force the multithreaded code path.
        """
        n_vars = 10
        expected_calls = n_vars * (n_vars - 1) / 2

        for N in [100, 3000]:
            with self.subTest(N=N):
                seen_pairs = []

                def record(i: int, j: int) -> None:
                    seen_pairs.append((i, j))

                rng = np.random.default_rng(105)
                sample = rng.multivariate_normal([0] * n_vars, np.eye(n_vars), N)

                _ = pairwise_mi(sample, callback=record)

                self.assertEqual(len(seen_pairs), expected_calls)

                # Only the strict upper triangle may have been reported
                for pair in product(range(n_vars), range(n_vars)):
                    row, col = pair
                    check = self.assertIn if row < col else self.assertNotIn
                    check(pair, seen_pairs)
Exemple #12
0
    def test_pandas(self) -> None:
        """A DataFrame input yields a labelled DataFrame of pairwise MI.

        Columns X and Y are drawn from a bivariate normal with covariance
        0.6; Z is independent uniform noise. X-Y must match
        ``-0.5 * log(1 - 0.6**2)`` within 0.04, anything paired with Z must
        be within 0.03 of zero, and the diagonal must be NaN.
        """
        rng = np.random.default_rng(101)
        covariance = np.asarray([[1, 0.6], [0.6, 1]])
        correlated = rng.multivariate_normal([0, 0], covariance, 1000)
        independent = rng.uniform(size=1000)
        analytical_mi = -0.5 * math.log(1 - 0.6**2)

        frame = pd.DataFrame({
            "X": correlated[:, 0],
            "Y": correlated[:, 1],
            "Z": independent,
        })
        mi: pd.DataFrame = pairwise_mi(frame)

        self.assertEqual(mi.shape, (3, 3))
        self.assertIsInstance(mi, pd.DataFrame)

        for name in "XYZ":
            self.assertTrue(np.isnan(mi.loc[name, name]))

        for row, col in (("X", "Y"), ("Y", "X")):
            self.assertAlmostEqual(mi.loc[row, col], analytical_mi, delta=0.04)

        for name in "XY":
            self.assertAlmostEqual(mi.loc[name, "Z"], 0.0, delta=0.03)
            self.assertAlmostEqual(mi.loc["Z", name], 0.0, delta=0.03)
Exemple #13
0
 def test_data_has_three_dimensions(self) -> None:
     """A three-dimensional data array is rejected with ValueError.

     The error message must equal ``X_WRONG_DIMENSION_MSG``.
     """
     cube = np.zeros((10, 3, 2))

     with self.assertRaises(ValueError) as caught:
         pairwise_mi(cube)

     message = str(caught.exception)
     self.assertEqual(message, X_WRONG_DIMENSION_MSG)
Exemple #14
0
    def test_only_one_variable_returns_nan(self) -> None:
        """A flat list (a single variable) gives a 1x1 result holding NaN."""
        single_variable = [1, 2, 3, 4]

        mi = pairwise_mi(single_variable)

        self.assertEqual(mi.shape, (1, 1))
        only_entry = mi[0, 0]
        self.assertTrue(np.isnan(only_entry))
Exemple #15
0
 def test_invalid_k(self) -> None:
     """``k=0`` is rejected with ValueError carrying ``K_NEGATIVE_MSG``."""
     observations = np.zeros((10, 2))

     with self.assertRaises(ValueError) as caught:
         pairwise_mi(observations, k=0)

     message = str(caught.exception)
     self.assertEqual(message, K_NEGATIVE_MSG)
Exemple #16
0
    def test_only_one_variable_returns_nan_2d_array(self) -> None:
        """A nested list with one column gives a 1x1 result holding NaN."""
        single_column = [[1], [2], [3], [4]]

        mi = pairwise_mi(single_column)

        self.assertEqual(mi.shape, (1, 1))
        only_entry = mi[0, 0]
        self.assertTrue(np.isnan(only_entry))
Exemple #17
0
# Script fragment: builds a boolean mask on `data`, computes the normalized
# pairwise MI matrix with that mask, and draws it as a colored grid with the
# column names on both axes. `data` is defined earlier in the file (not
# visible here) and must have a datetime-like index and named columns.

#
# STEP 2: Preprocess
#
# Nothing to be done, because the distributions are roughly symmetric


#
# STEP 3: Create a mask
#
# Select only the rows whose index hour equals 13.
afternoon_mask = (data.index.hour == 13)


#
# STEP 4: Plot pairwise MI
#
pairwise = pairwise_mi(data, mask=afternoon_mask, normalize=True)

# Plot a matrix where the color represents the correlation coefficient.
# We clip the color values at 0.2 because of significant random noise,
# and at 0.8 to make the color contrast larger.
fig, ax = plt.subplots(figsize=(8,6))
mesh = ax.pcolormesh(pairwise, vmin=0.2, vmax=0.8)
# extend="both" marks on the colorbar that values outside [0.2, 0.8] exist.
fig.colorbar(mesh, label="MI correlation coefficient", extend="both")

# Show the variable names on the axes
# (ticks are shifted by 0.5 to sit at the center of each cell).
ax.set_xticks(np.arange(len(data.columns)) + 0.5)
ax.set_yticks(np.arange(len(data.columns)) + 0.5)
ax.set_xticklabels(data.columns)
ax.set_yticklabels(data.columns)
# NOTE(review): the mask selects index hour 13 while the title says 15:00
# local time -- presumably the index is in UTC and local time is UTC+2;
# confirm against the data loading step.
ax.set_title("Unconditional MI at 15:00 local time")