Example #1
0
    def test_allequal(self):
        # Both columns hold one repeated value (chr(0) in c1, 1 in c2).
        # The first element returned by mutual_info must equal the reference
        # value from self.mi exactly, for both argument orders (0, 1) and (1, 0).
        df = pd.DataFrame()
        df["c1"] = [chr(0)] * 100
        df["c2"] = [1] * 100

        expected = self.mi(df, "c1", "c2")
        for first, second in ((0, 1), (1, 0)):
            score = mutual_info(first, second, df)[0]
            self.assertEqual(score, expected)
Example #2
0
    def test_allnull(self):
        # Every row is missing: c1 is all None, c2 is all NaN.
        # The first element returned by mutual_info must equal the reference
        # value from self.mi exactly, for both argument orders.
        #
        # np.nan is used rather than np.NaN: the capitalised alias was removed
        # in NumPy 2.0 and raises AttributeError there, while np.nan works on
        # every NumPy version.
        df = pd.DataFrame()
        df["c1"] = [None] * 100
        df["c2"] = [np.nan] * 100

        expected = self.mi(df, "c1", "c2")
        for first, second in ((0, 1), (1, 0)):
            score = mutual_info(first, second, df)[0]
            self.assertEqual(score, expected)
Example #3
0
    def test_halfnull_halfequal(self):
        # First 50 rows carry one constant value per column (chr(1) / 2);
        # the last 50 rows are missing (None in c1, NaN in c2).
        # mutual_info's first return element must match the self.mi reference
        # to within 1e-6, for both argument orders.
        #
        # np.nan is used rather than np.NaN: the capitalised alias was removed
        # in NumPy 2.0 and raises AttributeError there.
        df = pd.DataFrame()
        df["c1"] = [chr(1)] * 50 + [None] * 50
        df["c2"] = [2] * 50 + [np.nan] * 50

        expected = self.mi(df, "c1", "c2")
        for first, second in ((0, 1), (1, 0)):
            score = mutual_info(first, second, df)[0]
            self.assertAlmostEqual(score, expected, delta=0.000001)
Example #4
0
    def test_empty(self):
        # A frame with two zero-row columns: the first element returned by
        # mutual_info(0, 1, df) is expected to be exactly 0.0.
        df = pd.DataFrame()
        for column in ("c1", "c2"):
            df[column] = []

        score = mutual_info(0, 1, df)[0]
        self.assertEqual(score, 0.0)
Example #5
0
    def test_halfhalf_shuffled(self):
        # Each column holds two distinct values, 50 rows each, independently
        # shuffled. For every pair of columns, mutual_info's first return
        # element must match the self.mi reference to within 1e-6, for both
        # argument orders. The whole scenario is run twice with fresh shuffles.
        #
        # (name_a, name_b, pos_a, pos_b): column labels passed to self.mi and
        # the integer arguments passed to mutual_info for the same pair.
        column_pairs = (
            ("c1", "c2", 0, 1),
            ("c1", "c3", 0, 2),
            ("c2", "c3", 1, 2),
        )
        for _ in range(2):
            df = pd.DataFrame()
            c1 = [chr(1)] * 50 + ["zz"] * 50
            c2 = [2] * 50 + [100] * 50
            c3 = [0.7] * 50 + [32.0] * 50
            # Shuffle in a fixed order (c1, c2, c3) so the random stream is
            # consumed the same way on every run.
            for values in (c1, c2, c3):
                random.shuffle(values)
            df["c1"] = c1
            df["c2"] = c2
            df["c3"] = c3

            for name_a, name_b, pos_a, pos_b in column_pairs:
                expected = self.mi(df, name_a, name_b)
                for first, second in ((pos_a, pos_b), (pos_b, pos_a)):
                    score = mutual_info(first, second, df)[0]
                    self.assertAlmostEqual(score, expected, delta=0.000001)
Example #6
0
    def test_mixed_shuffled_with_null(self):
        # Each column holds 50 distinct values plus 50 missing entries
        # (None in c1 and c3, NaN in c2), independently shuffled. For every
        # pair of columns, mutual_info's first return element must match the
        # self.mi reference to within 1e-6, for both argument orders. The
        # whole scenario is run twice with fresh shuffles.
        #
        # np.nan is used rather than np.NaN: the capitalised alias was removed
        # in NumPy 2.0 and raises AttributeError there.
        #
        # (name_a, name_b, pos_a, pos_b): column labels passed to self.mi and
        # the integer arguments passed to mutual_info for the same pair.
        column_pairs = (
            ("c1", "c2", 0, 1),
            ("c1", "c3", 0, 2),
            ("c2", "c3", 1, 2),
        )
        for _ in range(2):
            df = pd.DataFrame()
            c1 = [chr(i) for i in range(50)] + [None] * 50
            c2 = list(range(1, 51)) + [np.nan] * 50
            c3 = [i / 0.7 for i in range(1, 51)] + [None] * 50
            # Shuffle in a fixed order (c1, c2, c3) so the random stream is
            # consumed the same way as before.
            for values in (c1, c2, c3):
                random.shuffle(values)
            df["c1"] = c1
            df["c2"] = c2
            df["c3"] = c3

            for name_a, name_b, pos_a, pos_b in column_pairs:
                expected = self.mi(df, name_a, name_b)
                for first, second in ((pos_a, pos_b), (pos_b, pos_a)):
                    score = mutual_info(first, second, df)[0]
                    self.assertAlmostEqual(score, expected, delta=0.000001)