예제 #1
0
    def test_allequal(self):
        """Columns that repeat a single value are expected to have zero entropy."""
        rows = 100
        columns = {
            "c1": [chr(0)] * rows,
            "c2": [1] * rows,
            "c3": [0.7] * rows,
        }
        df = pd.DataFrame()
        for name, values in columns.items():
            df[name] = values

        # The same expectation holds for the string, integer and float column.
        for position in range(len(columns)):
            self.assertEqual(entropy(position, df)[0], 0.)
예제 #2
0
    def test_allnull(self):
        """Columns made up entirely of null values are expected to have zero entropy.

        Three flavours of "null" are covered: ``None``, a literal NaN and a NaN
        produced by arithmetic.
        """
        df = pd.DataFrame()
        df["c1"] = [None for _ in range(100)]
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
        # in NumPy 2.0 and raises AttributeError there.
        df["c2"] = [np.nan for _ in range(100)]
        df["c3"] = [np.nan / 0.7 for _ in range(100)]

        for column in range(3):
            r = entropy(column, df)[0]
            self.assertEqual(r, 0.)
예제 #3
0
    def test_empty(self):
        """A frame whose columns contain no rows is expected to give zero entropy."""
        df = pd.DataFrame()
        for name in ("c1", "c2"):
            df[name] = []

        # Only the first column is checked.
        self.assertEqual(entropy(0, df)[0], 0.)
예제 #4
0
    def test_halfhalf(self):
        """Two equally frequent values per column are expected to give one bit."""
        half = 50
        columns = {
            "c1": [chr(1)] * half + ["zz"] * half,
            "c2": [2] * half + [100] * half,
            "c3": [0.7] * half + [32.] * half,
        }
        df = pd.DataFrame()
        for name, values in columns.items():
            df[name] = values

        # A 50/50 split between two values corresponds to an entropy of 1.
        for position in range(len(columns)):
            self.assertAlmostEqual(entropy(position, df)[0], 1., delta=0.000001)
예제 #5
0
    def test_halfnull_halfequal(self):
        """Half null, half one repeated value: the expected entropy is zero.

        The expectation implies that null entries do not count as a value of
        their own, leaving a single distinct value per column.
        """
        df = pd.DataFrame()
        c1 = [chr(1) for _ in range(50)]
        c2 = [2 for _ in range(50)]
        c3 = [0.7 for _ in range(50)]
        c1.extend([None for _ in range(50)])
        # ``np.nan`` instead of ``np.NaN``: the alias was removed in NumPy 2.0.
        c2.extend([np.nan for _ in range(50)])
        c3.extend([None for _ in range(50)])
        df["c1"] = c1
        df["c2"] = c2
        df["c3"] = c3

        for column in range(3):
            r = entropy(column, df)[0]
            self.assertAlmostEqual(r, 0., delta=0.000001)
예제 #6
0
    def test_alldifferent(self):
        """Columns of pairwise-distinct values, before and after nulling ten rows.

        The expected values are log2(100) for 100 distinct entries and log2(90)
        once the first ten entries of every column have been replaced by nulls.
        """
        df = pd.DataFrame()
        c1 = [chr(i) for i in range(100)]
        c2 = [i for i in range(100)]
        c3 = [i / 0.7 for i in range(100)]
        df["c1"] = c1
        df["c2"] = c2
        df["c3"] = c3

        res = 6.6438561897747395  # log2(100)
        for column in range(3):
            r = entropy(column, df)[0]
            self.assertAlmostEqual(r, res, delta=0.000001)

        # Replace the first ten rows of each column with null values.
        for i in range(10):
            c1[i] = None
            c2[i] = None
            # ``np.nan`` instead of ``np.NaN``: the alias was removed in NumPy 2.0.
            c3[i] = np.nan
        df["c1"] = c1
        df["c2"] = c2
        df["c3"] = c3

        res = 6.491853096329675  # log2(90)
        for column in range(3):
            r = entropy(column, df)[0]
            self.assertAlmostEqual(r, res, delta=0.000001)
예제 #7
0
    def test_mixed(self):
        """Ten unique values plus one value repeated twenty times, with and without nulls.

        The expected value matches a distribution of ten entries with
        probability 1/30 and one entry with probability 20/30. It is asserted
        again, unchanged, after five null rows are appended.
        """
        df = pd.DataFrame()
        c1 = [chr(i) for i in range(10)]
        c2 = [i for i in range(1, 11)]
        c3 = [i / 0.7 for i in range(1, 11)]
        c1.extend(["swwewww" for _ in range(20)])
        c2.extend([5000 for _ in range(20)])
        c3.extend([231321.23131 for _ in range(20)])
        df["c1"] = c1
        df["c2"] = c2
        df["c3"] = c3

        res = 2.025605199016944
        for column in range(3):
            r = entropy(column, df)[0]
            self.assertAlmostEqual(r, res, delta=0.000001)

        # Append five null rows; ``np.nan`` replaces the ``np.NaN`` alias that
        # was removed in NumPy 2.0.
        c1.extend([None for _ in range(5)])
        c2.extend([np.nan for _ in range(5)])
        c3.extend([np.nan for _ in range(5)])
        df = pd.DataFrame()
        df["c1"] = c1
        df["c2"] = c2
        df["c3"] = c3

        # Same expected value as before the null rows were added.
        for column in range(3):
            r = entropy(column, df)[0]
            self.assertAlmostEqual(r, res, delta=0.000001)