예제 #1
0
    def test_sparse(self):
        """Check Discrete and Continuous contingencies computed on the table
        returned by ``self._construct_sparse()``.

        Each contingency is indexed with ``0``, ``"b"`` and ``2`` and the
        results are compared with ``assert_dist_equal``.
        """
        table = self._construct_sparse()
        keys = (0, "b", 2)

        def check(cont, expected_by_key):
            # Compare the contingency entry for every key, in the fixed order.
            for key, expected in zip(keys, expected_by_key):
                assert_dist_equal(cont[key], expected)

        check(contingency.Discrete(table, 5),
              ([2, 0, 0], [0, 1, 1], [1, 0, 0]))

        # Column addressed by index ...
        check(contingency.Continuous(table, 14),
              ([[], []], [[1], [1]], [[2], [1]]))

        # ... and by name.
        check(contingency.Continuous(table, "c3"),
              ([[1.1], [1]], [[1], [1]], [[], []]))

        # Reassign the class of row 4 before computing the remaining ones.
        table[4].set_class(1)
        check(contingency.Continuous(table, 13),
              ([[], []], [[1, 1.1], [1, 1]], [[], []]))

        check(contingency.Continuous(table, 12),
              ([[], []], [[], []], [[], []]))
예제 #2
0
    def test_continuous_missing(self):
        """Check a Continuous contingency on iris as NaNs are introduced.

        NaNs are written first into a feature cell and then into ``Y``; after
        each step the ``unknowns`` and ``unknown_rows`` attributes of the
        freshly computed contingency are verified.
        """
        nan = float("nan")
        column = "sepal width"
        iris = data.Table("iris")

        # Step 1: NaN in row 1, attribute 1 (presumably its "sepal width").
        iris[1][1] = nan
        setosa_expected = [
            [2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0,
             4.1, 4.2, 4.4],
            [1, 1, 5, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1],
        ]
        result = contingency.Continuous(iris, column)
        np.testing.assert_almost_equal(result.unknowns, [1, 0, 0])
        np.testing.assert_almost_equal(result["Iris-setosa"], setosa_expected)
        self.assertEqual(result.unknown_rows, 0)

        # Step 2: additionally, NaN as the class of row 0.
        iris.Y[0] = nan
        virginica_expected = [
            [2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8],
            [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2],
        ]
        result = contingency.Continuous(iris, column)
        virginica = iris.domain.class_var.values.index("Iris-virginica")
        np.testing.assert_almost_equal(result[virginica], virginica_expected)
        np.testing.assert_almost_equal(result.unknowns, [1, 0, 0])
        self.assertEqual(result.unknown_rows, 1)

        # Step 3: NaN as the class of row 1 as well.
        iris.Y[1] = nan
        result = contingency.Continuous(iris, column)
        np.testing.assert_almost_equal(result.unknowns, [0, 0, 0])
        self.assertEqual(result.unknown_rows, 2)
예제 #3
0
    def test_continuous_missing(self):
        """Check a Continuous contingency on iris as NaNs are introduced.

        NaNs are written (inside ``unlocked()`` blocks) first into a feature
        cell and then into ``Y``; after each step the ``col_unknowns``,
        ``row_unknowns`` and ``unknowns`` attributes of the freshly computed
        contingency are verified.
        """
        nan = float("nan")
        column = "sepal width"
        iris = data.Table("iris")

        # Step 1: NaN in row 1, attribute 1 (presumably its "sepal width").
        with iris.unlocked():
            iris[1][1] = nan
        setosa_expected = [
            [2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0,
             4.1, 4.2, 4.4],
            [1, 1, 5, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1],
        ]
        result = contingency.Continuous(iris, column)
        np.testing.assert_almost_equal(result.col_unknowns, [1, 0, 0])
        np.testing.assert_almost_equal(result.row_unknowns, np.zeros(23))
        np.testing.assert_almost_equal(result["Iris-setosa"], setosa_expected)
        self.assertEqual(result.unknowns, 0)

        # Expected row_unknowns for steps 2 and 3: 23 entries, all zero
        # except a single 1 at position 14.
        single_row_unknown = np.zeros(23)
        single_row_unknown[14] = 1

        # Step 2: additionally, NaN as the class of row 0.
        with iris.unlocked():
            iris.Y[0] = nan
        virginica_expected = [
            [2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8],
            [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2],
        ]
        result = contingency.Continuous(iris, column)
        virginica = iris.domain.class_var.values.index("Iris-virginica")
        np.testing.assert_almost_equal(result[virginica], virginica_expected)
        np.testing.assert_almost_equal(result.col_unknowns, [1, 0, 0])
        np.testing.assert_almost_equal(result.row_unknowns, single_row_unknown)
        self.assertEqual(result.unknowns, 0)

        # Step 3: NaN as the class of row 1 as well.
        with iris.unlocked():
            iris.Y[1] = nan
        result = contingency.Continuous(iris, column)
        np.testing.assert_almost_equal(result.col_unknowns, [0, 0, 0])
        np.testing.assert_almost_equal(result.row_unknowns, single_row_unknown)
        self.assertEqual(result.unknowns, 1)

        # Regression check: this case used to fail because of an issue in
        # _contingency.pyx. The first 50 class values are all set to NaN.
        with iris.unlocked():
            iris.Y[:50] = np.zeros(50) * nan
        result = contingency.Continuous(iris, column)
        many_rows_unknown = [
            0., 0., 1., 0., 0., 0., 0., 0., 1., 5., 5., 5., 2., 9., 6., 2., 3.,
            4., 2., 1., 1., 1., 1.,
        ]
        np.testing.assert_almost_equal(result.col_unknowns, [0, 0, 0])
        np.testing.assert_almost_equal(result.row_unknowns, many_rows_unknown)
        self.assertEqual(result.unknowns, 1)
예제 #4
0
    @staticmethod
    def test_continuous_array_with_unknowns():
        """
        Test the ``array_with_unknowns`` attribute of a Continuous contingency.

        The first 50 class values of iris are set to NaN. The last element of
        ``array_with_unknowns`` must then hold the values and counts of
        ``row_unknowns`` with zero counts dropped, and the preceding elements
        must match what iterating the contingency yields.

        Declared as a static method because it takes no ``self``; without the
        decorator, calling it on a test-case instance raises ``TypeError``.
        """
        d = data.Table("iris")
        d.Y[:50] = np.zeros(50) * float("nan")
        cont = contingency.Continuous(d, "sepal width")
        correct_row_unknowns = [
            0., 0., 1., 0., 0., 0., 0., 0., 1., 6., 5., 5., 2., 9., 6., 2., 3.,
            4., 2., 1., 1., 1., 1.
        ]
        # Only entries with a non-zero count are expected in the last element
        # of array_with_unknowns.
        correct_row_unknowns_no_zero = [
            c for c in correct_row_unknowns if c > 0
        ]
        correct_values_no_zero = [
            v for v, c in zip(cont.values, correct_row_unknowns) if c > 0
        ]

        np.testing.assert_almost_equal(cont.row_unknowns, correct_row_unknowns)
        arr_unknowns = cont.array_with_unknowns
        np.testing.assert_almost_equal(arr_unknowns[-1][1],
                                       correct_row_unknowns_no_zero)
        np.testing.assert_almost_equal(arr_unknowns[-1][0],
                                       correct_values_no_zero)

        # The remaining elements must match what iterating the contingency
        # yields.
        for v1, v2 in zip(arr_unknowns[:-1], cont):
            np.testing.assert_almost_equal(v1, v2)
예제 #5
0
    def test_continuous(self):
        """Check the Continuous contingency of "sepal width" on iris for two
        classes, one addressed by name and one by its index.
        """
        iris = data.Table("iris")
        result = contingency.Continuous(iris, "sepal width")

        # Each expected entry is a pair of rows: values, then their counts.
        setosa_expected = [
            [2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0,
             4.1, 4.2, 4.4],
            [1, 1, 6, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1],
        ]
        virginica_expected = [
            [2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8],
            [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2],
        ]

        # Lookup by class name.
        np.testing.assert_almost_equal(result["Iris-setosa"], setosa_expected)

        # Lookup by the position of the class value.
        virginica = iris.domain.class_var.values.index("Iris-virginica")
        np.testing.assert_almost_equal(result[virginica], virginica_expected)