def test_sparse(self):
    """Check contingencies computed from the sparse fixture table.

    The table comes from ``self._construct_sparse()``.  For each requested
    variable the contingency is indexed by the class keys ``0``, ``"b"`` and
    ``2`` (in that order) and compared with the expected distribution using
    ``assert_dist_equal``.  The class of row 4 is changed to ``1`` before
    the last two contingencies are built.
    """
    table = self._construct_sparse()
    class_keys = (0, "b", 2)

    def check(cont, expected_per_class):
        # One comparison per class key, in the fixed key order above.
        for key, expected in zip(class_keys, expected_per_class):
            assert_dist_equal(cont[key], expected)

    check(
        contingency.Discrete(table, 5),
        ([2, 0, 0], [0, 1, 1], [1, 0, 0]),
    )
    check(
        contingency.Continuous(table, 14),
        ([[], []], [[1], [1]], [[2], [1]]),
    )
    check(
        contingency.Continuous(table, "c3"),
        ([[1.1], [1]], [[1], [1]], [[], []]),
    )

    # Re-assign the class of row 4; the remaining checks see this change.
    table[4].set_class(1)

    check(
        contingency.Continuous(table, 13),
        ([[], []], [[1, 1.1], [1, 1]], [[], []]),
    )
    check(
        contingency.Continuous(table, 12),
        ([[], []], [[], []], [[], []]),
    )
def test_continuous_missing(self):
    """Check ``contingency.Continuous`` on iris with missing values.

    Three stages are verified for the "sepal width" contingency:

    1. one attribute value (``d[1][1]``) is set to NaN;
    2. additionally the class of row 0 is set to NaN;
    3. additionally the class of row 1 is set to NaN.

    After each stage ``unknowns`` and ``unknown_rows`` are compared with
    the expected values.

    NOTE(review): another function with this same name is defined later in
    this file; if both live in the same class, only the later one is
    collected -- confirm which one is intended.
    """
    nan = float("nan")
    almost_equal = np.testing.assert_almost_equal

    setosa_expected = [
        [2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5,
         3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.4],
        [1, 1, 5, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1],
    ]
    virginica_expected = [
        [2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8],
        [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2],
    ]

    iris = data.Table("iris")

    # Stage 1: a single missing attribute value.
    iris[1][1] = nan
    table = contingency.Continuous(iris, "sepal width")
    almost_equal(table.unknowns, [1, 0, 0])
    almost_equal(table["Iris-setosa"], setosa_expected)
    self.assertEqual(table.unknown_rows, 0)

    # Stage 2: the class of row 0 is missing as well.
    iris.Y[0] = nan
    table = contingency.Continuous(iris, "sepal width")
    virginica = iris.domain.class_var.values.index("Iris-virginica")
    almost_equal(table[virginica], virginica_expected)
    almost_equal(table.unknowns, [1, 0, 0])
    self.assertEqual(table.unknown_rows, 1)

    # Stage 3: the class of row 1 (whose attribute is already NaN) is
    # missing too.
    iris.Y[1] = nan
    table = contingency.Continuous(iris, "sepal width")
    almost_equal(table.unknowns, [0, 0, 0])
    self.assertEqual(table.unknown_rows, 2)
def test_continuous_missing(self):
    """Check ``contingency.Continuous`` on iris with missing values.

    The iris table is modified inside ``d.unlocked()`` blocks in four
    stages; after each one the "sepal width" contingency is rebuilt and
    its ``col_unknowns``, ``row_unknowns`` and ``unknowns`` are compared
    with the expected values:

    1. ``d[1][1]`` is set to NaN;
    2. the class of row 0 is set to NaN;
    3. the class of row 1 is set to NaN;
    4. the classes of the first 50 rows are set to NaN.

    NOTE(review): another function with this same name is defined earlier
    in this file; if both live in the same class, this one replaces it --
    confirm that is intended.
    """
    nan = float("nan")
    almost_equal = np.testing.assert_almost_equal
    iris = data.Table("iris")

    def rebuild():
        # Fresh contingency reflecting the current state of the table.
        return contingency.Continuous(iris, "sepal width")

    setosa_expected = [
        [2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5,
         3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.4],
        [1, 1, 5, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1],
    ]
    virginica_expected = [
        [2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8],
        [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2],
    ]
    # Expected row_unknowns for stages 2 and 3: a single count at index 14.
    single_row_unknown = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ]

    # Stage 1: a single missing attribute value.
    with iris.unlocked():
        iris[1][1] = nan
    table = rebuild()
    almost_equal(table.col_unknowns, [1, 0, 0])
    almost_equal(table.row_unknowns, np.zeros(23))
    almost_equal(table["Iris-setosa"], setosa_expected)
    self.assertEqual(table.unknowns, 0)

    # Stage 2: the class of row 0 is missing as well.
    with iris.unlocked():
        iris.Y[0] = nan
    table = rebuild()
    virginica = iris.domain.class_var.values.index("Iris-virginica")
    almost_equal(table[virginica], virginica_expected)
    almost_equal(table.col_unknowns, [1, 0, 0])
    almost_equal(table.row_unknowns, single_row_unknown)
    self.assertEqual(table.unknowns, 0)

    # Stage 3: row 1 now misses both its attribute value and its class.
    with iris.unlocked():
        iris.Y[1] = nan
    table = rebuild()
    almost_equal(table.col_unknowns, [0, 0, 0])
    almost_equal(table.row_unknowns, single_row_unknown)
    self.assertEqual(table.unknowns, 1)

    # Stage 4: this case used to fail because of an issue in
    # _contingency.pyx.
    with iris.unlocked():
        iris.Y[:50] = np.zeros(50) * nan
    table = rebuild()
    almost_equal(table.col_unknowns, [0, 0, 0])
    almost_equal(
        table.row_unknowns,
        [
            0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 5.0, 5.0, 5.0,
            2.0, 9.0, 6.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0,
        ],
    )
    self.assertEqual(table.unknowns, 1)
def test_continuous_array_with_unknowns():
    """Check ``array_with_unknowns`` of a continuous contingency.

    The classes of the first 50 iris rows are set to NaN and the
    "sepal width" contingency is built.  The last entry of
    ``array_with_unknowns`` must hold the values and counts of
    ``row_unknowns`` with the zero counts removed; every other entry must
    equal what iterating over the contingency yields.

    NOTE(review): this function takes no ``self`` -- presumably it is a
    ``@staticmethod`` or a module-level test; confirm.
    NOTE(review): ``d.Y`` is written here without a ``d.unlocked()`` block,
    unlike the sibling test that performs the same assignment; confirm
    against the Orange version in use.
    """
    almost_equal = np.testing.assert_almost_equal

    iris = data.Table("iris")
    iris.Y[:50] = np.zeros(50) * float("nan")
    table = contingency.Continuous(iris, "sepal width")

    expected_counts = [
        0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 6.0, 5.0, 5.0,
        2.0, 9.0, 6.0, 2.0, 3.0, 4.0, 2.0, 1.0, 1.0, 1.0, 1.0,
    ]
    # Positions whose expected unknown-class count is non-zero.
    keep = [count > 0 for count in expected_counts]
    nonzero_counts = [
        count for count, kept in zip(expected_counts, keep) if kept
    ]
    nonzero_values = [
        value for value, kept in zip(table.values, keep) if kept
    ]

    almost_equal(table.row_unknowns, expected_counts)

    with_unknowns = table.array_with_unknowns
    unknown_entry = with_unknowns[-1]
    almost_equal(unknown_entry[1], nonzero_counts)
    almost_equal(unknown_entry[0], nonzero_values)

    # The remaining entries must agree with plain iteration over the
    # contingency.
    for from_array, from_iteration in zip(with_unknowns[:-1], table):
        almost_equal(from_array, from_iteration)
def test_continuous(self):
    """Check the "sepal width" contingency of the unmodified iris table.

    The entry looked up by the class name "Iris-setosa" and the entry
    looked up by the index of "Iris-virginica" among the class values are
    compared with the expected ``[values, counts]`` pairs.
    """
    almost_equal = np.testing.assert_almost_equal

    iris = data.Table("iris")
    table = contingency.Continuous(iris, "sepal width")

    setosa_expected = [
        [2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5,
         3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.4],
        [1, 1, 6, 5, 5, 2, 9, 6, 2, 3, 4, 2, 1, 1, 1, 1],
    ]
    almost_equal(table["Iris-setosa"], setosa_expected)

    virginica_expected = [
        [2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8],
        [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2],
    ]
    # Look the class up by position rather than by name this time.
    virginica = iris.domain.class_var.values.index("Iris-virginica")
    almost_equal(table[virginica], virginica_expected)