class TestNaiveBayesFit(unittest.TestCase): def setUp(self): self.model1 = NaiveBayes() self.model2 = NaiveBayes([("A", "B")]) def test_fit_model_creation(self): values = pd.DataFrame( np.random.randint(low=0, high=2, size=(1000, 5)), columns=["A", "B", "C", "D", "E"], ) self.model1.fit(values, "A") six.assertCountEqual(self, self.model1.nodes(), ["A", "B", "C", "D", "E"]) six.assertCountEqual( self, self.model1.edges(), [("A", "B"), ("A", "C"), ("A", "D"), ("A", "E")] ) self.assertEqual(self.model1.parent_node, "A") self.assertSetEqual(self.model1.children_nodes, {"B", "C", "D", "E"}) self.model2.fit(values) six.assertCountEqual(self, self.model1.nodes(), ["A", "B", "C", "D", "E"]) six.assertCountEqual( self, self.model1.edges(), [("A", "B"), ("A", "C"), ("A", "D"), ("A", "E")] ) self.assertEqual(self.model2.parent_node, "A") self.assertSetEqual(self.model2.children_nodes, {"B", "C", "D", "E"}) def test_fit_model_creation_exception(self): values = pd.DataFrame( np.random.randint(low=0, high=2, size=(1000, 5)), columns=["A", "B", "C", "D", "E"], ) values2 = pd.DataFrame( np.random.randint(low=0, high=2, size=(1000, 3)), columns=["C", "D", "E"] ) self.assertRaises(ValueError, self.model1.fit, values) self.assertRaises(ValueError, self.model1.fit, values2) self.assertRaises(ValueError, self.model2.fit, values2, "A") def tearDown(self): del self.model1 del self.model2
class TestNaiveBayesFit(unittest.TestCase): def setUp(self): self.model1 = NaiveBayes() self.model2 = NaiveBayes([('A', 'B')]) def test_fit_model_creation(self): values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)), columns=['A', 'B', 'C', 'D', 'E']) self.model1.fit(values, 'A') six.assertCountEqual(self, self.model1.nodes(), ['A', 'B', 'C', 'D', 'E']) six.assertCountEqual(self, self.model1.edges(), [('A', 'B'), ('A', 'C'), ('A', 'D'), ('A', 'E')]) self.assertEqual(self.model1.parent_node, 'A') self.assertSetEqual(self.model1.children_nodes, {'B', 'C', 'D', 'E'}) self.model2.fit(values) six.assertCountEqual(self, self.model1.nodes(), ['A', 'B', 'C', 'D', 'E']) six.assertCountEqual(self, self.model1.edges(), [('A', 'B'), ('A', 'C'), ('A', 'D'), ('A', 'E')]) self.assertEqual(self.model2.parent_node, 'A') self.assertSetEqual(self.model2.children_nodes, {'B', 'C', 'D', 'E'}) def test_fit_model_creation_exception(self): values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)), columns=['A', 'B', 'C', 'D', 'E']) values2 = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 3)), columns=['C', 'D', 'E']) self.assertRaises(ValueError, self.model1.fit, values) self.assertRaises(ValueError, self.model1.fit, values2) self.assertRaises(ValueError, self.model2.fit, values2, 'A') def tearDown(self): del self.model1 del self.model2
class TestNaiveBayesFit(unittest.TestCase): def setUp(self): self.model1 = NaiveBayes() self.model2 = NaiveBayes([('A','B')]) def test_fit_model_creation(self): values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)), columns=['A', 'B', 'C', 'D', 'E']) self.model1.fit(values, 'A') six.assertCountEqual(self, self.model1.nodes(), ['A', 'B', 'C', 'D', 'E']) six.assertCountEqual(self, self.model1.edges(), [('A', 'B'), ('A', 'C'), ('A', 'D'), ('A', 'E')]) self.assertEqual(self.model1.parent_node, 'A') self.assertSetEqual(self.model1.children_nodes, {'B','C','D','E'}) self.model2.fit(values) six.assertCountEqual(self, self.model1.nodes(), ['A', 'B', 'C', 'D', 'E']) six.assertCountEqual(self, self.model1.edges(), [('A', 'B'), ('A', 'C'), ('A', 'D'), ('A', 'E')]) self.assertEqual(self.model2.parent_node, 'A') self.assertSetEqual(self.model2.children_nodes, {'B','C','D','E'}) def test_fit_model_creation_exception(self): values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)), columns=['A', 'B', 'C', 'D', 'E']) values2 = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 3)), columns=['C', 'D', 'E']) self.assertRaises(ValueError, self.model1.fit, values) self.assertRaises(ValueError, self.model1.fit, values2) self.assertRaises(ValueError, self.model2.fit, values2, 'A') def tearDown(self): del self.model1 del self.model2
class TestBaseModelCreation(unittest.TestCase): def setUp(self): self.G = NaiveBayes() def test_class_init_without_data(self): self.assertIsInstance(self.G, nx.DiGraph) def test_class_init_with_data_string(self): self.g = NaiveBayes([('a', 'b'), ('a', 'c')]) six.assertCountEqual(self, self.g.nodes(), ['a', 'b', 'c']) six.assertCountEqual(self, self.g.edges(), [('a', 'b'), ('a', 'c')]) self.assertEqual(self.g.parent_node, 'a') self.assertSetEqual(self.g.children_nodes, {'b', 'c'}) self.assertRaises(ValueError, NaiveBayes, [('a', 'b'), ('b', 'c')]) self.assertRaises(ValueError, NaiveBayes, [('a', 'b'), ('c', 'b')]) self.assertRaises(ValueError, NaiveBayes, [('a', 'b'), ('d', 'e')]) def test_class_init_with_data_nonstring(self): self.g = NaiveBayes([(1, 2), (1, 3)]) six.assertCountEqual(self, self.g.nodes(), [1, 2, 3]) six.assertCountEqual(self, self.g.edges(), [(1, 2), (1, 3)]) self.assertEqual(self.g.parent_node, 1) self.assertSetEqual(self.g.children_nodes, {2, 3}) self.assertRaises(ValueError, NaiveBayes, [(1, 2), (2, 3)]) self.assertRaises(ValueError, NaiveBayes, [(1, 2), (3, 2)]) self.assertRaises(ValueError, NaiveBayes, [(1, 2), (3, 4)]) def test_add_node_string(self): self.G.add_node('a') self.assertListEqual(self.G.nodes(), ['a']) def test_add_node_nonstring(self): self.G.add_node(1) self.assertListEqual(self.G.nodes(), [1]) def test_add_nodes_from_string(self): self.G.add_nodes_from(['a', 'b', 'c', 'd']) six.assertCountEqual(self, self.G.nodes(), ['a', 'b', 'c', 'd']) def test_add_nodes_from_non_string(self): self.G.add_nodes_from([1, 2, 3, 4]) six.assertCountEqual(self, self.G.nodes(), [1, 2, 3, 4]) def test_add_edge_string(self): self.G.add_edge('a', 'b') six.assertCountEqual(self, self.G.nodes(), ['a', 'b']) self.assertListEqual(self.G.edges(), [('a', 'b')]) self.assertEqual(self.G.parent_node, 'a') self.assertSetEqual(self.G.children_nodes, {'b'}) self.G.add_nodes_from(['c', 'd']) self.G.add_edge('a', 'c') self.G.add_edge('a', 'd') six.assertCountEqual(self, self.G.nodes(), ['a', 'b', 'c', 'd']) six.assertCountEqual(self, self.G.edges(), [('a', 'b'), ('a', 'c'), ('a', 'd')]) self.assertEqual(self.G.parent_node, 'a') self.assertSetEqual(self.G.children_nodes, {'b', 'c', 'd'}) self.assertRaises(ValueError, self.G.add_edge, 'b', 'c') self.assertRaises(ValueError, self.G.add_edge, 'd', 'f') self.assertRaises(ValueError, self.G.add_edge, 'e', 'f') self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'e'), ('b', 'f')]) self.assertRaises(ValueError, self.G.add_edges_from, [('b', 'f')]) def test_add_edge_nonstring(self): self.G.add_edge(1, 2) six.assertCountEqual(self, self.G.nodes(), [1, 2]) self.assertListEqual(self.G.edges(), [(1, 2)]) self.assertEqual(self.G.parent_node, 1) self.assertSetEqual(self.G.children_nodes, {2}) self.G.add_nodes_from([3, 4]) self.G.add_edge(1, 3) self.G.add_edge(1, 4) six.assertCountEqual(self, self.G.nodes(), [1, 2, 3, 4]) six.assertCountEqual(self, self.G.edges(), [(1, 2), (1, 3), (1, 4)]) self.assertEqual(self.G.parent_node, 1) self.assertSetEqual(self.G.children_nodes, {2, 3, 4}) self.assertRaises(ValueError, self.G.add_edge, 2, 3) self.assertRaises(ValueError, self.G.add_edge, 3, 6) self.assertRaises(ValueError, self.G.add_edge, 5, 6) self.assertRaises(ValueError, self.G.add_edges_from, [(1, 5), (2, 6)]) self.assertRaises(ValueError, self.G.add_edges_from, [(2, 6)]) def test_add_edge_selfloop(self): self.assertRaises(ValueError, self.G.add_edge, 'a', 'a') self.assertRaises(ValueError, self.G.add_edge, 1, 1) def test_add_edges_from_self_loop(self): self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')]) def test_update_node_parents_bm_constructor(self): self.g = NaiveBayes([('a', 'b'), ('a', 'c')]) self.assertListEqual(self.g.predecessors('a'), []) self.assertListEqual(self.g.predecessors('b'), ['a']) self.assertListEqual(self.g.predecessors('c'), ['a']) def test_update_node_parents(self): self.G.add_nodes_from(['a', 'b', 'c']) self.G.add_edges_from([('a', 'b'), ('a', 'c')]) self.assertListEqual(self.G.predecessors('a'), []) self.assertListEqual(self.G.predecessors('b'), ['a']) self.assertListEqual(self.G.predecessors('c'), ['a']) def tearDown(self): del self.G
PseudoCounts = {} #Pseudocounts are given (1,1) for uniform for productName in smallDF.columns: PseudoCounts[productName] = [1, 1] DictOfModels = {} Edges = {} Nodes = {} CPD = {} for productName in smallDF.columns: print('Building model for {0}'.format(productName)) model = NaiveBayes() model.fit(smallDF, productName) DictOfModels[productName] = model #Save edge ,node, CPD information Edges[productName] = model.edges() Nodes[productName] = model.nodes() CPD[productName] = model.get_cpds() with open("Edges.txt", "wb") as fp: pickle.dump(Edges, fp) with open("Nodes.txt", "wb") as fp: pickle.dump(Nodes, fp) with open("CPD.txt", "wb") as fp: pickle.dump(CPD, fp) with open("RandomColumns.txt", "wb") as fp: pickle.dump(random_columns, fp) with open("RandomIndices.txt", "wb") as fp: pickle.dump(random_indices, fp)
class TestBaseModelCreation(unittest.TestCase): def setUp(self): self.G = NaiveBayes() def test_class_init_without_data(self): self.assertIsInstance(self.G, nx.DiGraph) def test_class_init_with_data_string(self): self.g = NaiveBayes([("a", "b"), ("a", "c")]) six.assertCountEqual(self, list(self.g.nodes()), ["a", "b", "c"]) six.assertCountEqual(self, list(self.g.edges()), [("a", "b"), ("a", "c")]) self.assertEqual(self.g.parent_node, "a") self.assertSetEqual(self.g.children_nodes, {"b", "c"}) self.assertRaises(ValueError, NaiveBayes, [("a", "b"), ("b", "c")]) self.assertRaises(ValueError, NaiveBayes, [("a", "b"), ("c", "b")]) self.assertRaises(ValueError, NaiveBayes, [("a", "b"), ("d", "e")]) def test_class_init_with_data_nonstring(self): self.g = NaiveBayes([(1, 2), (1, 3)]) six.assertCountEqual(self, list(self.g.nodes()), [1, 2, 3]) six.assertCountEqual(self, list(self.g.edges()), [(1, 2), (1, 3)]) self.assertEqual(self.g.parent_node, 1) self.assertSetEqual(self.g.children_nodes, {2, 3}) self.assertRaises(ValueError, NaiveBayes, [(1, 2), (2, 3)]) self.assertRaises(ValueError, NaiveBayes, [(1, 2), (3, 2)]) self.assertRaises(ValueError, NaiveBayes, [(1, 2), (3, 4)]) def test_add_node_string(self): self.G.add_node("a") self.assertListEqual(list(self.G.nodes()), ["a"]) def test_add_node_nonstring(self): self.G.add_node(1) self.assertListEqual(list(self.G.nodes()), [1]) def test_add_nodes_from_string(self): self.G.add_nodes_from(["a", "b", "c", "d"]) six.assertCountEqual(self, list(self.G.nodes()), ["a", "b", "c", "d"]) def test_add_nodes_from_non_string(self): self.G.add_nodes_from([1, 2, 3, 4]) six.assertCountEqual(self, list(self.G.nodes()), [1, 2, 3, 4]) def test_add_edge_string(self): self.G.add_edge("a", "b") six.assertCountEqual(self, list(self.G.nodes()), ["a", "b"]) self.assertListEqual(list(self.G.edges()), [("a", "b")]) self.assertEqual(self.G.parent_node, "a") self.assertSetEqual(self.G.children_nodes, {"b"}) self.G.add_nodes_from(["c", "d"]) self.G.add_edge("a", "c") self.G.add_edge("a", "d") six.assertCountEqual(self, list(self.G.nodes()), ["a", "b", "c", "d"]) six.assertCountEqual( self, list(self.G.edges()), [("a", "b"), ("a", "c"), ("a", "d")] ) self.assertEqual(self.G.parent_node, "a") self.assertSetEqual(self.G.children_nodes, {"b", "c", "d"}) self.assertRaises(ValueError, self.G.add_edge, "b", "c") self.assertRaises(ValueError, self.G.add_edge, "d", "f") self.assertRaises(ValueError, self.G.add_edge, "e", "f") self.assertRaises(ValueError, self.G.add_edges_from, [("a", "e"), ("b", "f")]) self.assertRaises(ValueError, self.G.add_edges_from, [("b", "f")]) def test_add_edge_nonstring(self): self.G.add_edge(1, 2) six.assertCountEqual(self, list(self.G.nodes()), [1, 2]) self.assertListEqual(list(self.G.edges()), [(1, 2)]) self.assertEqual(self.G.parent_node, 1) self.assertSetEqual(self.G.children_nodes, {2}) self.G.add_nodes_from([3, 4]) self.G.add_edge(1, 3) self.G.add_edge(1, 4) six.assertCountEqual(self, list(self.G.nodes()), [1, 2, 3, 4]) six.assertCountEqual(self, list(self.G.edges()), [(1, 2), (1, 3), (1, 4)]) self.assertEqual(self.G.parent_node, 1) self.assertSetEqual(self.G.children_nodes, {2, 3, 4}) self.assertRaises(ValueError, self.G.add_edge, 2, 3) self.assertRaises(ValueError, self.G.add_edge, 3, 6) self.assertRaises(ValueError, self.G.add_edge, 5, 6) self.assertRaises(ValueError, self.G.add_edges_from, [(1, 5), (2, 6)]) self.assertRaises(ValueError, self.G.add_edges_from, [(2, 6)]) def test_add_edge_selfloop(self): self.assertRaises(ValueError, self.G.add_edge, "a", "a") self.assertRaises(ValueError, self.G.add_edge, 1, 1) def test_add_edges_from_self_loop(self): self.assertRaises(ValueError, self.G.add_edges_from, [("a", "a")]) def test_update_node_parents_bm_constructor(self): self.g = NaiveBayes([("a", "b"), ("a", "c")]) self.assertListEqual(list(self.g.predecessors("a")), []) self.assertListEqual(list(self.g.predecessors("b")), ["a"]) self.assertListEqual(list(self.g.predecessors("c")), ["a"]) def test_update_node_parents(self): self.G.add_nodes_from(["a", "b", "c"]) self.G.add_edges_from([("a", "b"), ("a", "c")]) self.assertListEqual(list(self.G.predecessors("a")), []) self.assertListEqual(list(self.G.predecessors("b")), ["a"]) self.assertListEqual(list(self.G.predecessors("c")), ["a"]) def tearDown(self): del self.G