def Bayesian_Net_Model(data, target="Overall"):
    """Build a ``NaiveBayes`` model whose single parent node is ``target``.

    Every column label of ``data`` becomes a node, and an edge is added from
    ``target`` to each of the other columns. The conditional probability
    tables are then produced by the ``Compute_CPT`` / ``Generate_CPTs``
    helpers and attached to the model one by one.

    Args:
        data: Object exposing ``.columns.values`` (presumably a pandas
            DataFrame -- confirm against the caller).
        target: Label of the class column used as the parent of all other
            nodes. Defaults to ``"Overall"``, the value that was previously
            hard-coded.

    Returns:
        The populated ``NaiveBayes`` model.
    """
    cols = data.columns.values

    BN_Model = NaiveBayes()
    BN_Model.add_nodes_from(cols)

    # Star topology: the target column points at every other column.
    edges = [[target, col] for col in cols if col != target]
    BN_Model.add_edges_from(edges)
    print("Aggiunti Archi e Nodi \n\n")

    # NOTE(review): the exact shape of what these helpers return is not
    # visible here; each item of CPTS_list is handed to add_cpds as-is.
    data_cpts = Compute_CPT(data, target)
    CPTS_list = Generate_CPTs(data_cpts, data, cols, target)
    for cpd in CPTS_list:
        BN_Model.add_cpds(cpd)
    print("Aggiunte CPD \n\n")

    return BN_Model
class TestBaseModelCreation(unittest.TestCase):
    """Graph-construction tests for ``NaiveBayes``.

    The assertions check that nodes and edges are stored, that
    ``parent_node`` / ``children_nodes`` track the single parent and its
    children, and that edges which do not originate from that parent (or are
    self loops) raise ``ValueError``.

    Results of ``nodes()``, ``edges()`` and ``predecessors()`` are wrapped in
    ``list(...)`` before comparison: ``assertListEqual`` requires real lists,
    and newer networkx releases return views/iterators from these calls. The
    wrapping is a no-op when a list is already returned.
    """

    def setUp(self):
        # Every test starts from an empty model.
        self.G = NaiveBayes()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = NaiveBayes([('a', 'b'), ('a', 'c')])
        six.assertCountEqual(self, list(self.g.nodes()), ['a', 'b', 'c'])
        six.assertCountEqual(self, list(self.g.edges()),
                             [('a', 'b'), ('a', 'c')])
        self.assertEqual(self.g.parent_node, 'a')
        self.assertSetEqual(self.g.children_nodes, {'b', 'c'})

        # Edge lists that do not share one common parent are rejected.
        self.assertRaises(ValueError, NaiveBayes, [('a', 'b'), ('b', 'c')])
        self.assertRaises(ValueError, NaiveBayes, [('a', 'b'), ('c', 'b')])
        self.assertRaises(ValueError, NaiveBayes, [('a', 'b'), ('d', 'e')])

    def test_class_init_with_data_nonstring(self):
        self.g = NaiveBayes([(1, 2), (1, 3)])
        six.assertCountEqual(self, list(self.g.nodes()), [1, 2, 3])
        six.assertCountEqual(self, list(self.g.edges()), [(1, 2), (1, 3)])
        self.assertEqual(self.g.parent_node, 1)
        self.assertSetEqual(self.g.children_nodes, {2, 3})

        self.assertRaises(ValueError, NaiveBayes, [(1, 2), (2, 3)])
        self.assertRaises(ValueError, NaiveBayes, [(1, 2), (3, 2)])
        self.assertRaises(ValueError, NaiveBayes, [(1, 2), (3, 4)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(list(self.G.nodes()), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)
        self.assertListEqual(list(self.G.nodes()), [1])

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        six.assertCountEqual(self, list(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])
        six.assertCountEqual(self, list(self.G.nodes()), [1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('a', 'b')
        six.assertCountEqual(self, list(self.G.nodes()), ['a', 'b'])
        self.assertListEqual(list(self.G.edges()), [('a', 'b')])
        self.assertEqual(self.G.parent_node, 'a')
        self.assertSetEqual(self.G.children_nodes, {'b'})

        self.G.add_nodes_from(['c', 'd'])
        self.G.add_edge('a', 'c')
        self.G.add_edge('a', 'd')
        six.assertCountEqual(self, list(self.G.nodes()), ['a', 'b', 'c', 'd'])
        six.assertCountEqual(self, list(self.G.edges()),
                             [('a', 'b'), ('a', 'c'), ('a', 'd')])
        self.assertEqual(self.G.parent_node, 'a')
        self.assertSetEqual(self.G.children_nodes, {'b', 'c', 'd'})

        # Edges not starting at the established parent must be refused.
        self.assertRaises(ValueError, self.G.add_edge, 'b', 'c')
        self.assertRaises(ValueError, self.G.add_edge, 'd', 'f')
        self.assertRaises(ValueError, self.G.add_edge, 'e', 'f')
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'e'), ('b', 'f')])
        self.assertRaises(ValueError, self.G.add_edges_from, [('b', 'f')])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)
        six.assertCountEqual(self, list(self.G.nodes()), [1, 2])
        self.assertListEqual(list(self.G.edges()), [(1, 2)])
        self.assertEqual(self.G.parent_node, 1)
        self.assertSetEqual(self.G.children_nodes, {2})

        self.G.add_nodes_from([3, 4])
        self.G.add_edge(1, 3)
        self.G.add_edge(1, 4)
        six.assertCountEqual(self, list(self.G.nodes()), [1, 2, 3, 4])
        six.assertCountEqual(self, list(self.G.edges()),
                             [(1, 2), (1, 3), (1, 4)])
        self.assertEqual(self.G.parent_node, 1)
        self.assertSetEqual(self.G.children_nodes, {2, 3, 4})

        self.assertRaises(ValueError, self.G.add_edge, 2, 3)
        self.assertRaises(ValueError, self.G.add_edge, 3, 6)
        self.assertRaises(ValueError, self.G.add_edge, 5, 6)
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [(1, 5), (2, 6)])
        self.assertRaises(ValueError, self.G.add_edges_from, [(2, 6)])

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')
        self.assertRaises(ValueError, self.G.add_edge, 1, 1)

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = NaiveBayes([('a', 'b'), ('a', 'c')])
        self.assertListEqual(list(self.g.predecessors('a')), [])
        self.assertListEqual(list(self.g.predecessors('b')), ['a'])
        self.assertListEqual(list(self.g.predecessors('c')), ['a'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertListEqual(list(self.G.predecessors('a')), [])
        self.assertListEqual(list(self.G.predecessors('b')), ['a'])
        self.assertListEqual(list(self.G.predecessors('c')), ['a'])

    def tearDown(self):
        del self.G
random_columns = pickle.load(fp) with open("RandomIndices.txt", "rb") as fp: random_indices = pickle.load(fp) data = data.iloc[:, random_columns] column_size = data.shape[1] #Delete invoices with all zeros from the data data = data[(data.T != 0).any()] row_size = data.shape[0] smallDF = data.iloc[random_indices, :] smallDF.shape DictOfModels = {} for productName in smallDF.columns: print('Collecting model for {0}'.format(productName)) model = NaiveBayes() model.add_nodes_from(Nodes[productName]) model.add_edges_from(Edges[productName]) model.add_cpds(*CPD[productName]) DictOfModels[productName] = model #Save edge ,node, CPD information PseudoCounts = {} #Pseudocounts are given (1,1) for uniform for productName in smallDF.columns: PseudoCounts[productName] = [1, 1] except: print('Existing model not found') #Select random invoice (2000) and products (50) seed(0) column_size = data.shape[1] random_columns = sample(range(column_size), 100) data = data.iloc[:, random_columns] #Delete invoices with all zeros from the data
class TestBaseModelCreation(unittest.TestCase):
    """Checks how ``NaiveBayes`` stores nodes and edges.

    Covered here: construction with and without an edge list, adding nodes
    and edges with string and integer labels, the ``parent_node`` and
    ``children_nodes`` attributes, predecessor lookups, and the
    ``ValueError`` raised for self loops and for edges that do not start at
    the model's parent node.
    """

    def setUp(self):
        # Each test gets its own empty model.
        self.G = NaiveBayes()

    def tearDown(self):
        del self.G

    def _assert_star_shape(
        self, model, nodes, edges, parent, children, exact_edge_order=False
    ):
        """Assert nodes, edges, parent and children of ``model`` in that order.

        With ``exact_edge_order`` the edge list must match element by element;
        otherwise only the multiset of edges is compared.
        """
        six.assertCountEqual(self, list(model.nodes()), nodes)
        found_edges = list(model.edges())
        if exact_edge_order:
            self.assertListEqual(found_edges, edges)
        else:
            six.assertCountEqual(self, found_edges, edges)
        self.assertEqual(model.parent_node, parent)
        self.assertSetEqual(model.children_nodes, children)

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = NaiveBayes([("a", "b"), ("a", "c")])
        self._assert_star_shape(
            self.g,
            nodes=["a", "b", "c"],
            edges=[("a", "b"), ("a", "c")],
            parent="a",
            children={"b", "c"},
        )

        rejected = (
            [("a", "b"), ("b", "c")],
            [("a", "b"), ("c", "b")],
            [("a", "b"), ("d", "e")],
        )
        for ebunch in rejected:
            with self.assertRaises(ValueError):
                NaiveBayes(ebunch)

    def test_class_init_with_data_nonstring(self):
        self.g = NaiveBayes([(1, 2), (1, 3)])
        self._assert_star_shape(
            self.g,
            nodes=[1, 2, 3],
            edges=[(1, 2), (1, 3)],
            parent=1,
            children={2, 3},
        )

        rejected = (
            [(1, 2), (2, 3)],
            [(1, 2), (3, 2)],
            [(1, 2), (3, 4)],
        )
        for ebunch in rejected:
            with self.assertRaises(ValueError):
                NaiveBayes(ebunch)

    def test_add_node_string(self):
        self.G.add_node("a")
        current_nodes = list(self.G.nodes())
        self.assertListEqual(current_nodes, ["a"])

    def test_add_node_nonstring(self):
        self.G.add_node(1)
        current_nodes = list(self.G.nodes())
        self.assertListEqual(current_nodes, [1])

    def test_add_nodes_from_string(self):
        labels = ["a", "b", "c", "d"]
        self.G.add_nodes_from(labels)
        six.assertCountEqual(self, list(self.G.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        labels = [1, 2, 3, 4]
        self.G.add_nodes_from(labels)
        six.assertCountEqual(self, list(self.G.nodes()), [1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge("a", "b")
        self._assert_star_shape(
            self.G,
            nodes=["a", "b"],
            edges=[("a", "b")],
            parent="a",
            children={"b"},
            exact_edge_order=True,
        )

        self.G.add_nodes_from(["c", "d"])
        for child in ("c", "d"):
            self.G.add_edge("a", child)
        self._assert_star_shape(
            self.G,
            nodes=["a", "b", "c", "d"],
            edges=[("a", "b"), ("a", "c"), ("a", "d")],
            parent="a",
            children={"b", "c", "d"},
        )

        # None of these start at the parent "a" for every edge involved.
        for tail, head in (("b", "c"), ("d", "f"), ("e", "f")):
            with self.assertRaises(ValueError):
                self.G.add_edge(tail, head)
        for ebunch in ([("a", "e"), ("b", "f")], [("b", "f")]):
            with self.assertRaises(ValueError):
                self.G.add_edges_from(ebunch)

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)
        self._assert_star_shape(
            self.G,
            nodes=[1, 2],
            edges=[(1, 2)],
            parent=1,
            children={2},
            exact_edge_order=True,
        )

        self.G.add_nodes_from([3, 4])
        for child in (3, 4):
            self.G.add_edge(1, child)
        self._assert_star_shape(
            self.G,
            nodes=[1, 2, 3, 4],
            edges=[(1, 2), (1, 3), (1, 4)],
            parent=1,
            children={2, 3, 4},
        )

        for tail, head in ((2, 3), (3, 6), (5, 6)):
            with self.assertRaises(ValueError):
                self.G.add_edge(tail, head)
        for ebunch in ([(1, 5), (2, 6)], [(2, 6)]):
            with self.assertRaises(ValueError):
                self.G.add_edges_from(ebunch)

    def test_add_edge_selfloop(self):
        for label in ("a", 1):
            with self.assertRaises(ValueError):
                self.G.add_edge(label, label)

    def test_add_edges_from_self_loop(self):
        with self.assertRaises(ValueError):
            self.G.add_edges_from([("a", "a")])

    def test_update_node_parents_bm_constructor(self):
        self.g = NaiveBayes([("a", "b"), ("a", "c")])
        expected_parents = (("a", []), ("b", ["a"]), ("c", ["a"]))
        for node, parents in expected_parents:
            self.assertListEqual(list(self.g.predecessors(node)), parents)

    def test_update_node_parents(self):
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edges_from([("a", "b"), ("a", "c")])
        expected_parents = (("a", []), ("b", ["a"]), ("c", ["a"]))
        for node, parents in expected_parents:
            self.assertListEqual(list(self.G.predecessors(node)), parents)