def test_find_MAP(): print '-' * 80 G = MarkovModel() G.add_nodes_from(['x1', 'x2', 'x3']) G.add_edges_from([('x1', 'x2'), ('x1', 'x3')]) phi = [ DiscreteFactor(['x2', 'x1'], cardinality=[2, 2], values=np.array([[1.0 / 1, 1.0 / 2], [1.0 / 3, 1.0 / 4]])), DiscreteFactor(['x3', 'x1'], cardinality=[2, 2], values=np.array([[1.0 / 1, 1.0 / 2], [1.0 / 3, 1.0 / 4]])) ] # DiscreteFactor(['x1'], cardinality=[2], # values=np.array([2,2]))] G.add_factors(*phi) print "nodes:", G.nodes() bp = BeliefPropagation(G) bp.max_calibrate() # bp.calibrate() clique_beliefs = bp.get_clique_beliefs() print clique_beliefs print clique_beliefs[('x1', 'x2')] print clique_beliefs[('x1', 'x3')] # print 'partition function should be', np.sum(clique_beliefs[('x1', 'x3')].values) phi_query = bp._query(['x1', 'x2', 'x3'], operation='maximize') # phi_query = bp._query(['x1', 'x2', 'x3'], operation='marginalize') print phi_query sleep(52)
class generate(object):
    r"""
    Build a MarkovModel over the nodes 0..n-1 of an adjacency matrix and
    attach randomly drawn Ising-like factors to it.

    adj_mat - square matrix, numpy array like (only ``.shape`` and
              ``[i][j]`` indexing are used here)
    struct  - 'complete': every pair of distinct nodes is connected
              'nodes':    nodes only, no edges
              None:       edges are taken from the truthy entries of adj_mat
              any other value leaves the graph without nodes and edges
    """

    # Set to True to enable the debug prints used throughout the class.
    DEBUG = False

    def __init__(self, adj_mat=None, struct=None):
        # The default adj_mat=None used to crash on ``None.shape``; report
        # the problem explicitly instead.
        if adj_mat is None:
            raise ValueError("can't import empty adj mat")
        self.G = MarkovModel()
        self.n_nodes = adj_mat.shape[0]
        if self.DEBUG:
            print('struct %s' % (struct,))
        if struct == 'complete':
            self._complete_graph(adj_mat)
        elif struct == 'nodes':
            self._nodes_only(adj_mat)
        elif struct is None:
            self._import_adj(adj_mat)
        self._ising_factors(Wf=5, Wi=5, f_type='mixed')
        if self.DEBUG:
            print('generate_init %s %s' % (self.G, self.G.nodes()))

    def get_model(self):
        """ return the generated MarkovModel """
        return self.G

    def _complete_graph(self, adj_mat):
        """
        generate the complete graph over len(adj_mat)

        Only pairs i < j are added: an (i, i) edge is a self-loop, which
        MarkovModel.add_edge rejects with ValueError, and (j, i) is the
        same undirected edge as (i, j).
        """
        self._nodes_only(adj_mat)
        for i in range(self.n_nodes):
            self.G.add_edges_from(
                [(i, j) for j in range(i + 1, self.n_nodes)])

    def _import_adj(self, adj_mat):
        """
        add nodes and edges to graph
        adj_mat - square matrix, numpy array like

        An edge (i, j) is added for every truthy adj_mat[i][j].
        NOTE(review): a truthy diagonal entry therefore requests a
        self-loop -- presumably rejected by MarkovModel; confirm whether
        the diagonal should be skipped.
        """
        assert (adj_mat is not None), "can't import empty adj mat"
        # add nodes
        self._nodes_only(adj_mat)
        # add edges
        for i in range(self.n_nodes):
            edges_list = [(i, j) for j in range(self.n_nodes)
                          if adj_mat[i][j]]
            if self.DEBUG:
                print(edges_list)
            self.G.add_edges_from(edges_list)
        if self.DEBUG:
            print(len(self.G))

    def _nodes_only(self, adj_mat):
        """
        add nodes to graph
        adj_mat - square matrix, numpy array like
        """
        assert (adj_mat is not None), "can't import empty adj mat"
        assert (self.n_nodes == adj_mat.shape[1]), "adj_mat is not sqaure"
        node_ids = list(range(self.n_nodes))
        self.G.add_nodes_from(node_ids)
        if self.DEBUG:
            print('_nodes_only %s' % (node_ids,))
            print('_nodes_only print G %s' % (self.G.nodes(),))
        assert (self.n_nodes == len(self.G)), "graph size is incosistent with adj_mat"

    def _ising_factors(self, Wf=1, Wi=1, f_type='mixed'):
        r"""
        Add ising-like factors to model graph
        cardinality is the number of possible values
        in our case we have boolean nodes, thus cardinality = 2

        Wf     - bound of the field terms, \theta_i ~ U[-Wf, Wf]
        Wi     - bound of the interaction terms
        f_type - 'mixed' or anything else (treated as 'attractive');
                 see _wi for what each one produces
        """
        self._field_factors(Wf)
        self._interact_factors(Wi, f_type)

    def _field_factors(self, w, states=2):
        """
        this function assigns factor for single node
        currently states=2 for ising model generation
        """
        for i in self.G.nodes():
            phi_i = Factor([i], [states], self._wf(w, states))
            self.G.add_factors(phi_i)

    def _interact_factors(self, w, f_type, states=2):
        """
        this function assigns factor for two interacting nodes
        currently states=2 for ising model generation
        """
        for e in self.G.edges():
            phi_ij = Factor([e[0], e[1]], [states] * len(e),
                            self._wi(w, f_type, states))
            self.G.add_factors(phi_ij)

    def _wf(self, w, k):
        """
        generate field factor: k values drawn from U[-w, w]
        """
        return np.random.uniform(low=-1 * w, high=w, size=k)

    def _wi(self, w, f_type, k):
        """
        generate interaction factor
        current support only for k=2 (k itself is not used)

        Returns [a, d, d, a] with a drawn from U[-w, w]; d is -a for
        f_type == 'mixed' and 0 for any other f_type.
        NOTE(review): the original notes describe 'attractive' as U[0, Wi],
        but a is drawn from U[-w, w] for every f_type -- confirm intent.
        """
        a_ij = np.random.uniform(low=-1 * w, high=w)
        dis_aij = -a_ij if f_type == 'mixed' else 0
        return [a_ij, dis_aij, dis_aij, a_ij]
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class have numbers and must be called in order.
    '''

    def __init__(self, pnh, gh):
        '''
        Constructor
        -----------------
        Parameters:
        pnh : object providing get_data_extractor(), get_device(),
              get_priority(), get_dataframe(), get_file_writer() and
              get_file_suffix(); all of them are read once, here
        gh  : general handler, kept for get_device_locations() in
              draw_network
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        '''
        (4) Method that builds the structure of the data
        -----------------
        Parameters:
        method         : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method : K2, bic, bdeu (ignored by the constraint technique)
        log            - "True" if you want to print debug information in the console
        -----------------
        Raises ValueError for an unknown method, or for an unknown
        scoring_method when the chosen method needs a score.
        The learnt edges are written to the file writer, one per line.
        '''
        if method not in ("scoring_approx", "scoring_exhaustive", "constraint"):
            raise ValueError("Unknown structure learning method: " + str(method))

        # Select the scoring method for the local search of the structure
        scores = None
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)

        # Select the actual method
        if method == "constraint":
            est = ConstraintBasedEstimator(self.data)
        else:
            # Both remaining methods need a score; without this check an
            # unknown name surfaced as an UnboundLocalError.
            if scores is None:
                raise ValueError("Unknown scoring method: " + str(scoring_method))
            if method == "scoring_approx":
                est = HillClimbSearch(self.data, scores)
            else:
                est = ExhaustiveSearch(self.data, scores)

        self.best_model = est.estimate()
        # REMOVE all nodes not connected to anything else
        self.eliminate_isolated_nodes()

        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        '''
        (5) Estimates the parameters of the found network.
        One CPD per node is estimated with prior_type='K2', added to the
        model, logged and written to the file writer.
        '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " + str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " + str(self.variables_names))
        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(str(cpd))

    def inference(self, variables, evidence, mode="auto", log=True):
        '''
        (6) Computes the inference over some variables of the network (given some evidence)
        -----------------
        mode = "auto"   : variables/evidence are ignored; every node is
                          queried with all of its parents set to 1, and the
                          results are logged and written to file
        mode = "manual" : the given variables/evidence are queried and the
                          results are logged only
        Any other mode only writes the header lines.
        '''
        engine = VariableElimination(self.best_model)

        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                parents = self.best_model.get_parents(node)
                parent_evidence = dict((p, 1) for p in parents)
                phi_query = engine.query([node], parent_evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)
        elif mode == "manual":
            phi_query = engine.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)

    def _activation_probability(self, engine, target, given):
        ''' Queries target with evidence {given: 1} and returns values[1] rounded to 2 decimals. '''
        phi_query = engine.query([target], {given: 1})
        return round(phi_query[target].values[1], 2)

    def draw_network(self, label_choice, location_choice, location, log):
        '''
        (7) Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" if we want to show single label, "double" for double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2)
        ----
        Neither `location` nor `log` is read by this method.
        With location_choice, nodes are grouped in one cluster per first
        location entry and filled with the colour of their second one.
        Edges whose (rounded) value is >= 0.75 are drawn in red.
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:
            # Loop-invariant: fetched once instead of once per device.
            all_locations = self.general_handler.get_device_locations()
            device_location = dict()    # For H0
            device_locationH1 = dict()  # temp for H1
            for d in self.variables_names:
                device_location[d] = all_locations[d][0]
                device_locationH1[d] = all_locations[d][1]
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                subgraph = gv.Digraph("cluster_" + loc)
                # Label with name to be visualized in the image
                subgraph.graph_attr['label'] = loc
                loc_subgraphs[loc] = subgraph

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                # add the node to the right subgraph
                loc_subgraphs[locationH0].node(
                    node,
                    style='filled',
                    fillcolor=location_colorH1[locationH1])
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges
        engine = None
        for edge in self.best_model.edges_iter():
            # One engine serves every edge; it is still only built when
            # there is at least one edge to label.
            if engine is None:
                engine = VariableElimination(self.best_model)
            label = ""

            # Inference for first label and color of edges
            value = self._activation_probability(engine, edge[1], edge[0])

            if label_choice == "single":
                label = str(value)

            if label_choice == "double":
                # Inference for second label
                value_inv = self._activation_probability(engine, edge[0], edge[1])
                label = str(value) + "|" + str(value_inv)

            edge_color = "red" if value >= 0.75 else "black"
            bn_graph.edge(edge[0], edge[1], color=edge_color, label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            locat = "_H0H1" if location_choice else ""
            imgPath = '../../output/' + self.device_considered + '_' + self.priority_considered + locat
        bn_graph.render(imgPath)
        os.remove(imgPath)  # remove the source code generated by graphviz

    def _classify_arc(self, prob1, prob2):
        '''
        Decides how a node pair is drawn in the inference network.
        Returns (color, forward) where forward is True for an arc
        node1 -> node2, or None when no arc is drawn.
        '''
        if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:
            return "red", True
        if prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
            return "red", False
        # NOTE(review): if both values are probabilities in [0, 1], two
        # values >= 0.75 differ by at most 0.25, so the first test already
        # matched and this "orange" case cannot be reached -- confirm the
        # intended thresholds.
        if prob1 >= 0.75 and prob2 >= 0.75:
            return "orange", prob1 >= prob2
        if prob1 >= 0.55 and prob2 >= 0.55:
            return "black", prob1 >= prob2
        return None

    def data_info(self, selection, log):
        '''
        (9) Prints or logs some extra information about the data or the network
        ----
        selection : container of section numbers to produce
            1 - device ranking (first 20 ranked devices)
            2 - edges of the network
            3 - markov network (png + factors)
            4 - inference network (png)
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)", newline=True)
            for rank, dr in enumerate(self.rankedDevices):
                # The previous counter stopped after 19 rows although the
                # header announces 20.
                if rank >= 20:
                    break
                self.file_writer.write_txt(dr[0] + " \t" + str(dr[1]) + "\t" + str(dr[2]))

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges_iter():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            # create the markov model from the BN
            self.markov = self.best_model.to_markov_model()
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                nice_graph.add_node(pydot.Node(node))
            for edge in self.markov.edges():
                nice_graph.add_edge(pydot.Edge(edge[0], edge[1], color="black"))
            nice_graph.write_png('../../output/' + self.device_considered + '_' + self.priority_considered + '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:", newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(str(factor))

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = list(self.best_model.nodes())
            engine = VariableElimination(self.best_model)
            # Visit every unordered pair once (node2 always after node1).
            for pos, node1 in enumerate(nodes):
                for node2 in nodes[pos + 1:]:
                    # probability of direct activation (inference from node1=1 to node2)
                    prob1 = self._activation_probability(engine, node2, node1)
                    # probability of inverse activation (inference from node2=1 to node1)
                    prob2 = self._activation_probability(engine, node1, node2)

                    arc = self._classify_arc(prob1, prob2)
                    if arc is None:
                        continue
                    color, forward = arc
                    self.fix_node_presence([node1, node2], nice_graph)
                    if forward:
                        source, target = node1, node2
                        double_label = str(prob1) + "|" + str(prob2)
                    else:
                        source, target = node2, node1
                        double_label = str(prob2) + "|" + str(prob1)
                    nice_graph.add_edge(
                        pydot.Edge(source, target, color=color, label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' + self.device_considered + '_' + self.priority_considered + '-inference_network.png')

    def fix_node_presence(self, nodes, pydot_graph):
        '''
        Adds the list of nodes to the graph, if they are not already present.
        Presence is decided by node name: get_nodes() yields pydot.Node
        objects, so testing a plain name against that list presumably never
        matched and the node was added again on every call.
        '''
        present = set(existing.get_name() for existing in pydot_graph.get_nodes())
        for node in nodes:
            candidate = pydot.Node(node)
            name = candidate.get_name()
            if name not in present:
                pydot_graph.add_node(candidate)
                present.add(name)

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph.
        Raises DataError when no node is left afterwards.
        '''
        # Removing an edge-less node cannot change the edge set, so the
        # connected nodes are collected once up front.
        connected = set()
        for edge in self.best_model.edges():
            connected.update(edge)

        # Iterate over a copy: nodes are removed inside the loop.
        for nodeX in list(self.best_model.nodes()):
            if nodeX not in connected:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) + " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)

        if not list(self.best_model.nodes()):
            raise DataError("No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value
        (WORKS WITH MAX 10 DIFFERENT LOCATIONS)
        Colours are handed out in palette order to locations in the order
        they are first seen; IndexError is raised when the palette runs out.
        '''
        # NOTE(review): 'Red' is listed twice, so the 3rd and 6th distinct
        # locations get the same colour -- confirm whether that is intended.
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Red', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]
        palette = iter(system_color)
        location_color = dict()  # key = location; value = color
        for loc in device_location.values():
            if loc in location_color:
                continue
            try:
                location_color[loc] = next(palette)
            except StopIteration:
                raise IndexError(
                    "assign_color supports at most " + str(len(system_color)) +
                    " different locations")
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
import numpy as np
import pandas as pd
from pgmpy.models import MarkovModel
from pgmpy.estimators import MaximumLikelihoodEstimator

# Random sample: 1000 rows of two integer columns, each entry 0 or 1
# (randint's upper bound is exclusive).
raw_data = np.random.randint(0, 2, size=(1000, 2))
data = pd.DataFrame(data=raw_data, columns=['X', 'Y'])

# Fit a Markov model to the sample with the maximum likelihood estimator.
model = MarkovModel()
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Inspect the fitted model; the return values are not stored or printed.
model.get_factors()
model.nodes()
model.edges()
class TestMarkovModelCreation(unittest.TestCase):
    """Checks MarkovModel construction and node/edge insertion."""

    def setUp(self):
        self.graph = MarkovModel()

    def tearDown(self):
        del self.graph

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, MarkovModel)

    def test_class_init_with_data_string(self):
        self.g = MarkovModel([("a", "b"), ("b", "c")])
        node_names = sorted(self.g.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.g.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    def test_class_init_with_data_nonstring(self):
        # Passes as long as construction does not raise.
        self.g = MarkovModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.graph.add_node("a")
        node_names = list(self.graph.nodes())
        self.assertListEqual(node_names, ["a"])

    def test_add_node_nonstring(self):
        # Passes as long as the call does not raise.
        self.graph.add_node(1)

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(["a", "b", "c", "d"])
        node_names = sorted(self.graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Passes as long as the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        # An edge between unknown nodes creates both nodes.
        self.graph.add_edge("d", "e")
        node_names = sorted(self.graph.nodes())
        self.assertListEqual(node_names, ["d", "e"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["d", "e"]])

        # An edge between already known nodes.
        self.graph.add_nodes_from(["a", "b", "c"])
        self.graph.add_edge("a", "b")
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Passes as long as the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        with self.assertRaises(ValueError):
            self.graph.add_edge("a", "a")

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        node_names = sorted(self.graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

        self.graph.add_nodes_from(["d", "e", "f"])
        self.graph.add_edges_from([("d", "e"), ("e", "f")])
        node_names = sorted(self.graph.nodes())
        self.assertListEqual(node_names, ["a", "b", "c", "d", "e", "f"])
        actual_edges = hf.recursive_sorted(self.graph.edges())
        expected_edges = hf.recursive_sorted(
            [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")])
        self.assertListEqual(actual_edges, expected_edges)

    def test_add_edges_from_nonstring(self):
        # Passes as long as the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        with self.assertRaises(ValueError):
            self.graph.add_edges_from([("a", "a")])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([("a", "b"), ("b", "c")])
        neighbours_of_b = list(self.graph.neighbors("b"))
        self.assertEqual(len(neighbours_of_b), 2)
class TestMarkovModelCreation(unittest.TestCase):
    """
    Checks MarkovModel construction and node/edge insertion.

    Results of nodes() and neighbors() are converted with list() before
    list-only checks (assertListEqual, len), so the tests do not depend on
    those calls returning real lists.
    """

    def setUp(self):
        self.graph = MarkovModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, MarkovModel)

    def test_class_init_with_data_string(self):
        self.g = MarkovModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        # Passes as long as construction does not raise.
        self.g = MarkovModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.graph.add_node('a')
        # assertListEqual rejects anything that is not a list.
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        # Passes as long as the call does not raise.
        self.graph.add_node(1)

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Passes as long as the call does not raise.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Passes as long as the call does not raise.
        self.graph.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.graph.add_edge, 'a', 'a')

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.graph.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                 ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Passes as long as the call does not raise.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.graph.add_edges_from,
                          [('a', 'a')])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        # len() needs a sized object; neighbors() may hand back an iterator.
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
class TestMarkovModelCreation(unittest.TestCase):
    """
    Creation tests for MarkovModel: empty and edge-list construction,
    adding nodes and edges one by one or in bulk, and self-loop rejection.

    nodes() and neighbors() results are wrapped in list() wherever a real
    list is required (assertListEqual, len).
    """

    def setUp(self):
        self.graph = MarkovModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.graph, MarkovModel)

    def test_class_init_with_data_string(self):
        self.g = MarkovModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        # No assertion: only checks that non-string node labels are accepted.
        self.g = MarkovModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.graph.add_node('a')
        # Convert first: assertListEqual fails on non-list sequences.
        self.assertListEqual(list(self.graph.nodes()), ['a'])

    def test_add_node_nonstring(self):
        # No assertion: only checks that the call is accepted.
        self.graph.add_node(1)

    def test_add_nodes_from_string(self):
        self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # No assertion: only checks that the call is accepted.
        self.graph.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.graph.add_edge('d', 'e')
        self.assertListEqual(sorted(self.graph.nodes()), ['d', 'e'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['d', 'e']])
        self.graph.add_nodes_from(['a', 'b', 'c'])
        self.graph.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # No assertion: only checks that the call is accepted.
        self.graph.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.graph.add_edge, 'a', 'a')

    def test_add_edges_from_string(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.graph.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.graph.add_nodes_from(['d', 'e', 'f'])
        self.graph.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.graph.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # No assertion: only checks that the call is accepted.
        self.graph.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.graph.add_edges_from,
                          [('a', 'a')])

    def test_number_of_neighbors(self):
        self.graph.add_edges_from([('a', 'b'), ('b', 'c')])
        # Materialise first: an iterator has no len().
        self.assertEqual(len(list(self.graph.neighbors('b'))), 2)

    def tearDown(self):
        del self.graph
class TestGibbsSampling(unittest.TestCase):
    """
    Tests for GibbsSampling built from a Bayesian model and a Markov model.

    setUp creates a three-node Bayesian model (diff, intel -> grade), a
    four-node Markov model (A-B, C-B, B-D) and a sampler over the Bayesian
    model.
    """

    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3,
                               [[0.3, 0.05, 0.9, 0.5],
                                [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = Factor(['C', 'B'], [4, 3],
                           [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    # Decorators apply bottom-up: `init` is the MarkovChain.__init__ patch,
    # `get_kernel` the kernel-builder patch.
    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_bayesian_model', autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_markov_model', autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        # assertListEqual needs real lists on both sides.
        self.assertListEqual(list(gibbs.variables),
                             list(self.bayesian_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities,
                             {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        # assertListEqual needs real lists on both sides.
        self.assertListEqual(list(gibbs.variables),
                             list(self.markov_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities,
                             {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        # With no stored state and no start state, sample() is expected to
        # ask random_state for one.
        self.gibbs.state = None
        random_state.return_value = [State('diff', 0), State('intel', 0),
                                     State('grade', 0)]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = list(gen)
        self.assertEqual(len(samples), 2)
        self.assertEqual(
            {samples[0][0].var, samples[0][1].var, samples[0][2].var},
            {'diff', 'intel', 'grade'})
        self.assertEqual(
            {samples[1][0].var, samples[1][1].var, samples[1][2].var},
            {'diff', 'intel', 'grade'})

    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = list(gen)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
" [0, 0]])" data = pd.DataFrame(raw_data, columns=['A', 'B']) print(data) # Two coins toss result " A B " "0 1 1 " " ......." "98 0 0 " # Markov Model markov_model = MarkovModel() markov_model.fit(data, estimator=MaximumLikelihoodEstimator) markov_model.get_factors() " <Factor representing Phi(A: 2, B:2) at ejbdfouaeboidjfnaeif>, " markov_model.nodes() " ['A', 'B'] " markov_model.edges() " [('A','B')] " -2- " Bayesian Score for learning structure " import numpy as np import pandas as pd from pgmpy.models import MarkovModel from pgmpy.estimators import BayesianEstimator # Generating some data raw_data = np.random.randint(low=0, high=2, size=(1000,2))
class TestGibbsSampling(unittest.TestCase):
    """
    Tests for GibbsSampling built from a Bayesian model and a Markov model.

    setUp creates a three-node Bayesian model (diff, intel -> grade), a
    four-node Markov model (A-B, C-B, B-D) and a sampler over the Bayesian
    model.
    """

    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3,
                               [[0.3, 0.05, 0.9, 0.5],
                                [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = DiscreteFactor(['C', 'B'], [4, 3],
                                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    # Decorators apply bottom-up: `init` is the MarkovChain.__init__ patch,
    # `get_kernel` the kernel-builder patch.
    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_bayesian_model', autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_markov_model', autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        # assertListEqual needs real lists on both sides.
        self.assertListEqual(list(gibbs.variables),
                             list(self.bayesian_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities, {
            'diff': 2,
            'intel': 2,
            'grade': 3
        })

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        # assertListEqual needs real lists on both sides.
        self.assertListEqual(list(gibbs.variables),
                             list(self.markov_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities, {
            'A': 2,
            'B': 3,
            'C': 4,
            'D': 2
        })

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        # With no stored state and no start state, sample() is expected to
        # ask random_state for one.
        self.gibbs.state = None
        random_state.return_value = [
            State('diff', 0),
            State('intel', 0),
            State('grade', 0)
        ]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = list(gen)
        self.assertEqual(len(samples), 2)
        self.assertEqual(
            {samples[0][0].var, samples[0][1].var, samples[0][2].var},
            {'diff', 'intel', 'grade'})
        self.assertEqual(
            {samples[1][0].var, samples[1][1].var, samples[1][2].var},
            {'diff', 'intel', 'grade'})

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = list(gen)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)