#data_size = 10000
data_size = len(data)

# Structure of the Bayesian network: one (parent, child) tuple per directed arc.
model = BayesianModel([
    ('TQ', 'DFT'),
    ('DPQ', 'DI'),
    ('C', 'DI'),
    ('DI', 'DFT'),
    ('DI', 'RD'),
    ('DFT', 'RD'),
    ('RD', 'DFO'),
    ('OU', 'DFO'),
])

# Learn the CPD of every node from the input data (used as training set),
# with a Bayesian estimator and a BDeu prior.
model.fit(data, estimator_type=BayesianEstimator, prior_type="BDeu",
          equivalent_sample_size=10)

# http://pgmpy.org/inference.html
inference = VariableElimination(model)

# Earlier experiment kept for reference:
# nodes = ['DPQ','C','TQ','DI','DFT','RD','OU','DFO']
# Distribution = {}
# for key in pr.keys():
#     Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
#     #Distribution = {'C': [0, 0, 0, 0, 1], 'DPQ': [1, 0, 0, 0, 0], 'TQ': [1, 0, 0, 0, 0]}
#     nodes.remove(key)
# #nodes = ['DI', 'DFT', 'RD', 'OU', 'DFO']
#
# for key in nodes:
#     Distribution[key] = inference.query([key], evidence=pr)[key].values

# Moralize the directed model into its undirected Markov equivalent.
mm = model.to_markov_model()
mm.nodes()
mm.edges()
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class have numbers and must be called in order.
    '''

    def __init__(self, pnh, gh):
        '''
        Constructor.

        pnh : pre-network handler; supplies the extractor, dataframe, device,
              priority, file writer and file suffix used by all later steps.
        gh  : general handler; only used to resolve device locations when drawing.
        '''
        extractor = pnh.get_data_extractor()
        # Structure + parameters learned in steps (4) and (5); empty until then.
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        # Markov equivalent of best_model; actually filled by data_info(selection=3).
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        ''' (4) Method that builds the structure of the data
        -----------------
        Parameters:
        method          : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method : K2, bic, bdeu
        log - "True" if you want to print debug information in the console
        '''
        # Select the scoring method for the local search of the structure.
        # NOTE(review): an unrecognized scoring_method leaves `scores` unbound
        # and the scoring-based branches below would raise NameError.
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)

        # Select the actual search strategy (constraint-based ignores `scores`).
        if method == "scoring_approx":
            est = HillClimbSearch(self.data, scores)
        elif method == "scoring_exhaustive":
            est = ExhaustiveSearch(self.data, scores)
        elif method == "constraint":
            est = ConstraintBasedEstimator(self.data)

        self.best_model = est.estimate()
        self.eliminate_isolated_nodes(
        )  # REMOVE all nodes not connected to anything else

        # Persist the learned arcs. NOTE(review): edges_iter() is the old
        # networkx-1.x API — newer pgmpy/networkx expose edges() only.
        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        #self.log("Training instances skipped: " + str(self.extractor.get_skipped_lines()), log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        ''' (5) Estimates the parameters of the found network '''
        # One CPD per node, estimated with a K2 (uniform Dirichlet) prior,
        # logged and written to the report file.
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " +
                                   str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " +
                                   str(self.variables_names))
        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(cpd.__str__())

    def inference(self, variables, evidence, mode="auto", log=True):
        ''' (6) Computes the inference over some variables of the network
        (given some evidence)

        mode == "auto"  : queries every node, with all of its parents set to 1
                          (the `variables`/`evidence` arguments are ignored).
        mode == "manual": queries exactly the given `variables` and `evidence`.
        '''
        inference = VariableElimination(self.best_model)
        #inference = BeliefPropagation(self.markov)
        #inference = Mplp(self.best_model)
        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log(" (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                # Overwrites the caller-supplied arguments on purpose:
                # evidence is "every parent observed at value 1".
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)
        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
            '''
            map_query = inference.map_query(variables, evidence)
            print(map_query)
            '''

    def draw_network(self, label_choice, location_choice, location, log):
        ''' (7) Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" if we want to show single label, "double" for double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2)
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:
            devices = self.variables_names
            device_location = dict()
            device_locationH1 = dict()  # For H0
            for d in devices:
                # NOTE(review): this lookup is loop-invariant and could be
                # hoisted out of the loop; left as-is.
                allDevicesLocations = self.general_handler.get_device_locations(
                )
                device_location[d] = allDevicesLocations[d][0]
                device_locationH1[d] = allDevicesLocations[d][1]  # temp for H1
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)
            '''
            # Logging and saving info
            self.log(device_location, log)
            self.log(location_color, log)
            self.file_writer.write_txt(device_location, newline = True)
            self.file_writer.write_txt(location_color, newline = True)
            '''
            # Creating the subgraphs, one for each location:
            # the "cluster_" prefix is what makes graphviz treat them as clusters.
            loc_subgraphs = dict()
            for loc in location_color:
                name = "cluster_" + loc
                loc_subgraphs[loc] = gv.Digraph(name)
                loc_subgraphs[loc].graph_attr[
                    'label'] = loc  # Label with name to be visualized in the image

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                loc_subgraphs[locationH0].node(
                    node, style='filled',
                    fillcolor=location_colorH1[locationH1]
                )  # add the node to the right subgraph
                #loc_subgraphs[locationH0].node(node) #USE THIS TO ADD ONLY H0
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges: each arc is labelled with P(child=1 | parent=1)
        # (and optionally the reverse) and drawn red when that probability >= 0.75.
        for edge in self.best_model.edges_iter():
            inference = VariableElimination(self.best_model)
            label = ""
            # Inference for first label and color of edges
            variables = [edge[1]]
            evidence = dict()
            evidence[edge[0]] = 1
            phi_query = inference.query(variables, evidence)
            value = phi_query[edge[1]].values[1]
            value = round(value, 2)
            if label_choice == "single":
                label = str(value)
            if label_choice == "double":
                # Inference for second label (reverse direction)
                variables = [edge[0]]
                evidence = dict()
                evidence[edge[1]] = 1
                phi_query = inference.query(variables, evidence)
                value_inv = phi_query[edge[0]].values[1]
                value_inv = round(value_inv, 2)
                label = str(value) + "|" + str(value_inv)
            if value >= 0.75:
                bn_graph.edge(edge[0], edge[1], color="red", label=label)
            else:
                bn_graph.edge(edge[0], edge[1], color="black", label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            if location_choice:
                locat = "_H0H1"
            else:
                locat = ""
            imgPath = '../../output/' + self.device_considered + '_' + self.priority_considered + locat
        bn_graph.render(imgPath)
        os.remove(imgPath)  # remove the source code generated by graphviz

    def data_info(self, selection, log):
        ''' (9) Prints or logs some extra information about the data or the network

        selection : collection of section numbers to produce (1, 2, 3 and/or 4).
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)", newline=True)
            i = 1
            # Assumes each entry of rankedDevices is (name, freq, occurrences)
            # or similar triple — TODO confirm against the extractor.
            for dr in self.rankedDevices:
                self.file_writer.write_txt(dr[0] + " \t" + str(dr[1]) + "\t" +
                                           str(dr[2]))
                i = i + 1
                if i == 20:
                    break

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges_iter():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            self.markov = self.best_model.to_markov_model(
            )  # create the markov model from the BN
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                node_pydot = pydot.Node(node)
                nice_graph.add_node(node_pydot)
            for edge in self.markov.edges():
                edge_pydot = pydot.Edge(edge[0], edge[1], color="black")
                nice_graph.add_edge(edge_pydot)
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')
            self.file_writer.write_txt("MARKOV NETWORK FACTORS:", newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(factor.__str__())

        # 4 - INFERENCE NETWORK: for every unordered node pair, query the
        # activation probability in both directions and draw an arc whose
        # direction/color encodes the stronger and closer relations.
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = self.best_model.nodes()
            inference = VariableElimination(self.best_model)
            for node1 in nodes:
                # Only pairs (node1, node2) with node2 after node1 — each pair once.
                pos = nodes.index(node1) + 1
                for i in range(pos, len(nodes)):
                    node2 = nodes[i]
                    variables = [node2]
                    evidence = dict()
                    evidence[node1] = 1
                    phi_query = inference.query(variables, evidence)
                    prob1 = phi_query[node2].values[
                        1]  # probability of direct activation (inference from node1=1 to node2)
                    variables = [node1]
                    evidence = dict()
                    evidence[node2] = 1
                    phi_query = inference.query(variables, evidence)
                    prob2 = phi_query[node1].values[
                        1]  # probability of inverse activation (inference from node2=1 to node1)
                    prob1 = round(prob1, 2)
                    prob2 = round(prob2, 2)
                    # Red arc: one direction strong (>=0.75) and not much
                    # stronger than the other (gap <= 0.40).
                    if prob1 >= 0.75 and (
                            prob1 - prob2
                    ) <= 0.40:  # add direct arc from node1 to node2
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob1) + "|" + str(prob2)
                        nice_graph.add_edge(
                            pydot.Edge(node1,
                                       node2,
                                       color="red",
                                       label=double_label))
                    elif prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob2) + "|" + str(prob1)
                        nice_graph.add_edge(
                            pydot.Edge(node2,
                                       node1,
                                       color="red",
                                       label=double_label))
                    # Orange arc: both directions strong; point toward the weaker one.
                    elif prob1 >= 0.75 and prob2 >= 0.75:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="orange",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="orange",
                                           label=double_label))
                    # Black arc: both directions moderately strong (>= 0.55).
                    elif prob1 >= 0.55 and prob2 >= 0.55:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="black",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="black",
                                           label=double_label))
            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' + self.device_considered +
                                     '_' + self.priority_considered +
                                     '-inference_network.png')

    def fix_node_presence(self, nodes, pydot_graph):
        ''' Adds the list of nodes to the graph, if they are not already present '''
        for node in nodes:
            if node not in pydot_graph.get_nodes():
                pydot_graph.add_node(pydot.Node(node))

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated
        from the graph.

        Raises DataError when no node survives the pruning.
        NOTE(review): iterating nodes() while removing relies on nodes()
        returning a fresh list (old pgmpy/networkx behavior) — confirm before
        upgrading the library.
        '''
        for nodeX in self.best_model.nodes():
            tup = [item for item in self.best_model.edges() if nodeX in item]
            if not tup:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)
        if self.best_model.nodes() == []:
            raise DataError("No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value
        (WORKS WITH MAX 10 DIFFERENT LOCATIONS)
        '''
        # NOTE(review): 'Red' appears twice in this palette, so only 9 distinct
        # colors are available and two locations can end up with the same one.
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Red', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]
        location_color = dict()  # key = location; value = color
        for dev, loc in device_location.items():
            if loc not in location_color:
                # Take the next unused color and retire it from the palette.
                color = system_color[0]
                system_color.remove(color)
                location_color[loc] = color
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
# Priors on the two independent root causes (binary variables).
cpd_rain = TabularCPD('Rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('Accident', 2, [[0.2], [0.8]])

# P(TrafficJam | Rain, Accident); columns follow the evidence ordering.
jam_table = [
    [0.9, 0.6, 0.7, 0.1],
    [0.1, 0.4, 0.3, 0.9],
]
cpd_traffic_jam = TabularCPD(
    'TrafficJam', 2, jam_table,
    evidence=['Rain', 'Accident'], evidence_card=[2, 2],
)
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)

# Extend the network with a direct consequence of traffic jams.
model.add_node('LongQueues')
model.add_edge('TrafficJam', 'LongQueues')
cpd_long_queues = TabularCPD(
    'LongQueues', 2,
    [[0.9, 0.2], [0.1, 0.8]],
    evidence=['TrafficJam'], evidence_card=[2],
)
model.add_cpds(cpd_long_queues)

# Second cause of lateness plus the common effect 'LateForSchool'.
model.add_nodes_from(['GettingUpLate', 'LateForSchool'])
model.add_edges_from([
    ('GettingUpLate', 'LateForSchool'),
    ('TrafficJam', 'LateForSchool'),
])
cpd_getting_up_late = TabularCPD('GettingUpLate', 2, [[0.6], [0.4]])
late_table = [
    [0.9, 0.45, 0.8, 0.1],
    [0.1, 0.55, 0.2, 0.9],
]
cpd_late_for_school = TabularCPD(
    'LateForSchool', 2, late_table,
    evidence=['GettingUpLate', 'TrafficJam'], evidence_card=[2, 2],
)
model.add_cpds(cpd_getting_up_late, cpd_late_for_school)

# Conversion from BayesianModel to MarkovModel is accomplished by
mm = model.to_markov_model()
mm.edges()
from pgmpy.inference import VariableElimination from jta import readAdjList, jta, get_different if __name__ == '__main__': V, E, CPD = readAdjList("./tricky_adjlist_dgm.csv") DGM = BayesianModel() DGM.add_nodes_from(V) DGM.add_edges_from(E) DGM.add_cpds(*CPD) # print CPDs # for cpd in DGM.get_cpds(): # print cpd UGM = DGM.to_markov_model() jtree = UGM.to_junction_tree() evidence = {'A': 1} marginal = jta(UGM, jtree, evidence.items()) print "Results of the implemented JTA" for m in marginal: print m print "\n=======================================\n" print "Results of the Variable Elimination from pgmpy" inference = VariableElimination(DGM) for v in get_different(DGM.nodes(), evidence):
# Collect the CPDs built earlier in this script and attach them to model2.
cpd1.extend([p1, p_21, p_52, p_32, p_46, p_61])
model2.add_cpds(*cpd1)

# Generate data for model1: 3000 forward samples drawn from model2.
inference = BayesianModelSampling(model2)
data1 = inference.forward_sample(size=3000, return_type='dataframe')
print("--------------------------------------------------")

# --- Markov model conversion ---
print("Coverting model2 to markov model:")
print("----------------------------------------")
mm = model2.to_markov_model()
print("Nodes of model2 as markov:")
print(mm.nodes())
print("Edges of model2 as markov:")
print(mm.edges())
print("----------------------------------------------")

# --- Exact inference on the converted Markov model ---
print("Inference for th dataset")
from pgmpy.inference import VariableElimination

infer1 = VariableElimination(mm)
print("Inference of x4:")
print(infer1.query(['x4'])['x4'])
print("Inference of x5|x2:")
print(infer1.query(['x5'], evidence={'x2': 1})['x5'])
print("---------------------------------------------------------")

# --- AND dataset ---
print("AND DATASET")
data = pd.read_csv("AND-Features.csv")
evidence={'Musicianship':1,'Difficulty':0}) print(r_2s_g_m_s_d_l['Rating']) #Probability exam given musianship strong p_e = music_infer.query(variables=['Exam'],evidence={'Musicianship':1}) print(p_e["Exam"]) #Probability letter given rating is ** p_l_w_g_r_2_s = music_infer.query(variables=['Letter'],evidence={'Rating':1}) print(p_l_w_g_r_2_s['Letter']) #Proabiblity Letter is Strong, given no other information p_l_s = music_infer.query(variables=['Letter']) print(p_l_s['Letter']) #Probability Letter is Strong, given George not strong musician. p_l_s_m_w = music_infer.query(variables=['Letter'],evidence={'Musicianship':0}) print(p_l_s_m_w['Letter']) #---------Approximate Inference----------- #music_markov = music_model.to_markov_model() music_markov = music_model.to_markov_model() print(music_markov.check_model()) music_mplp = Mplp(music_markov) print(music_mplp.map_query(100))