def load_graph_sympt_id(df_cond, df_related_symptoms, sympt_id):
    """Build the symptom -> sub-symptom -> condition graph for one symptom.

    Parameters
    ----------
    df_cond : pandas.DataFrame
        Conditions table. ``row[0]`` of each row is used as the condition id;
        a non-zero cell under a selected sub-symptom column yields an edge
        ``sub symptom -> condition``.
    df_related_symptoms : pandas.DataFrame
        Related-symptoms table. ``row[0]`` of each row is the symptom id;
        a non-zero cell under a column whose label starts with ``sub_symp``
        yields an edge ``symptom -> sub symptom``.
    sympt_id : str
        Only rows whose stringified id equals this value are used.

    Returns
    -------
    tuple
        ``(G, sub_symptom_list, condition_list)``: the model, the
        sub-symptoms linked to the symptom (in discovery order) and the
        distinct condition ids reached from them (unordered).
    """
    G = BayesianModel()
    sub_symptom_list = []
    condition_list = set()

    # Related symptoms table: symptom -> sub symptom.
    for _, row in df_related_symptoms.iterrows():
        # NOTE(review): row[0] relies on pandas' positional fallback when the
        # column labels are not integers -- confirm the table layout.
        symptom_id = str(row[0])
        if symptom_id != sympt_id:
            continue
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for label, value in row.items():
            sub_symptom_id = str(label)
            if sub_symptom_id.startswith("sub_symp") and value != 0:
                G.add_edge(symptom_id, sub_symptom_id)
                sub_symptom_list.append(sub_symptom_id)

    # Conditions table: sub symptom -> condition.
    selected = set(sub_symptom_list)  # O(1) membership inside the cell loop
    for _, row in df_cond.iterrows():
        cond_id = str(row[0])
        for label, value in row.items():
            if value != 0.0:
                sub_symptom_id = str(label)
                if sub_symptom_id in selected:
                    G.add_edge(sub_symptom_id, cond_id)
                    condition_list.add(cond_id)

    return G, sub_symptom_list, list(condition_list)
def inf(self, file1):
    """Read a network description from ``file1``, run inference, print it.

    The file is read as consecutive five-line node records (name, number of
    values, CPD string, evidence list, evidence cardinalities) up to the
    first empty line. Relative to that empty line, the next line holds the
    edge list and the third line after it holds the observed evidence.

    If the assembled model passes ``check_model()``, the posterior of every
    node that is not part of the evidence is computed with variable
    elimination and all results are printed in one block. Returns ``None``.
    """
    # Context manager so the handle is released even if parsing fails.
    with open(file1, encoding="utf8") as f1:
        lines = f1.readlines()

    i = 0
    G = BayesianModel()
    nodeList = {}
    while i < len(lines):
        if lines[i] == '\n':
            break
        nodeName = self.getnode(lines[i])
        valueNum = int(lines[i + 1])
        cpd_str = lines[i + 2]
        sequence = self.getList(lines[i + 3])
        card = self.getCard(lines[i + 4])
        cpd = self.parseCpd(cpd_str, valueNum, card)
        nodeList[nodeName] = {
            'nodeName': nodeName,
            'valueNum': valueNum,
            'cpd': cpd,
            'sequence': sequence,
            'card': card,
        }
        i += 5

    # i now indexes the separator line that ended the node records.
    edges = self.getegdes(lines[i + 1])
    evidence2 = self.getValue(lines[i + 3])

    # edges is a flat [src0, dst0, src1, dst1, ...] sequence.
    for k in range(len(edges) // 2):
        G.add_edge(edges[2 * k], edges[2 * k + 1])

    for node in nodeList.values():
        if node['sequence'][0] == '':
            # No parents: unconditional table.
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             values=node['cpd'])
        else:
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             evidence=node['sequence'],
                             evidence_card=node['card'],
                             values=node['cpd'])
        G.add_cpds(cpt)

    if G.check_model():
        inference = VariableElimination(G)
        chunks = []
        for node in G.nodes():
            if node not in evidence2:
                phi_query = inference.query(variables=[node],
                                            evidence=evidence2,
                                            show_progress=False).values
                chunks.append(node + ' ' + str(phi_query) + '\n')
        print(''.join(chunks))
def pgm_generate(self, target, data, stats, subnodes):
    """Learn and fit an explanation network over ``subnodes``.

    A structure is learned by hill climbing on the non-target columns. Every
    entry of ``stats`` with a value below 0.05 that is also in ``subnodes``
    (and is not the target) is then linked to ``target``. The resulting
    graph is copied into a ``BayesianModel`` and fitted on a copy of
    ``data[subnodes]`` whose columns were passed through
    ``self.generalize_target`` / ``self.generalize_others``.

    Returns the fitted ``BayesianModel``.
    """
    p_values = pd.Series(stats, name='p-values')
    significant = p_values[p_values < 0.05]
    blanket = [name for name in significant.index if name in subnodes]
    others = [name for name in subnodes if name != target]

    searcher = HillClimbSearch(data[others], scoring_method=BicScore(data))
    learned = searcher.estimate()
    for name in blanket:
        if name == target:
            continue
        learned.add_edge(name, target)

    # Create the pgm
    pgm_explanation = BayesianModel()
    for name in learned.nodes():
        pgm_explanation.add_node(name)
    for edge in learned.edges():
        pgm_explanation.add_edge(edge[0], edge[1])

    # Fit the pgm
    training = data[subnodes].copy()
    training[target] = data[target].apply(self.generalize_target)
    for name in others:
        training[name] = data[name].apply(self.generalize_others)
    pgm_explanation.fit(training)

    return pgm_explanation
def vetor_Rede(solucao, nodes):
    """Decode a solution vector into a ``BayesianModel``.

    ``solucao`` is consumed one entry per node pair ``(i, j)`` with
    ``j < i``, in row order: 1 adds the edge ``nodes[i] -> nodes[j]``,
    2 adds ``nodes[j] -> nodes[i]`` and any other value adds nothing.

    Returns ``False`` when an edge would close a directed path that already
    exists in the opposite direction, or when some node ends up without any
    edge; otherwise returns the graph.
    """
    graph = BayesianModel()
    cursor = 0
    for hi in range(1, len(nodes)):
        for lo in range(hi):
            code = solucao[cursor]
            cursor += 1
            if code == 1:
                tail, head = nodes[hi], nodes[lo]
            elif code == 2:
                tail, head = nodes[lo], nodes[hi]
            else:
                continue
            known = graph.nodes()
            if tail in known and head in known and nx.has_path(
                    graph, head, tail):
                return False
            graph.add_edge(tail, head)

    if any(name not in graph.nodes() for name in nodes):
        return False
    return graph
def single_bayes_net(df, independent_vars, dependent_vars):
    """Fit a network linking every independent to every dependent variable.

    The independent variables are registered as nodes first, then one edge
    ``independent -> dependent`` is added per pair and the model is fitted
    on ``df``. Returns the fitted ``BayesianModel``.
    """
    net = BayesianModel()
    net.add_nodes_from(independent_vars)
    for cause in independent_vars:
        for effect in dependent_vars:
            net.add_edge(cause, effect)
    net.fit(df)
    return net
def pgm_generate(self, target, data, pgm_stats, subnodes, child=None):
    """Learn and fit an explanation network over ``subnodes``.

    Parameters
    ----------
    target
        Target node id; normalised to ``str(int(target))``.
    data : pandas.DataFrame
        Observations. NOTE: its column labels are converted to ``str`` in
        place, so the caller's frame is modified.
    pgm_stats
        Unused here; kept for interface compatibility.
    subnodes
        Node ids to include; each is normalised to ``str(int(node))``.
    child
        When ``None`` the structure is learned on the raw non-target
        columns and the nodes returned by ``self.search_MK`` are wired to
        the target. Otherwise the structure is learned directly on the
        generalised data, target included.

    Returns
    -------
    BayesianModel
        The model fitted on the generalised data.
    """
    subnodes = [str(int(node)) for node in subnodes]
    target = str(int(target))
    subnodes_no_target = [node for node in subnodes if node != target]
    data.columns = data.columns.astype(str)

    MK_blanket = self.search_MK(data, target, subnodes_no_target.copy())

    def build_explanation_data():
        # Generalised copy of the sub-node columns used for fitting.
        frame = data[subnodes].copy()
        frame[target] = data[target].apply(self.generalize_target)
        for node in subnodes_no_target:
            frame[node] = data[node].apply(self.generalize_others)
        return frame

    if child is None:
        est = HillClimbSearch(data[subnodes_no_target],
                              scoring_method=BicScore(data))
        structure = est.estimate()
        for node in MK_blanket:
            if node != target:
                structure.add_edge(node, target)
        data_ex = build_explanation_data()
    else:
        # The same frame serves structure learning and fitting; the
        # original rebuilt an identical frame before fitting.
        data_ex = build_explanation_data()
        est = HillClimbSearch(data_ex, scoring_method=BicScore(data_ex))
        structure = est.estimate()

    # Create the pgm
    pgm_explanation = BayesianModel()
    for node in structure.nodes():
        pgm_explanation.add_node(node)
    for edge in structure.edges():
        pgm_explanation.add_edge(edge[0], edge[1])

    # Fit the pgm
    pgm_explanation.fit(data_ex)
    return pgm_explanation
def make_power_plant_net():
    """Return the five-node power plant network (structure only, no CPDs)."""
    BayesNet = BayesianModel()

    node_names = (
        'temperature',
        'faulty gauge',
        'gauge',
        'faulty alarm',
        'alarm',
    )
    for name in node_names:
        BayesNet.add_node(name)

    links = (
        ('temperature', 'faulty gauge'),
        ('temperature', 'gauge'),
        ('faulty gauge', 'gauge'),
        ('gauge', 'alarm'),
        ('faulty alarm', 'alarm'),
    )
    for parent, child in links:
        BayesNet.add_edge(parent, child)

    return BayesNet
def create_model_and_inference():
    """Build a dependency network from ``dependencies.csv`` and fit it.

    Edges are discovered by walking the dependency table from
    ``'myproximus-usage'``, then reversed. The model is fitted on eleven
    rows of random 0/1 observations with a BDeu prior, and the network is
    drawn with ``draw_network``.

    Returns
    -------
    tuple
        ``(model, VariableElimination(model))``.
    """
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        # Depth-first walk: follow each value in the columns from index 3
        # onward of the source row to the row whose Column1 matches it.
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                # Skip a link whose reverse was already recorded.
                if (target, source) not in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    edges = [(t[1], t[0]) for t in edges]
    nodes = set(itertools.chain.from_iterable(edges))

    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]
    # Eleven columns '0'..'10' of random binary samples.
    # NOTE(review): size=64 is hard-coded -- presumably the row count of
    # the dependency table; confirm.
    for sample in range(11):
        nodes_df[str(sample)] = pd.DataFrame(
            data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            # add_edge rejects self-loops and cycle-closing edges with
            # ValueError; other errors should surface, not be swallowed.
            print('WARNING: tried to add edge which forms loop: ' + str(edge))
    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")

    draw_network(model.nodes(), model.edges(), {}, [])
    return model, VariableElimination(model)
def fully_connected_model(nodes=None):
    """Fit a network linking each hypothesis node to each observation node.

    A node whose name contains ``'hypo'`` gets an edge to every node whose
    name contains ``'obs'`` or ``'motor'``. When ``nodes`` is empty or
    ``None`` the default node set is used. The model is fitted on
    ``TRAINING_DATA`` with a BDeu prior and returned.
    """
    if not nodes:
        nodes = [BOREDOM, DESIRE, MOBILE, MOTOR_HYPO, LEFT_ARM]

    network = BayesianModel()
    network.add_nodes_from(nodes)
    for hypo in nodes:
        if 'hypo' not in hypo:
            continue
        for obs in nodes:
            is_target = 'obs' in obs or 'motor' in obs
            if is_target:
                network.add_edge(u=hypo, v=obs)

    network.fit(TRAINING_DATA,
                estimator=BayesianEstimator,
                prior_type="BDeu")
    return network
def bayes_net_from_populational_data(data, independent_vars, dependent_vars):
    """Build a network whose CPDs come from ``BayesNetHelper``.

    Every independent variable is linked to every dependent variable. The
    two arguments are combined with ``|``, and one CPD per resulting
    variable is computed with ``BayesNetHelper.compute_cpd`` and attached
    to the model, which is returned.
    """
    net = BayesianModel()
    net.add_nodes_from(independent_vars)
    for cause in independent_vars:
        for effect in dependent_vars:
            net.add_edge(cause, effect)

    state_names = BayesNetHelper.get_state_names_from_df(
        data, independent_vars | dependent_vars)
    tables = [
        BayesNetHelper.compute_cpd(net, variable, data, state_names)
        for variable in independent_vars | dependent_vars
    ]
    net.add_cpds(*tables)
    return net
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build the label network, or load it from the cached pickle.

    Parameters
    ----------
    load : bool
        When true and ``graph.p`` exists under ``RUNNING_MODEL_DIR``, the
        pickled graph is loaded and validated instead of being rebuilt.
    subtree : bool
        Use ``_subtree_labels()`` instead of every key of the label dict.
    modelsdir
        Directory handed to ``get_model_cpds``. The original body always
        passed ``MODEL_CPDS_DIR`` and ignored this argument.

    Returns
    -------
    BayesianModel
        The validated model. A freshly built model is also pickled to the
        cache file.
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'

    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE: pickle.load executes arbitrary code from the file; only
        # load caches this program wrote itself.
        with open(graph_file, 'rb') as handle:
            G = pickle.load(handle)
        G.check_model()
    else:
        print('loading data...')
        training_labels, go_dict = load_label_data()
        if subtree:
            labels_list = _subtree_labels()
            print(labels_list)
        else:
            labels_list = go_dict.keys()

        print('adding nodes and edges...')
        G = BayesianModel()
        G.add_edges_from([(label, label + '_hat') for label in labels_list])

        obo_graph = obonet.read_obo(OBODB_FILE)
        known_labels = set(labels_list)  # O(1) membership in the filter
        for label in labels_list:
            # Every OBO-graph ancestor of the label that is itself a known
            # label becomes a parent of the label.
            children = [
                c for c in networkx.ancestors(obo_graph, label)
                if c in known_labels
            ]
            for child in children:
                G.add_edge(child, label)

        predicted_cpds = get_model_cpds(labels_list=labels_list,
                                        modelsdir=modelsdir)
        for cpd in predicted_cpds:
            G.add_cpds(cpd)

        true_label_cpds = get_true_label_cpds(training_labels,
                                              go_dict,
                                              labels_list=labels_list)
        for cpd in true_label_cpds:
            G.add_cpds(cpd)

        # Drop nodes that ended up without a CPD so check_model can pass.
        remove_list = [node for node in G.nodes()
                       if G.get_cpds(node) is None]
        for node in remove_list:
            if node in G:
                G.remove_node(node)

        G.check_model()
        with open(graph_file, 'wb') as handle:
            pickle.dump(G, handle)

    return G
def make_power_plant_net():
    """Create a Bayes Net representation of the power plant problem.

    The node names are "alarm", "faulty alarm", "gauge", "faulty gauge"
    and "temperature" (the tests rely on these exact names). Only the
    structure is built; no CPDs are attached.
    """
    BayesNet = BayesianModel()

    for name in ("alarm", "faulty alarm", "gauge", "faulty gauge",
                 "temperature"):
        BayesNet.add_node(name)

    structure = [
        ("temperature", "faulty gauge"),
        ("faulty alarm", "alarm"),
        ("temperature", "gauge"),
        ("faulty gauge", "gauge"),
        ("gauge", "alarm"),
    ]
    for parent, child in structure:
        BayesNet.add_edge(parent, child)

    return BayesNet
def createBayesModel(self, fileName):
    """Build a model from ``fileName`` and print its validation result.

    The first line of the file is parsed by ``self.getegdes`` into a flat
    ``[src0, dst0, src1, dst1, ...]`` edge sequence; each later line is
    parsed by ``self.getcpbvar`` into the arguments of one ``TabularCPD``.
    The outcome of ``check_model()`` is printed. Returns ``None``.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(fileName) as handle:
        # NOTE(review): echoes the first command-line argument and raises
        # IndexError when the program is started without one -- looks like
        # leftover debugging output; kept because callers may expect it.
        print(sys.argv[1])
        lines = handle.readlines()

    model = BayesianModel()
    edges = self.getegdes(lines[0])
    for i in range(len(edges) // 2):
        model.add_edge(edges[2 * i], edges[2 * i + 1])

    for line in lines[1:]:
        variable, variable_card, evidence, evidence_card, values = \
            self.getcpbvar(line)
        cpb = TabularCPD(variable=variable,
                         variable_card=variable_card,
                         evidence=evidence,
                         evidence_card=evidence_card,
                         values=[values])
        model.add_cpds(cpb)

    # Renamed from ``re`` to avoid shadowing the stdlib module name.
    is_valid = model.check_model()
    print(is_valid)
def createBayesGraph(graph_list, mapping, data):
    '''
    Create the Bayesian network graph and its tables.

    graph_list - iterable of "parent,child" strings, one per edge
    mapping    - node names; mapping[i] names column i of data
    data       - 2-D array indexed as data[:, i]

    Returns a tuple:
    bayes_model             - the fitted bayes model
    cpds_array              - one single-key dict per CPD; the key is the
                              stringified variable list and the value the
                              rows of numbers parsed from the printed table
    categories_each_element - state names of each variable in the graph
    '''
    cpds_array = []
    categories_each_element = {}

    bayes_model = BayesianModel()
    bayes_model.add_nodes_from(list(mapping))
    for description in graph_list:
        endpoints = description.split(',')
        bayes_model.add_edge(endpoints[0], endpoints[1])

    columns = {}
    for index in range(0, len(mapping)):
        columns[mapping[index]] = data[:, index]
    frame = pandas.DataFrame(data=columns)
    bayes_model.fit(frame)

    # Build the array which is returned to the client.
    for cpd in bayes_model.get_cpds():
        for variable, states in cpd.state_names.items():
            categories_each_element[variable] = states

        # The table values are recovered from the printed form of the CPD:
        # numeric cells are collected until a border cell (containing "-+")
        # closes the current row.
        rows = []
        current_row = []
        row_has_numbers = False
        for cell in str(cpd).split('|'):
            if is_number(cell):
                current_row.append(float(cell.strip()))
                row_has_numbers = True
            elif row_has_numbers and "-+" in cell:
                rows.append(current_row)
                current_row = []
                row_has_numbers = False

        cpds_array.append({str(list(cpd.variables)): rows})

    return (bayes_model, cpds_array, categories_each_element)
def make_simple_bayes_net(subtree=False):
    """Build the label-only network (structure only, no CPDs).

    For every label, an edge is added from the label to each of its
    ancestors in the OBO graph that is also part of the label list.
    """
    print('Making bayes net')
    print('loading data...')
    training_data, go_dict = load_label_data()

    if not subtree:
        labels_list = go_dict.keys()
    else:
        labels_list = _subtree_labels()
        print(labels_list)

    print('adding nodes and edges...')
    G = BayesianModel()
    obo_graph = obonet.read_obo(OBODB_FILE)
    for label in labels_list:
        related = [
            term for term in networkx.ancestors(obo_graph, label)
            if term in labels_list
        ]
        for term in related:
            G.add_edge(label, term)

    return G
def load_graph(df_cond, df_related_symptoms):
    """Build the full symptom -> sub-symptom -> condition graph.

    Parameters
    ----------
    df_cond : pandas.DataFrame
        Conditions table. ``row[0]`` of each row is the condition id; a
        non-zero cell under a ``sub_symp*`` column yields an edge
        ``sub symptom -> condition``.
    df_related_symptoms : pandas.DataFrame
        Related-symptoms table. ``row[0]`` of each row is the symptom id;
        a non-zero cell under a ``sub_symp*`` column yields an edge
        ``symptom -> sub symptom``.

    Returns
    -------
    BayesianModel
        The assembled model (structure only).
    """
    G = BayesianModel()

    # Conditions table: sub symptom -> condition.
    for _, row in df_cond.iterrows():
        # NOTE(review): row[0] relies on pandas' positional fallback when
        # the column labels are not integers -- confirm the table layout.
        cond_id = str(row[0])
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for label, value in row.items():
            if value != 0.0:
                sub_symptom_id = str(label)
                if sub_symptom_id.startswith('sub_symp'):
                    G.add_edge(sub_symptom_id, cond_id)

    # Related symptoms table: symptom -> sub symptom.
    for _, row in df_related_symptoms.iterrows():
        sympt_id = str(row[0])
        for label, value in row.items():
            sub_symptom_id = str(label)
            if sub_symptom_id.startswith("sub_symp") and value != 0.0:
                G.add_edge(sympt_id, sub_symptom_id)

    return G
def get_game_network():
    """Create a Bayes Net representation of the game problem.

    Nodes are named "A", "B", "C" (four states each, sharing one prior)
    and "AvB", "BvC", "CvA" (three states each, sharing one conditional
    table over the two teams involved).
    """
    BayesNet = BayesianModel()

    for name in ("A", "B", "C", "AvB", "BvC", "CvA"):
        BayesNet.add_node(name)

    for parent, child in (("A", "AvB"), ("A", "CvA"),
                          ("B", "AvB"), ("B", "BvC"),
                          ("C", "BvC"), ("C", "CvA")):
        BayesNet.add_edge(parent, child)

    def skill_cpd(team):
        # A fresh value list per CPD, as in the hand-written version.
        return TabularCPD(team, 4, values=[[0.15], [0.45], [0.3], [0.1]])

    def match_cpd(match, first, second):
        outcome_table = [
            [0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15,
             0.75, 0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1],
            [0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75,
             0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1],
            [0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1,
             0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8],
        ]
        return TabularCPD(match, 3, values=outcome_table,
                          evidence=[first, second], evidence_card=[4, 4])

    cpd_a = skill_cpd('A')
    cpd_b = skill_cpd('B')
    cpd_c = skill_cpd('C')
    cpd_avb = match_cpd("AvB", "A", "B")
    cpd_bvc = match_cpd("BvC", "B", "C")
    cpd_cva = match_cpd("CvA", "C", "A")

    BayesNet.add_cpds(cpd_a, cpd_b, cpd_c, cpd_avb, cpd_bvc, cpd_cva)
    return BayesNet
def create_network(models, processes, files):
    """Build one failure-tree ``BayesianModel`` per process.

    For each of the first ``files`` entries of ``processes`` a model is
    built with edges cause -> error and error -> effect, CPDs are attached
    to causes and errors, the model is appended to ``models``, its graph is
    written to a PNG file and the cause/error CPDs are printed.

    Nothing is returned; ``models`` is extended in place.
    """
    for p in range(files):
        temp_model = BayesianModel()
        for e in range(len(processes[p].get_errors())):
            temp_error = processes[p].get_error(e)
            for c in range(len(temp_error.get_causes())):
                temp_cause = temp_error.get_cause(c)
                # Share of this cause in the error's total cause
                # probability; the cause object is overwritten with it.
                # NOTE(review): if get_total_cause_prob() is recomputed
                # from the causes, later shares see the already-normalised
                # values -- confirm.
                q = temp_cause.get_occ_prob(
                ) / temp_error.get_total_cause_prob()
                temp_cause.set_occ_prob(q)
                temp_model.add_nodes_from([temp_cause, temp_error])
                temp_model.add_edge(temp_cause, temp_error)
                temp_cause_cpd = TabularCPD(variable=temp_cause,
                                            variable_card=2,
                                            values=[[q, 1 - q]])
                temp_model.add_cpds(temp_cause_cpd)
            # The error is conditioned on all of its (binary) causes.
            temp_error_cpd = TabularCPD(
                variable=temp_error,
                variable_card=2,
                values=get_initial_error_cpd(len(temp_error.get_causes())),
                evidence=temp_error.get_causes(),
                evidence_card=[2] * (len(temp_error.get_causes())))
            temp_model.add_cpds(temp_error_cpd)
            # Effects only contribute structure; no CPD is attached here.
            for f in range(len(temp_error.get_effects())):
                temp_effect = temp_error.get_effect(f)
                temp_model.add_nodes_from([temp_error, temp_effect])
                temp_model.add_edge(temp_error, temp_effect)
        models.append(temp_model)
        # Plot the failure tree.
        # NOTE(review): models[p] is the model appended above only when
        # ``models`` was empty on entry -- confirm against callers.
        dot = to_pydot(models[p])
        with open('failure_tree_graph_%s.png' % processes[p], 'wb') as f:
            f.write(dot.create_png())
        # Sample output of the CPDs for causes and errors.
        for e in range(len(processes[p].get_errors())):
            for c in range(len(processes[p].get_error(e).get_causes())):
                print(
                    temp_model.get_cpds(
                        processes[p].get_error(e).get_cause(c)))
            print(temp_model.get_cpds(processes[p].get_error(e)))
def query(self, networkFile, queryFile):
    """Build a model from ``networkFile`` and answer ``queryFile``.

    The first line of the network file is parsed by ``self.getegdes`` into
    a flat ``[src0, dst0, src1, dst1, ...]`` edge sequence; each later line
    is parsed by ``self.getcpbvar`` into one ``TabularCPD``. Every line of
    the query file is parsed by ``self.infer_query`` into a node and an
    evidence mapping, and the posterior values of that node are printed.
    Returns ``None``.
    """
    # Context managers so both handles are closed deterministically.
    with open(networkFile) as network_handle:
        network_lines = network_handle.readlines()

    model = BayesianModel()
    edges = self.getegdes(network_lines[0])
    for i in range(len(edges) // 2):
        model.add_edge(edges[2 * i], edges[2 * i + 1])

    for line in network_lines[1:]:
        variable, variable_card, evidence, evidence_card, values = \
            self.getcpbvar(line)
        cpb = TabularCPD(variable=variable,
                         variable_card=variable_card,
                         evidence=evidence,
                         evidence_card=evidence_card,
                         values=[values])
        model.add_cpds(cpb)
    model.check_model()

    infer = VariableElimination(model)

    with open(queryFile) as query_handle:
        query_lines = query_handle.readlines()

    for line in query_lines:
        node, evidence2 = self.infer_query(line)
        # NOTE(review): indexing the query result by node matches the
        # older pgmpy API where query() returns a mapping -- confirm the
        # installed version.
        print(infer.query([node], evidence=evidence2)[node].values)
def evaluate_single_graph(df_samples, graph, bn_truth, nb_repeat=3):
    """Score a learned graph against the ground-truth network.

    The edges of ``graph`` (after ``remove_bidirected_edges``) are added to
    a model over the ground-truth nodes. An edge that cannot be added is
    retried in the reverse direction; if that fails too the error is
    printed and the edge is dropped. The model is fitted on ``df_samples``.

    Returns a dict with the keys ``'SID'``, ``'SHD'``, ``'OD'`` and
    ``'ID'``; the last two are means over ``nb_repeat`` evaluations.
    """
    testing_graph = BayesianModel()
    testing_graph.add_nodes_from(bn_truth.causal_graph.nodes())

    for edge in remove_bidirected_edges(graph.edges()):
        try:
            testing_graph.add_edge(edge[0], edge[1])
        except Exception:
            # Forward orientation rejected: try the reverse one.
            try:
                testing_graph.add_edge(edge[1], edge[0])
            except Exception as err:
                print(err)

    testing_graph.fit(df_samples, estimator=BayesianEstimator)
    testing_graph.check_model()

    bn_test = BayesianNetwork(testing_graph)
    set_observe(bn_test.bn)
    set_observe(bn_truth.bn)
    bn_truth.set_state_names()
    bn_test.set_state_names()

    sid = SID(bn_truth.causal_graph, bn_test.causal_graph)
    shd = SHD(bn_truth.causal_graph, bn_test.causal_graph)
    observational = np.mean([
        ODist(bn_truth, bn_test, 1000, discrete=True)
        for _ in range(nb_repeat)
    ])
    interventional = np.mean([
        IDist(bn_truth, bn_test, 1000, discrete=True)
        for _ in range(nb_repeat)
    ])
    return {
        'SID': sid,
        'SHD': shd,
        'OD': observational,
        'ID': interventional,
    }
def reduce_model(self, evidence):
    """Return a discrete copy of ``self.model`` reduced by ``evidence``.

    Works on a deep copy, so ``self.model`` is left untouched. Continuous
    factors are reduced by the matching evidence and converted to
    ``TabularCPD`` objects, which are assembled into a new
    ``BayesianModel``.

    Returns
    -------
    tuple
        ``(new_model, additional_evidence)`` where ``additional_evidence``
        maps every variable whose name contains ``"same_reason"`` to 1.
    """
    model = copy.deepcopy(self.model)
    continuous_factors = [
        factor for factor in model.factors
        if isinstance(factor, ContinuousFactor)
    ]
    for var, val in evidence.items():
        for factor in continuous_factors:
            # Only reduce continuous values here (names containing "F(");
            # reducing binary variables is left to the inference algorithm.
            if var in factor.scope(
            ) and "F(" in var:
                factor.reduce([(var, val)])
    new_model = BayesianModel()
    additional_evidence = {}
    for node in model.factors:
        if isinstance(node, ContinuousFactor):
            if len(node.scope()) == 1:
                # Single-variable factor: tabulate its values at 0 and 1.
                node = TabularCPD(
                    str(node.scope()[0]), 2,
                    [[node.assignment(0),
                      node.assignment(1)]])
            else:
                node = to_CPD(node)
        # From here on ``node`` is a CPD; every other variable in its
        # scope becomes a parent of its variable.
        var = node.variable
        for v in node.scope():
            if var != v:
                new_model.add_edge(str(v), str(var))
        if "same_reason" in var:
            additional_evidence[var] = 1
        new_model.add_nodes_from([str(n) for n in node.scope()])
        new_model.add_cpds(node)
    return new_model, additional_evidence
def cal(self, file1, file2):
    """Compute mutual information for the edges and all node pairs.

    ``file1`` holds the node list on its first line and a flat
    ``[src0, dst0, src1, dst1, ...]`` edge list on its second line (both
    parsed by ``self.getegdes``); ``file2`` is a CSV with one column per
    node.

    Two results are printed and written to ``mutual_output.txt``: a list
    with the mutual information of each edge, and a nested dict with the
    mutual information of every ordered pair of distinct nodes.
    Returns ``None``.
    """
    # Context manager so the handle is closed deterministically.
    with open(file1) as f1:
        lines = f1.readlines()
    nodes = self.getegdes(lines[0])
    edges = self.getegdes(lines[1])
    data = pd.read_csv(file2)

    G = BayesianModel()
    G.add_nodes_from(nodes)
    edge_count = len(edges) // 2
    for i in range(edge_count):
        G.add_edge(edges[2 * i], edges[2 * i + 1])

    output1 = [
        mr.mutual_info_score(data[edges[2 * i]], data[edges[2 * i + 1]])
        for i in range(edge_count)
    ]

    output2 = {}
    for node1 in G.nodes():
        output2[node1] = {
            node2: mr.mutual_info_score(data[node1], data[node2])
            for node2 in G.nodes()
            if node1 != node2
        }

    print(output1)
    print(output2)
    with open('mutual_output.txt', 'w') as f:
        f.write(str(output1))
        f.write('\n')
        f.write(str(output2))
def get_game_network():
    """Create a Bayes Net representation of the game problem.

    Name the nodes as "A","B","C","AvB","BvC" and "CvA".
    The three team nodes share one four-state prior and the three match
    nodes share one three-state table conditioned on both teams.
    """
    BayesNet = BayesianModel()

    teams = ("A", "B", "C")
    # Match node -> the two teams it depends on, in evidence order.
    matches = (("AvB", ("A", "B")), ("BvC", ("B", "C")), ("CvA", ("C", "A")))

    for name in teams + tuple(match for match, _ in matches):
        BayesNet.add_node(name)

    for parent, child in [("A", "AvB"), ("A", "CvA"),
                          ("B", "AvB"), ("B", "BvC"),
                          ("C", "BvC"), ("C", "CvA")]:
        BayesNet.add_edge(parent, child)

    skill_dist = [[0.15], [0.45], [0.30], [0.10]]
    game_dist = [
        [0.1, 0.2, 0.15, 0.05, 0.6, 0.1, 0.2, 0.15,
         0.75, 0.6, 0.1, 0.2, 0.9, 0.75, 0.6, 0.1],
        [0.1, 0.6, 0.75, 0.9, 0.2, 0.1, 0.6, 0.75,
         0.15, 0.2, 0.1, 0.6, 0.05, 0.15, 0.2, 0.1],
        [0.8, 0.2, 0.1, 0.05, 0.2, 0.8, 0.2, 0.1,
         0.1, 0.2, 0.8, 0.2, 0.05, 0.1, 0.2, 0.8],
    ]

    team_cpds = [TabularCPD(team, 4, values=skill_dist) for team in teams]
    match_cpds = [
        TabularCPD(match, 3, values=game_dist,
                   evidence=list(pair), evidence_card=[4, 4])
        for match, pair in matches
    ]

    BayesNet.add_cpds(*team_cpds, *match_cpds)
    return BayesNet
def get_structure(data):
    """
    Structure of model from data (Loaded RDS)

    Parameters
    ----------
    data : RDS data object
        Indexable collection of node records. For record ``k``, the first
        element of ``data[k][0]`` is the node and ``data[k][2]`` lists its
        children.

    Returns
    -------
    model : BayesianModel
        Model converted from RDS to python
    """
    model = BayesianModel()
    for index in range(len(data)):
        parent = list(data[index][0])[0]
        offspring = list(data[index][2])
        model.add_node(parent)
        for successor in offspring:
            model.add_edge(parent, successor)
    return model
def makeModel(self):
    """Build the topic DAG, save its edges and load the training rows.

    Edges from ``self.getTopicDAG()`` are added to a ``BayesianModel``;
    edges that cannot be unpacked or that the model rejects are skipped.
    The accepted edges are written to ``model.txt`` as one
    ``"<start>,<end>"`` line each (looked up in ``self.topicDict``), and
    the number of normalised data rows is printed. Returns ``None``.
    """
    # graph structure
    initRelation = self.getTopicDAG()

    # build the probabilistic graph
    relationList = []
    stuModel = BayesianModel()

    # DAG
    for edge in initRelation:
        try:
            start, end = edge
            stuModel.add_edge(str(start), str(end))
        except (TypeError, ValueError):
            # Malformed edge tuple, or add_edge rejected a self-loop or
            # cycle: skip the edge, but let unrelated errors surface.
            continue
        else:
            relationList.append(edge)

    # save file
    with open('model.txt', 'w') as write_f:
        for item in relationList:
            write_f.write(
                '%s,%s\n' %
                (self.topicDict[item[0]], self.topicDict[item[1]]))

    # learning from data
    topicList = list(self.topicDict)
    rowsData = self.getRowsNormalize(topicList)
    # Function-call form: valid on Python 3, same output on Python 2.
    print('the rows len is:%s' % len(rowsData))
def make_power_plant_net():
    """Return a seven-node network over A, B, C, D, E, F and J.

    Only the structure is built; no CPDs are attached.
    """
    BayesNet = BayesianModel()

    for name in "ABCDEFJ":
        BayesNet.add_node(name)

    arrows = [
        ("A", "C"),
        ("B", "C"),
        ("C", "D"),
        ("C", "E"),
        ("E", "J"),
        ("F", "J"),
    ]
    for parent, child in arrows:
        BayesNet.add_edge(parent, child)

    return BayesNet
def make_power_plant_net():
    """Return a seven-node network over NI, St, I, S, T, L and B.

    Only the structure is built; no CPDs are attached. There is
    deliberately no edge from "I" to "S".
    """
    BayesNet = BayesianModel()

    for name in ("NI", "St", "I", "S", "T", "L", "B"):
        BayesNet.add_node(name)

    arrows = [
        ("NI", "I"),
        ("St", "I"),
        ("I", "T"),
        ("I", "L"),
        ("S", "L"),
        ("S", "B"),
    ]
    for parent, child in arrows:
        BayesNet.add_edge(parent, child)

    return BayesNet
class TestBaseModelCreation(unittest.TestCase):
    """Node and edge insertion on a ``BayesianModel``.

    Every test starts from the empty model built in ``setUp``. The
    ``*_nonstring`` tests carry no assertion: they only verify that the
    call does not raise.
    """

    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        chain = [('a', 'b'), ('b', 'c')]
        self.g = BayesianModel(chain)
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        names = ['a', 'b', 'c', 'd']
        self.G.add_nodes_from(names)
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        # An edge between unknown nodes creates both endpoints.
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        # An edge between already registered nodes.
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        with self.assertRaises(ValueError):
            self.G.add_edge('a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        with self.assertRaises(ValueError):
            self.G.add_edge('c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])

        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        expected_edges = hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                              ('d', 'e'), ('e', 'f')])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             expected_edges)

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        with self.assertRaises(ValueError):
            self.G.add_edges_from([('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        with self.assertRaises(ValueError):
            self.G.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
def pdag_to_dag(pdag):
    """Completes a PDAG to a DAG, without adding v-structures, if such a
    completion exists. If no faithful extension is possible, some fully
    oriented DAG that corresponds to the PDAG is returned and a warning is
    generated. This is a static method.

    Parameters
    ----------
    pdag: DirectedGraph
        A directed acyclic graph pattern, consisting in (acyclic) directed
        edges as well as "undirected" edges, represented as both-way edges
        between nodes. The argument itself is not modified; the algorithm
        works on a copy.

    Returns
    -------
    dag: BayesianModel
        A faithful orientation of pdag, if one exists. Otherwise any fully
        orientated DAG/BayesianModel with the structure of pdag.

    References
    ----------
    [1] Chickering, Learning Equivalence Classes of Bayesian-Network
        Structures, 2002; See page 454 (last paragraph) for the algorithm
        pdag_to_dag
        http://www.jmlr.org/papers/volume2/chickering02a/chickering02a.pdf
    [2] Dor & Tarsi, A simple algorithm to construct a consistent extension
        of a partially oriented graph, 1992,
        http://ftp.cs.ucla.edu/pub/stat_ser/r185-dor-tarsi.pdf

    Examples
    --------
    >>> from pgmpy.base import DirectedGraph
    >>> from pgmpy.estimators import ConstraintBasedEstimator
    >>> pdag1 = DirectedGraph([('A', 'B'), ('C', 'B'), ('C', 'D'),
    ...                        ('D', 'C'), ('D', 'A'), ('A', 'D')])
    >>> ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
    [('D', 'C'), ('C', 'B'), ('A', 'B'), ('A', 'D')]

    >>> # example of a pdag with no faithful extension:
    >>> pdag2 = DirectedGraph([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'B')])
    >>> ConstraintBasedEstimator.pdag_to_dag(pdag2).edges()
    UserWarning: PDAG has no faithful extension (= no oriented DAG with the
    same v-structures as PDAG). Remaining undirected PDAG edges oriented
    arbitrarily.
    [('B', 'C'), ('A', 'B'), ('A', 'C')]
    """
    remaining = pdag.copy()
    dag = BayesianModel()
    dag.add_nodes_from(remaining.nodes())

    # Edges that are already directed (no reverse edge) go in unchanged.
    for X, Y in remaining.edges():
        if remaining.has_edge(Y, X):
            continue
        dag.add_edge(X, Y)

    while remaining.number_of_nodes() > 0:
        # Look for a node with (1) no directed outgoing edges and (2) whose
        # undirected neighbors are either absent or form, together with
        # the node's parents, a clique.
        found = False
        for X in remaining.nodes():
            successors = set(remaining.successors(X))
            predecessors = set(remaining.predecessors(X))
            directed_outgoing_edges = successors - predecessors
            undirected_neighbors = successors & predecessors
            neighbors_are_clique = all(
                remaining.has_edge(Y, Z)
                for Z in remaining.predecessors(X)
                for Y in undirected_neighbors
                if not Y == Z)

            if directed_outgoing_edges:
                continue
            if undirected_neighbors and not neighbors_are_clique:
                continue
            found = True
            break

        if found:
            # Orient every remaining edge at X towards X, then retire X.
            for Y in remaining.predecessors(X):
                dag.add_edge(Y, X)
            remaining.remove_node(X)
        else:
            warn("PDAG has no faithful extension (= no oriented DAG with the "
                 "same v-structures as PDAG). Remaining undirected PDAG edges "
                 "oriented arbitrarily.")
            for X, Y in remaining.edges():
                if not dag.has_edge(Y, X):
                    try:
                        dag.add_edge(X, Y)
                    except ValueError:
                        # This orientation would close a cycle; skip it.
                        pass
            break

    return dag
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Five-node "family out" network: structure first, then one CPD per node.
model = BayesianModel()
model.add_nodes_from(['family_out', 'bowel_problem', 'light_on',
                      'dog_out', 'hear_bark'])
for parent, child in [('family_out', 'light_on'),
                      ('family_out', 'dog_out'),
                      ('bowel_problem', 'dog_out'),
                      ('dog_out', 'hear_bark')]:
    model.add_edge(parent, child)

# Root nodes.
cpd_fo = TabularCPD(variable='family_out', variable_card=2,
                    values=[[0.15], [0.85]])
cpd_bp = TabularCPD(variable='bowel_problem', variable_card=2,
                    values=[[0.01], [0.99]])
# Conditional nodes.
cpd_do = TabularCPD(variable='dog_out', variable_card=2,
                    values=[[0.99, 0.9, 0.97, 0.3],
                            [0.01, 0.1, 0.03, 0.7]],
                    evidence=['family_out', 'bowel_problem'],
                    evidence_card=[2, 2])
cpd_lo = TabularCPD(variable='light_on', variable_card=2,
                    values=[[0.6, 0.05],
                            [0.4, 0.95]],
                    evidence=['family_out'], evidence_card=[2])
cpd_hb = TabularCPD(variable='hear_bark', variable_card=2,
                    values=[[0.7, 0.01],
                            [0.3, 0.99]],
                    evidence=['dog_out'], evidence_card=[2])
model.add_cpds(cpd_fo, cpd_bp, cpd_do, cpd_lo, cpd_hb)

# Validate the structure and the CPDs before running inference.
model.check_model()

infer = VariableElimination(model)
posterior = infer.query(['family_out'],
                        evidence={'light_on': 0, 'hear_bark': 1})
print(posterior['family_out'])
class TestBaseModelCreation(unittest.TestCase):
    """Creation tests for ``BayesianModel`` nodes and edges."""

    def setUp(self):
        """Start every test from an empty model."""
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        """An empty model is a networkx directed graph."""
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        """Constructor edges create their endpoint nodes."""
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        node_names = sorted(self.g.nodes())
        edge_pairs = hf.recursive_sorted(self.g.edges())
        self.assertListEqual(node_names, ['a', 'b', 'c'])
        self.assertListEqual(edge_pairs, [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        """Non-string node labels are accepted by the constructor."""
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        """A single string node can be added."""
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        """A single non-string node can be added."""
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        """Several string nodes can be added at once."""
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        node_names = sorted(self.G.nodes())
        self.assertListEqual(node_names, ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        """Several non-string nodes can be added at once."""
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        """Edges work for both new and pre-registered endpoints."""
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])

        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        edge_pairs = hf.recursive_sorted(self.G.edges())
        self.assertListEqual(edge_pairs, [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        """An edge between non-string nodes can be added."""
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        """A self-loop is rejected with ValueError."""
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        """An edge that closes a cycle is rejected with ValueError."""
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        """Edge batches accumulate across calls."""
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            [['a', 'b'], ['b', 'c']],
        )

        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(
            sorted(self.G.nodes()),
            ['a', 'b', 'c', 'd', 'e', 'f'],
        )
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted(
                [('a', 'b'), ('b', 'c'), ('d', 'e'), ('e', 'f')]),
        )

    def test_add_edges_from_nonstring(self):
        """An edge batch over non-string nodes can be added."""
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        """A batch containing a self-loop is rejected with ValueError."""
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        """A batch that forms a cycle is rejected with ValueError."""
        self.assertRaises(
            ValueError,
            self.G.add_edges_from,
            [('a', 'b'), ('b', 'c'), ('c', 'a')],
        )

    def test_update_node_parents_bm_constructor(self):
        """Parents are tracked for edges given to the constructor."""
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        """Parents are tracked for edges added after construction."""
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        """Drop the model built in ``setUp``."""
        del self.G
def main():
    """Learn, fit and evaluate a two-copy Bayesian network joined by 'same'.

    Steps, all driven through a ``PGM_t`` instance:

    1. Load the train/test split with ``load_features()``.
    2. Seed a model with columns 1..9 of ``img_features`` plus four
       hand-picked edges, then refine its structure with ``HillClimbSearch``
       scored by ``BdeuScore``.
    3. Clone the learned structure, replacing the last character of every
       node name with ``'b'``.
    4. Merge the learned and cloned edge sets and connect ``f5_a``, ``f9_a``,
       ``f5_b`` and ``f9_b`` to a new ``'same'`` node.
    5. Fit the merged model on the training set and print its CPDs.
    6. Run ``VariableElimination`` and print the accuracy reported by
       ``chk_accuracy`` separately for training rows with ``same == 0`` and
       ``same == 1``.

    Returns nothing; all results are printed.
    """
    andPGM = PGM_t()

    print('loading features..')
    train_set, test_set = andPGM.load_features()
    print('loading features.. Done')

    # Bayesian network of 19 nodes, 9*2 variables of network given
    # Initial incomplete Bayesian model connected manually based on intuition
    print('Generating model.. ')
    initialModel = BayesianModel({})
    initialModel.add_nodes_from(andPGM.img_features.columns[1:10].tolist())
    initialModel.add_edges_from([
        ('f6_a', 'f2_a'),
        ('f3_a', 'f4_a'),
        ('f5_a', 'f9_a'),
        ('f4_a', 'f7_a'),
    ])

    # Use hill climb search algorithm to find network structure of the
    # initial 9 nodes. The same column slice feeds both the search and the
    # scoring method, so it is taken once.
    features_9 = andPGM.img_features.iloc[0:, 1:10]
    hc = HillClimbSearch(
        data=features_9,
        scoring_method=BdeuScore(
            features_9,
            equivalent_sample_size=0.1 * len(andPGM.img_features)),
        state_names=andPGM.states_9)

    # Get best estimated structure
    best_model = hc.estimate(start=initialModel)
    # Edges in the acquired graph
    print('model of 9 var: ', best_model.edges())

    # Create a clone of the generated Bayesian network structure: every node
    # name has its last character replaced by 'b'.
    clone_model = BayesianModel({})
    for edge in best_model.edges():
        new_edge = [edge[0][:-1] + 'b', edge[1][:-1] + 'b']
        clone_model.add_edges_from([new_edge])

    # Join together the original and clone network through node 'same'.
    # The edge collections are materialised as lists before concatenation:
    # networkx 2.x returns edge views, which do not support '+'.
    multinetModel = BayesianModel({})
    multinetModel.add_edges_from(
        list(best_model.edges()) + list(clone_model.edges()))
    multinetModel.add_node('same')
    multinetModel.add_edge('f5_a', 'same')
    multinetModel.add_edge('f9_a', 'same')
    multinetModel.add_edge('f5_b', 'same')
    multinetModel.add_edge('f9_b', 'same')
    print('Generating model.. Done')
    # Edges in the final structure
    print('Final model: ', multinetModel.edges())

    print('Fit data into model..')
    # Fit the data to the model to generate CPDs using maximum likelihood
    # estimation
    multinetModel.fit(data=train_set, state_names=andPGM.states_all)
    print('Fit data into model.. Done')

    print('CPDs generated: ')
    cpds = multinetModel.get_cpds()
    for cpd in cpds:
        print(cpd)

    # Inference using Variable Elimination
    print('Start inference..')
    inference = VariableElimination(multinetModel)

    # NOTE(review): rows with same == 0 are reported as "positives" and
    # same == 1 as "negatives" - confirm this matches the dataset encoding.
    train_set_same = train_set[train_set['same'] == 0]
    train_set_not_same = train_set[train_set['same'] == 1]

    # Accuracy of positive inferences
    acc_same = andPGM.chk_accuracy(
        train_set_same,
        inference,
        variables=train_set_same.columns[0:9].tolist(),
        evidence=train_set_same.columns[9:19].tolist())
    print('accuracy of positives ', acc_same)

    # Accuracy of negative inferences
    acc_nt_same = andPGM.chk_accuracy(
        train_set_not_same,
        inference,
        variables=train_set_not_same.columns[0:9].tolist(),
        evidence=train_set_not_same.columns[9:19].tolist())
    print('accuracy of negatives', acc_nt_same)
from pgmpy.models import BayesianModel from pgmpy.factors import TabularCPD # Creating the above bayesian network model = BayesianModel() model.add_nodes_from(['Rain', 'TrafficJam']) model.add_edge('Rain', 'TrafficJam') model.add_edge('Accident', 'TrafficJam') cpd_rain = TabularCPD('Rain', 2, [[0.4], [0.6]]) cpd_accident = TabularCPD('Accident', 2, [[0.2], [0.8]]) cpd_traffic_jam = TabularCPD('TrafficJam', 2, [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], evidence=['Rain', 'Accident'], evidence_card=[2, 2]) model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam) model.add_node('LongQueues') model.add_edge('TrafficJam', 'LongQueues') cpd_long_queues = TabularCPD('LongQueues', 2, [[0.9, 0.2], [0.1, 0.8]], evidence=['TrafficJam'], evidence_card=[2]) model.add_cpds(cpd_long_queues) model.add_nodes_from(['GettingUpLate', 'LateForSchool']) model.add_edges_from([('GettingUpLate', 'LateForSchool'), ('TrafficJam', 'LateForSchool')]) cpd_getting_up_late = TabularCPD('GettingUpLate', 2, [[0.6], [0.4]]) cpd_late_for_school = TabularCPD('LateForSchool', 2, [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],