def vetor_Rede(solucao, nodes):
    """Decode a flat solution vector into a ``BayesianModel``.

    ``solucao`` holds one entry for every pair ``(nodes[i], nodes[j])`` with
    ``j < i``, visited in order of increasing ``i`` and then increasing ``j``:

    * ``1`` adds the edge ``nodes[i] -> nodes[j]``
    * ``2`` adds the edge ``nodes[j] -> nodes[i]``
    * any other value adds no edge for that pair

    Returns
    -------
    BayesianModel or bool
        The assembled graph, or ``False`` when a requested edge would run
        against an already existing directed path, or when some entry of
        ``nodes`` is not part of the final graph.
    """
    graph = BayesianModel()
    cursor = 0  # position of the current pair inside ``solucao``

    for upper in range(1, len(nodes)):
        for lower in range(upper):
            code = solucao[cursor]
            cursor += 1

            if code == 1:
                tail, head = nodes[upper], nodes[lower]
            elif code == 2:
                tail, head = nodes[lower], nodes[upper]
            else:
                continue

            # An existing path head -> tail means tail -> head closes a cycle.
            both_known = (nodes[upper] in graph.nodes()
                          and nodes[lower] in graph.nodes())
            if both_known and nx.has_path(graph, head, tail):
                return False
            graph.add_edge(tail, head)

    # Every requested node has to take part in at least one edge.
    if any(name not in graph.nodes() for name in nodes):
        return False
    return graph
def inf(self, file1):
    """Parse a network description file, run inference and print the result.

    The file is consumed as consecutive five-line records (node name, number
    of values, CPD text, evidence list, evidence cardinalities) until a blank
    line is met.  The line right after the blank line is handed to
    ``self.getegdes`` and the line two further down to ``self.getValue``.
    The parsed edges and CPDs are loaded into a ``BayesianModel``; if the
    model passes ``check_model()``, the values of every node not contained in
    the evidence are queried with variable elimination and printed.

    Parameters
    ----------
    file1 : str
        Path of the UTF-8 encoded description file.

    Returns
    -------
    None
        The result is printed, nothing is returned.
    """
    # Read everything up front so the handle is closed immediately
    # (it used to be left open for the lifetime of the process).
    with open(file1, encoding="utf8") as f1:
        lines = f1.readlines()

    G = BayesianModel()
    nodeList = {}
    i = 0
    while i < len(lines):
        if lines[i] == '\n':
            break
        nodeName = self.getnode(lines[i])
        valueNum = int(lines[i + 1])
        cpd_str = lines[i + 2]
        sequence = self.getList(lines[i + 3])
        card = self.getCard(lines[i + 4])
        cpd = self.parseCpd(cpd_str, valueNum, card)
        nodeList[nodeName] = {
            'nodeName': nodeName,
            'valueNum': valueNum,
            'cpd': cpd,
            'sequence': sequence,
            'card': card,
        }
        i += 5

    # ``i`` now points at the blank separator line.
    edges = self.getegdes(lines[i + 1])
    evidence2 = self.getValue(lines[i + 3])

    # ``edges`` is a flat sequence: [from0, to0, from1, to1, ...]
    for pair in range(len(edges) // 2):
        G.add_edge(edges[2 * pair], edges[2 * pair + 1])

    for node in nodeList.values():
        if node['sequence'][0] == '':
            # No evidence variables: a prior table.
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             values=node['cpd'])
        else:
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             evidence=node['sequence'],
                             evidence_card=node['card'],
                             values=node['cpd'])
        G.add_cpds(cpt)

    if G.check_model():
        inference = VariableElimination(G)
        rows = []
        for node in G.nodes():
            if node not in evidence2:
                phi_query = inference.query(variables=[node],
                                            evidence=evidence2,
                                            show_progress=False).values
                rows.append(node + ' ' + str(phi_query) + '\n')
        print(''.join(rows))
def test_build_skeleton(self):
    """Check the skeleton and separating sets returned by ``build_skeleton``."""

    def witness_sizes(sep_sets):
        # Witnesses may differ between runs; only their sizes are stable.
        return [len(witness) for witness in sorted(sep_sets.values())]

    # Case 1: independencies given explicitly (after closure).
    closed = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D']).closure()
    skel1, sep_sets1 = ConstraintBasedEstimator.build_skeleton("ABCD", closed)
    expected_edges1 = [('A', 'D'), ('B', 'D'), ('C', 'D')]
    self.assertTrue(self._edge_list_equal(skel1.edges(), expected_edges1))

    sep_sets_ref1 = {frozenset(pair): () for pair in ('AC', 'AB', 'CB')}
    self.assertEqual(sep_sets1, sep_sets_ref1)

    # Case 2: independencies derived from a known model.
    model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    skel2, sep_sets2 = ConstraintBasedEstimator.build_skeleton(
        model.nodes(), model.get_independencies())
    expected_edges2 = [('D', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'E')]
    # NOTE(review): the graph itself (not ``skel2.edges()``) is passed here,
    # unlike case 1 -- kept as is; confirm against ``_edge_list_equal``.
    self.assertTrue(self._edge_list_equal(skel2, expected_edges2))

    sep_sets_ref2 = {
        frozenset('DC'): ('B', ),
        frozenset('EB'): ('C', ),
        frozenset('AD'): (),
        frozenset('ED'): ('C', ),
        frozenset('EA'): ('C', ),
        frozenset('AB'): (),
    }
    # witnesses/separators might change on each run, so we cannot compare directly
    self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
    self.assertEqual(witness_sizes(sep_sets2), witness_sizes(sep_sets_ref2))
def backdoorsTo(model: BayesianModel, node: RandomVariable, notation: str = ARROW) -> Dict[Name, List[Set[Name]]]:
    """Collect the backdoor adjustment sets from every model variable to ``node``.

    For each variable ``startVar`` of ``model`` the adjustment sets returned by
    ``CausalInference.get_all_backdoor_adjustment_sets(X=startVar, Y=node.var)``
    are converted to plain ``set`` objects.  A variable with no adjustment
    sets is recorded as ``[None]``.

    Parameters
    ----------
    model : BayesianModel
        Model whose variables are used as start points.
    node : RandomVariable
        Target; only its ``var`` attribute is read.
    notation : str
        ``ARROW`` (default) keys the result by ``"<start> --> <target>"``
        strings; ``PAIR`` returns a list of ``Pair(From, To, AdjustSets)``
        named tuples; any other value returns the dict keyed by the start
        variable name.

    Returns
    -------
    dict or list
        See ``notation``.  (The return annotation only describes the dict
        forms; it is left untouched for interface compatibility.)

    Notes
    -----
    Variables are processed in ``model.nodes()`` order, so the order of the
    keys / list entries is stable between runs.  Earlier the pairs were put
    through a ``set`` first, which made the order depend on hash order.
    """
    inference: CausalInference = CausalInference(model)

    # TODO: does this give the entire possible list of adjustment sets with
    # each predecessor node, from the bottom queryVar?
    backdoorDict: Dict[Name, List[Set[Name]]] = {}
    for startVar in model.nodes():
        outerFroz = inference.get_all_backdoor_adjustment_sets(X=startVar, Y=node.var)
        if outerFroz != frozenset():
            # frozenset-of-frozensets -> list of ordinary sets
            backdoorDict[startVar] = [set(innerFroz) for innerFroz in outerFroz]
        else:
            # No adjustment set at all: keep the variable, flagged by None.
            backdoorDict[startVar] = [None]

    if notation == ARROW:
        # Spell out the direction start --> target in the key.
        backdoorTrailDict: Dict[Trail, List[Set[Name]]] = {
            f"{startVar} --> {node.var}": adjustLists
            for startVar, adjustLists in backdoorDict.items()
        }
        return backdoorTrailDict
    elif notation == PAIR:
        Pair = collections.namedtuple("Pair", ["From", "To", "AdjustSets"])
        return [
            Pair(From=startVar, To=node.var, AdjustSets=adjustLists)
            for startVar, adjustLists in backdoorDict.items()
        ]
    else:
        # No special notation requested (e.g. notation is None).
        return backdoorDict
def get_model(self):
    """
    Build a ``BayesianModel`` from the parsed ProbModelXML network.

    Nodes, edges and one ``TabularCPD`` per potential are created from
    ``self.probnet``; afterwards every property stored for a variable or an
    edge is copied onto the corresponding node / edge of the model.

    Return
    ---------------
    model: an instance of BayesianModel.

    Raises
    ------
    ValueError
        If the network type is not ``"BayesianNetwork"``.

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    if self.probnet.get("type") != "BayesianNetwork":
        raise ValueError("Please specify only Bayesian Network.")

    model = BayesianModel()
    variable_info = self.probnet["Variables"]
    model.add_nodes_from(variable_info.keys())
    model.add_edges_from(self.probnet["edges"].keys())

    def to_tabular_cpd(potential):
        # The first key of the potential's variables is the child variable;
        # its value lists the evidence (parent) variables.
        var = list(potential["Variables"].keys())[0]
        n_states = len(variable_info[var]["States"])
        parents = potential["Variables"][var]
        parent_cards = [
            len(variable_info[parent]["States"]) for parent in parents
        ]
        flat = np.array([float(tok) for tok in potential["Values"].split()])
        table = flat.reshape((n_states, flat.size // n_states))
        return TabularCPD(var, n_states, table, parents, parent_cards)

    model.add_cpds(
        *[to_tabular_cpd(potential) for potential in self.probnet["Potentials"]]
    )

    # Copy variable properties onto the nodes.
    for var in model.nodes():
        for prop_name, prop_value in variable_info[var].items():
            model.nodes[var][prop_name] = prop_value

    # Copy edge properties; the attribute store is named differently in
    # networkx 1.x.
    edges = model.edges()
    edge_store = model.edge if nx.__version__.startswith("1") else model.adj
    for edge in edges:
        source, target = edge[0], edge[1]
        for prop_name, prop_value in self.probnet["edges"][edge].items():
            edge_store[source][target][prop_name] = prop_value

    return model
def cal(self, file1, file2):
    """Compute mutual-information scores for a network and write them out.

    The first line of ``file1`` is parsed into the node list and the second
    into a flat edge list (``[from0, to0, from1, to1, ...]``), both through
    ``self.getegdes``.  ``file2`` is loaded as a CSV whose columns are indexed
    by node name.  Two results are produced, printed, and written to
    ``mutual_output.txt``:

    * a list with the mutual information of every edge's two end points
    * a nested dict ``{node1: {node2: score}}`` for every ordered pair of
      distinct nodes

    Parameters
    ----------
    file1 : str
        Path of the structure file (nodes on line 1, edges on line 2).
    file2 : str
        Path of the CSV data file.
    """
    # Close the structure file as soon as it is read (it used to stay open).
    with open(file1) as f1:
        lines = f1.readlines()
    nodes = self.getegdes(lines[0])
    edges = self.getegdes(lines[1])
    data = pd.read_csv(file2)

    edge_pairs = [(edges[2 * i], edges[2 * i + 1])
                  for i in range(len(edges) // 2)]

    G = BayesianModel()
    G.add_nodes_from(nodes)
    for parent, child in edge_pairs:
        G.add_edge(parent, child)

    # Mutual information along every edge.
    output1 = [
        mr.mutual_info_score(data[parent], data[child])
        for parent, child in edge_pairs
    ]

    # Mutual information between every ordered pair of distinct nodes.
    output2 = {}
    for node1 in G.nodes():
        output2[node1] = {
            node2: mr.mutual_info_score(data[node1], data[node2])
            for node2 in G.nodes()
            if node1 != node2
        }

    print(output1)
    print(output2)
    with open('mutual_output.txt', 'w') as f:
        f.write(str(output1))
        f.write('\n')
        f.write(str(output2))
def create_model_and_inference():
    """Build a Bayesian network from ``dependencies.csv`` and fit it.

    The dependency table is walked recursively starting at
    ``'myproximus-usage'``; each discovered dependency becomes an edge
    pointing from the dependency to the dependent node.  The model is then
    fitted on randomly generated 0/1 samples (11 columns of 64 draws) and
    drawn with ``draw_network``.

    Returns
    -------
    tuple
        ``(model, VariableElimination(model))``.
    """
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        """Append (source, target) for every dependency reachable from source."""
        source_df = df[df['Column2'] == source]
        # Columns from index 3 onwards of the first matching row hold the
        # identifiers of the dependencies.
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                # Do not walk straight back along an edge already recorded
                # in the opposite direction.
                if (target, source) not in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    # Flip the direction: dependency -> dependent.
    edges = [(t[1], t[0]) for t in edges]
    nodes = set(itertools.chain.from_iterable(edges))

    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]
    # Eleven synthetic observation columns named '0' .. '10'.
    # NOTE(review): size=64 is hard-coded; the assignment aligns on the row
    # index, so this presumably expects at most 64 rows -- confirm.
    for sample in range(11):
        nodes_df[str(sample)] = pd.DataFrame(
            data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            # pgmpy refuses edges that would create a cycle with ValueError;
            # only that case is skipped, other errors now propagate instead
            # of being swallowed by a bare ``except``.
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")
    draw_network(model.nodes(), model.edges(), {}, [])
    return model, VariableElimination(model)
def test_estimate_from_independencies(self):
    """Check the models recovered by ``estimate_from_independencies``."""
    # Explicit independencies: all three remaining variables point at D.
    closed = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D']).closure()
    learned = ConstraintBasedEstimator.estimate_from_independencies("ABCD", closed)
    self.assertSetEqual(set(learned.edges()),
                        {('B', 'D'), ('A', 'D'), ('C', 'D')})

    # Independencies taken from a known model: either the original edges or
    # the variant with B-D reversed is accepted.
    model1 = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    model2 = ConstraintBasedEstimator.estimate_from_independencies(
        model1.nodes(), model1.get_independencies())
    recovered = set(model2.edges())
    matches_original = recovered == set(model1.edges())
    matches_reversed = recovered == {('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')}
    self.assertTrue(matches_original or matches_reversed)
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build (or reload) the label network and return it.

    With ``load`` set and a pickled graph present at
    ``RUNNING_MODEL_DIR/graph.p`` the graph is unpickled and checked.
    Otherwise a new ``BayesianModel`` is built: every label gets an edge to
    its ``<label>_hat`` node, plus edges from each of the label's
    ``networkx.ancestors`` in the OBO graph that are themselves in the label
    list.  CPDs from ``get_model_cpds`` and ``get_true_label_cpds`` are
    attached, nodes left without a CPD are removed, and the checked model is
    pickled to the same file.

    Parameters
    ----------
    load : bool
        Reuse the pickled graph when it exists.
    subtree : bool
        Use ``_subtree_labels()`` instead of all keys of the label dict.
    modelsdir : str
        Directory passed to ``get_model_cpds``.  (It used to be ignored in
        favour of ``MODEL_CPDS_DIR``; the default keeps the old behaviour.)

    Returns
    -------
    BayesianModel
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'

    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE: pickle executes code on load -- only use with a graph file
        # written by this function.
        with open(graph_file, 'rb') as fh:
            G = pickle.load(fh)
        G.check_model()
        return G

    print('loading data...')
    training_labels, go_dict = load_label_data()
    if subtree:
        labels_list = _subtree_labels()
        print(labels_list)
    else:
        labels_list = go_dict.keys()

    print('adding nodes and edges...')
    G = BayesianModel()
    G.add_edges_from([(label, label + '_hat') for label in labels_list])

    obo_graph = obonet.read_obo(OBODB_FILE)
    known_labels = set(labels_list)  # O(1) membership inside the loop
    for label in labels_list:
        for child in networkx.ancestors(obo_graph, label):
            if child in known_labels:
                G.add_edge(child, label)

    predicted_cpds = get_model_cpds(labels_list=labels_list,
                                    modelsdir=modelsdir)
    for cpd in predicted_cpds:
        G.add_cpds(cpd)

    true_label_cpds = get_true_label_cpds(training_labels,
                                          go_dict,
                                          labels_list=labels_list)
    for cpd in true_label_cpds:
        G.add_cpds(cpd)

    # Collect first, remove afterwards: the node view must not change while
    # it is being iterated.
    remove_list = [node for node in G.nodes() if G.get_cpds(node) is None]
    for node in remove_list:
        if node in G:
            G.remove_node(node)

    G.check_model()
    with open(graph_file, 'wb') as fh:
        pickle.dump(G, fh)
    return G
def test_estimate_from_independencies(self):
    """Models estimated from independencies must have the expected edges."""
    ind = Independencies(["B", "C"], ["A", ["B", "C"], "D"])
    estimated = ConstraintBasedEstimator.estimate_from_independencies(
        "ABCD", ind.closure())
    expected_edges = {("B", "D"), ("A", "D"), ("C", "D")}
    self.assertSetEqual(set(estimated.edges()), expected_edges)

    source_model = BayesianModel([("A", "C"), ("B", "C"), ("B", "D"), ("C", "E")])
    rebuilt = ConstraintBasedEstimator.estimate_from_independencies(
        source_model.nodes(), source_model.get_independencies())
    # The B-D edge may come back in either direction.
    alternative_edges = {("B", "C"), ("A", "C"), ("C", "E"), ("D", "B")}
    if set(rebuilt.edges()) == set(source_model.edges()):
        edges_ok = True
    else:
        edges_ok = set(rebuilt.edges()) == alternative_edges
    self.assertTrue(edges_ok)
def test_estimate_from_independencies(self):
    """Verify edge sets produced by ``estimate_from_independencies``."""
    estimate = ConstraintBasedEstimator.estimate_from_independencies

    # 1) Hand-written independencies, closed under the semi-graphoid axioms.
    assertions = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
    assertions = assertions.closure()
    first = estimate("ABCD", assertions)
    self.assertSetEqual(set(first.edges()),
                        set([('B', 'D'), ('A', 'D'), ('C', 'D')]))

    # 2) Independencies read off an existing model.
    truth = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    second = estimate(truth.nodes(), truth.get_independencies())
    accepted = [
        set(truth.edges()),
        set([('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')]),
    ]
    self.assertTrue(any(set(second.edges()) == edge_set
                        for edge_set in accepted))
def get_model(self):
    """
    Returns the model instance of the ProbModel.

    Nodes, edges and one ``TabularCPD`` per potential are created from
    ``self.probnet``; afterwards every property stored for a variable or an
    edge is copied onto the corresponding node / edge of the model.

    All variables are added as nodes explicitly, so a variable that takes
    part in no edge is still present when its CPD is attached.  Node and edge
    attributes are written through accessors that exist in both old and
    current networkx releases (``Graph.node`` / ``Graph.edge`` are gone from
    newer versions).

    Return
    ---------------
    model: an instance of BayesianModel.

    Raises
    ------
    ValueError
        If the network type is not ``"BayesianNetwork"``.

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    if self.probnet.get('type') != "BayesianNetwork":
        raise ValueError("Please specify only Bayesian Network.")

    model = BayesianModel()
    model.add_nodes_from(self.probnet['Variables'].keys())
    model.add_edges_from(self.probnet['edges'].keys())

    tabular_cpds = []
    for cpd in self.probnet['Potentials']:
        # First key is the child variable; its value lists the evidence.
        var = list(cpd['Variables'].keys())[0]
        states = self.probnet['Variables'][var]['States']
        evidence = cpd['Variables'][var]
        evidence_card = [len(self.probnet['Variables'][evidence_var]['States'])
                         for evidence_var in evidence]
        values = np.array(list(map(float, cpd['Values'].split())))
        values = values.reshape((len(states), values.size // len(states)))
        tabular_cpds.append(TabularCPD(var, len(states), values, evidence, evidence_card))
    model.add_cpds(*tabular_cpds)

    # ``Graph.node`` is the attribute store on old networkx; newer releases
    # only offer the indexable ``Graph.nodes`` view.
    node_attrs = model.node if hasattr(model, 'node') else model.nodes
    for var in model.nodes():
        for prop_name, prop_value in self.probnet['Variables'][var].items():
            node_attrs[var][prop_name] = prop_value

    # ``Graph.adj[u][v]`` is the edge attribute dict in every networkx version.
    for edge in model.edges():
        for prop_name, prop_value in self.probnet['edges'][edge].items():
            model.adj[edge[0]][edge[1]][prop_name] = prop_value
    return model
def test_build_skeleton(self):
    """Skeleton edges and separating sets must match the reference values."""
    build = ConstraintBasedEstimator.build_skeleton

    # --- skeleton from a closed, hand-written set of independencies ---
    independencies = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
    skel1, sep_sets1 = build("ABCD", independencies.closure())
    self.assertTrue(
        self._edge_list_equal(skel1.edges(),
                              [('A', 'D'), ('B', 'D'), ('C', 'D')]))
    unconditional_pairs = [frozenset({'A', 'C'}),
                           frozenset({'A', 'B'}),
                           frozenset({'C', 'B'})]
    sep_sets_ref1 = dict.fromkeys(unconditional_pairs, ())
    self.assertEqual(sep_sets1, sep_sets_ref1)

    # --- skeleton from the independencies of a known model ---
    model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    skel2, sep_sets2 = build(model.nodes(), model.get_independencies())
    # NOTE(review): ``skel2`` itself is passed (not ``skel2.edges()``) --
    # kept unchanged; confirm against ``_edge_list_equal``.
    self.assertTrue(
        self._edge_list_equal(skel2,
                              [('D', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'E')]))

    via_b = ('B',)
    via_c = ('C',)
    sep_sets_ref2 = {
        frozenset({'D', 'C'}): via_b,
        frozenset({'E', 'B'}): via_c,
        frozenset({'A', 'D'}): (),
        frozenset({'E', 'D'}): via_c,
        frozenset({'E', 'A'}): via_c,
        frozenset({'A', 'B'}): (),
    }
    # witnesses/separators might change on each run, so we cannot compare directly
    self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
    found_sizes = list(map(len, sorted(sep_sets2.values())))
    reference_sizes = list(map(len, sorted(sep_sets_ref2.values())))
    self.assertEqual(found_sizes, reference_sizes)
def test_build_skeleton(self):
    """Compare ``build_skeleton`` output with hand-computed references."""
    # First scenario: independencies supplied directly.
    ind = Independencies(["B", "C"], ["A", ["B", "C"], "D"]).closure()
    result1 = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
    skel1, sep_sets1 = result1
    edges_ref1 = [("A", "D"), ("B", "D"), ("C", "D")]
    self.assertTrue(self._edge_list_equal(skel1.edges(), edges_ref1))
    sep_sets_ref1 = dict([
        (frozenset({"A", "C"}), ()),
        (frozenset({"A", "B"}), ()),
        (frozenset({"C", "B"}), ()),
    ])
    self.assertEqual(sep_sets1, sep_sets_ref1)

    # Second scenario: independencies implied by a model.
    model = BayesianModel([("A", "C"), ("B", "C"), ("B", "D"), ("C", "E")])
    result2 = ConstraintBasedEstimator.build_skeleton(
        model.nodes(), model.get_independencies())
    skel2, sep_sets2 = result2
    edges_ref2 = [("D", "B"), ("A", "C"), ("B", "C"), ("C", "E")]
    # NOTE(review): the skeleton graph, not its edge list, is handed to the
    # helper here -- left as found; confirm against ``_edge_list_equal``.
    self.assertTrue(self._edge_list_equal(skel2, edges_ref2))
    sep_sets_ref2 = dict([
        (frozenset({"D", "C"}), ("B", )),
        (frozenset({"E", "B"}), ("C", )),
        (frozenset({"A", "D"}), ()),
        (frozenset({"E", "D"}), ("C", )),
        (frozenset({"E", "A"}), ("C", )),
        (frozenset({"A", "B"}), ()),
    ])

    # witnesses/separators might change on each run, so we cannot compare directly
    self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
    actual_lengths = []
    for witness in sorted(sep_sets2.values()):
        actual_lengths.append(len(witness))
    reference_lengths = []
    for witness in sorted(sep_sets_ref2.values()):
        reference_lengths.append(len(witness))
    self.assertEqual(actual_lengths, reference_lengths)
# Associating the parameters with the model structure. cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp) # Checking if the cpds are valid for the model. print(cancer_model.check_model()) # Check d-separations. This is only meant for those interested. You do not need to understand this to do the project. print(cancer_model.is_active_trail('Pollution', 'Smoker')) print(cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer'])) print(cancer_model.local_independencies('Xray')) print(cancer_model.get_independencies()) # Print model information print(cancer_model.edges()) print(cancer_model.nodes()) print(cancer_model.get_cpds()) # Doing exact inference using Variable Elimination from pgmpy.inference import VariableElimination cancer_infer = VariableElimination(cancer_model) # Query print(cancer_infer.query(variables=['Dyspnoea'], evidence={'Cancer': 0})) print( cancer_infer.query(variables=['Cancer'], evidence={ 'Smoker': 0, 'Pollution': 0 }))
3, range(3), True) hh['IncomeQ'] = hh.apply(lambda row: incomeToNHTSBands(row['HhIncome']), axis=1) hh.loc[hh['HhSize'] > 5, 'HhSize'] = 6 #need to start the ordinal variables at zero hh['Bedrooms'] = hh.apply(lambda row: row['Bedrooms'] - 1, axis=1) hh['HhSize'] = hh.apply(lambda row: row['HhSize'] - 1, axis=1) hh['IncomeQ'] = hh.apply(lambda row: row['IncomeQ'] - 1, axis=1) model = BayesianModel([('IncomeQ', 'Bedrooms'), ('HhSize', 'Bedrooms'), ('IncomeQ', 'RentQ'), ('Bedrooms', 'RentQ')]) #nx.draw_networkx(model, with_labels=True) modelData = hh[model.nodes()].copy() testData = modelData.iloc[int(0.85 * modelData.shape[0]):int(modelData.shape[0] )].copy() trainData = modelData.iloc[0:int(0.85 * modelData.shape[0])].copy() model.fit(trainData, estimator=MaximumLikelihoodEstimator) #for cpd in model.get_cpds(): # print("CPD of {variable}:".format(variable=cpd.variable)) # print(cpd) model_sample = BayesianModelSampling(model) pickle.dump(model_sample, open('results/sampler.p', 'wb')) # open the nhts sample and add the inferred resType requirements nhtsSample = pd.read_csv('results/nhtsSample.csv') resType = []
from pgmpy.readwrite.BIF import BIFWriter
import pandas as pd
import numpy as np
from time import time
import graphviz as gv
import os

# Load the data, keep only the rows whose values sum to less than 200, and
# set every entry greater than 1 to 1.
train = pd.read_csv('../msnbcWithHeader.csv', sep=',')
train = train[train.sum(axis=1) < 200]
train[train > 1] = 1

# Everything from here to ``train_end`` is included in the reported time.
train_start = time()

# Structure learning: hill-climb search scored with BIC.
bic = BicScore(train)
hc = HillClimbSearch(train, scoring_method=bic)
best_model = hc.estimate(prog_bar=True)
edges = best_model.edges()

# Parameter learning on the learned structure with a BDeu prior.
model = BayesianModel(edges)
model.fit(train, estimator=BayesianEstimator, prior_type="BDeu")
variables = model.nodes()
print(model.edges())

# Despite its name, ``train_end`` holds the elapsed time in seconds.
train_end = time() - train_start
print("train time " + str(train_end))

# Render the learned graph as a PNG with graphviz and open the viewer.
my_graph = gv.Digraph(format='png')
for node in variables:
    my_graph.node(node)
for edge in edges:
    my_graph.edge(edge[0], edge[1])
filename = my_graph.render('../graph', view=True)
class MyClass(object):
    """Search over BENS -> WORLD topologies and score each by prediction error.

    ``do_it`` walks the candidate topologies delivered by ``Lattices``; for
    each one it rebuilds the networkx graph, attaches placeholder CPDs,
    translates the graph to pgmpy, estimates the child CPDs from generated
    learning data and records the average prediction error over all colors
    in ``self.results`` as ``[entropy, error]``.  The best topology found so
    far is kept in ``self.best_topology``.
    """

    def __init__(self, case):
        self.case = case
        self.results = []  # one [entropy, error] entry per tested topology
        self.networx_test = nx.DiGraph()
        self.networx_fixed = nx.DiGraph()
        self.pgmpy_test = BayesianModel()
        self.networx = nx.DiGraph()
        self.pgmpy = BayesianModel()
        self.best_error = math.inf
        # [error, entropy, networkx DiGraph, loop].  The placeholder graph is
        # an (empty) instance, not the DiGraph class, so draw() has something
        # valid to lay out even when no topology was tested.
        self.best_topology = [0, 0, nx.DiGraph(), 0]
        self.dictionary = []
        self.header = {}
        self.nodes_0 = []
        self.edges_0 = {}
        self.nodes = []
        self.edges = {}
        self.cpds = {}
        self.colors_dictionary = {}
        self.colors_table = []
        self.colors_cpd = []
        self.learning_data = {}
        # Read by test_topology(); set for real in get_my_colors().
        self.number_of_colors = 0
        # Misspelled name kept only in case outside code still reads it.
        self.nummber_of_colors = 0
        self._util = Utilities(case)
        self._lat = Lattices(self._util)
        self.expected_result = [0, 0, 0]
        self.loop = 0

    def get_my_colors(self):
        """Build the color CPD from the BEN/MEM nodes currently in ``self.nodes``."""
        evidence = []
        cardinality = []
        for node in self.nodes:
            if 'BEN' in node[0] or 'MEM' in node[0]:
                evidence.append(node[0])
                cardinality.append(node[1]['cardinality'])
        self.colors_dictionary, self.colors_table, self.colors_cpd = self.color_cpd(
            'WORLD', 3, evidence, cardinality)
        # One column of the table per color.
        self.number_of_colors = self.colors_table.shape[1]

    def color_cpd(self, var, card_var, evidence, cardinality):
        """Return ``(colors, table, cpd)`` for the given evidence nodes.

        ``table`` comes from ``CPD.get_index_matrix(cardinality)``; ``colors``
        maps each evidence node to its row of that table; ``cpd`` is a
        ``TabularCPD`` over ``var`` whose 3-row value matrix is uniform
        (1/3) unless one of the hard-coded BENS combinations below applies.
        """
        table = CPD.get_index_matrix(cardinality)
        hi = 1  # 0.999
        lo = 1 - hi
        third = 1. / 3
        C = np.prod(cardinality)
        matrix = np.full((3, C), 1. / 3.)

        has_0 = 'BENS_0' in evidence
        has_1 = 'BENS_1' in evidence
        has_2 = 'BENS_2' in evidence
        has_3 = 'BENS_3' in evidence

        # The four cases are mutually exclusive; each overwrites all rows.
        if has_1 and not has_2 and has_3 and has_0:
            matrix[0] = [third, lo, hi, third, third, lo, hi, third]
            matrix[1] = [third, lo, lo, third, third, lo, lo, third]
            matrix[2] = [third, hi, lo, third, third, hi, lo, third]
        if has_1 and not has_2 and has_3 and not has_0:
            matrix[0] = [third, lo, hi, third]
            matrix[1] = [third, lo, lo, third]
            matrix[2] = [third, hi, lo, third]
        if has_1 and has_2 and has_3 and not has_0:
            matrix[0] = [lo, lo, lo, lo, hi, lo, hi, lo]
            matrix[1] = [hi, lo, hi, lo, lo, hi, lo, hi]
            matrix[2] = [lo, hi, lo, hi, lo, lo, lo, lo]
        if has_0 and has_1 and has_2 and has_3:
            matrix[0] = [
                lo, lo, lo, lo, hi, lo, hi, lo, lo, lo, lo, lo, hi, lo, hi, lo
            ]
            matrix[1] = [
                hi, lo, hi, lo, lo, hi, lo, hi, hi, lo, hi, lo, lo, hi, lo, hi
            ]
            matrix[2] = [
                lo, hi, lo, hi, lo, lo, lo, lo, lo, hi, lo, hi, lo, lo, lo, lo
            ]

        cpd = TabularCPD(variable=var,
                         variable_card=card_var,
                         values=matrix,
                         evidence=evidence,
                         evidence_card=cardinality)
        colors = {node: table[i] for i, node in enumerate(evidence)}
        return colors, table, cpd

    def test_topology(self, entropy):
        """Score the current topology and update the best-so-far record.

        For every color the BENS CPDs of the test model are fixed to that
        color's states, the prediction error is accumulated, and the mean
        over all colors is appended to ``self.results`` together with
        ``entropy``.
        """
        self.networx_test = copy.deepcopy(self.networx)
        self.pgmpy_test = self._util.translate_digraph_to_pgmpy(
            self.networx.copy())
        self.expected_result = [0, 0, 0]

        # Going through all possible "colors".
        error = 0
        for color in range(0, self.number_of_colors):
            states = self.colors_table[:, color]
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            self.expected_result = reshaped_cpd[:, int(color)]
            for i, pixel in enumerate(states):
                name = 'BENS_' + str(i)
                if name not in self.networx_test.nodes():
                    continue
                cardinality = self.pgmpy_test.get_cardinality(name)
                self.pgmpy_test.get_cpds(name).values = CPD.create_fixed_parent(
                    cardinality, state=int(pixel))
            error += self.do_simple_inference()
        error /= self.number_of_colors

        self.results.append([entropy, error])
        if error <= self.best_error:
            self.best_error = error
            self.best_topology[0] = error
            self.best_topology[1] = entropy
            self.best_topology[2] = self.networx_test
            self.best_topology[3] = self.loop
        self.loop += 1

    def do_inference(self, models):
        """Return the sum of ``process()`` over all values of ``models``."""
        error = 0
        for key in models:
            error += models[key].process()
        return error

    # ---------------- simple inference helpers ----------------

    def do_simple_inference(self):
        """Return the summed prediction-error size over all leaf nodes."""
        total_prediction_error_size = 0
        for node in self.pgmpy_test.get_leaves():
            prediction = self.predict(node)
            observation = self.sensory_input(node)
            total_prediction_error_size += self.error_size(
                prediction, observation)
        return total_prediction_error_size

    def predict(self, node):
        """
        Predicts the given leaf node (i.e. the observational node) based on the root nodes (i.e. the belief nodes)

        :return: prediction for given observation variable, a prediction is a probability distribution
        :rtype: np.array
        """
        infer = VariableElimination(self.pgmpy_test)
        # Roots are the evidence, except the queried node itself.
        evidence = {
            k: v
            for k, v in self.get_root_nodes().items() if k != node
        }
        return infer.query(variables=[node], evidence=evidence)[node].values

    def sensory_input(self, name):
        """Return the fixed CPD expected for WORLD node ``name``.

        Returns ``None`` when ``name`` matches no ``WORLD_<i>`` entry or when
        ``self.nodes`` is empty.
        """
        expected_result = self.expected_result
        cpds = [[
            'WORLD_' + str(i),
            CPD.create_fixed_parent(2, state=int(expected_result[i]))
        ] for i in range(0, len(expected_result))]
        for _ in self.nodes:
            for label, cpd in cpds:
                if name == label:
                    return cpd
        return None

    def error(self, pred, obs):
        """
        Calculates the prediction error as the residual of subtracting the predicted inputs from the observed inputs

        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error
        :type pred : np.array
        :type obs : np.array
        :rtype : np.array
        """
        return obs - pred

    def error_size(self, pred, obs):
        """
        Calculates the size of the prediction error as the Kullback-Leibler divergence. This responds the magnitude
        of the prediction error, how wrong the prediction was.

        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error size
        :type pred : np.array
        :type obs : np.array
        :rtype : float
        """
        return entropy(obs, pred)

    def get_root_nodes(self):
        """
        Returns status of all root nodes of ``self.pgmpy_test``.

        :return: Dictionary containing all root nodes as keys and status as values
        :rtype dict
        """
        return {
            root: np.argmax(self.pgmpy_test.get_cpds(root).values)
            for root in self.pgmpy_test.get_roots()
        }

    def get_observations(self):
        """Return ``{leaf: argmax of its CPD values}`` for all leaf nodes."""
        return {
            leaf: np.argmax(self.pgmpy_test.get_cpds(leaf).values)
            for leaf in self.pgmpy_test.get_leaves()
        }

    # ---------------- parameter learning and graph construction ----------------

    def estimate_parameters(self):
        """Re-estimate the CPD values of LAN/MOTOR/WORLD nodes from the learning data."""
        data = pd.DataFrame(data=self.learning_data)
        sample_size = len(self.learning_data)
        estimator = BayesianEstimator(self.pgmpy, data)
        # NOTE: a Dirichlet prior with explicit pseudo counts was tried here
        # before; only the BDeu prior is in use.
        for node in self.nodes:
            name = node[0]
            if 'LAN' in name or 'MOTOR' in name or 'WORLD' in name:
                self.pgmpy.get_cpds(name).values = estimator.estimate_cpd(
                    name,
                    prior_type='BDeu',
                    equivalent_sample_size=sample_size).values

    def add_edges(self, topology):
        """Rebuild the BENS -> WORLD edges of ``self.networx`` from ``topology``.

        ``topology`` is indexed as ``topology[row][column]``; a ``1`` adds the
        edge ``BENS_<column> -> WORLD_<row>``.  BENS nodes whose column sums
        to zero are removed from the graph first.
        """
        self.networx.remove_edges_from(self.edges)
        self.edges = []
        self.nodes = []
        shape = np.asarray(topology).shape

        # Remove BENS nodes without any outgoing connection.
        columns = np.sum(topology, axis=0)
        nodes_to_remove = [
            'BENS_' + str(column) for column in range(0, len(columns))
            if columns[column] == 0
        ]
        self.networx.remove_nodes_from(nodes_to_remove)
        self.nodes = self.networx.nodes(data=True)

        for column in range(0, shape[1]):
            for row in range(0, shape[0]):
                if topology[row][column] == 1:
                    parent = 'BENS_' + str(column)
                    child = 'WORLD_' + str(row)
                    self.networx.add_edge(parent, child)
        self.edges = self.networx.edges()

    def add_dummy_cpds(self):
        """Attach placeholder CPDs to every node in ``self.nodes``.

        BEN/MEM nodes get a uniform fixed-parent CPD; other nodes get a
        random child CPD shaped by the cardinalities of their parents (or
        the 'orphan' variant when they have no incoming edge).
        """
        # name -> attribute dict; avoids the positional ``self.nodes[i]`` and
        # ``Graph.node[...]`` lookups that depend on the networkx version.
        node_attrs = dict(self.networx.nodes(data=True))
        for node in self.nodes:
            name, attrs = node[0], node[1]
            cardinality = attrs['cardinality']
            if ('BEN' in name) or ('MEM' in name):
                attrs['cpd'] = CPD.create_fixed_parent(cardinality,
                                                       modus='uniform')
                continue
            incoming_nodes = self.networx.in_edges(name)
            if len(incoming_nodes) == 0:
                attrs['cpd'] = CPD.create_random_child(cardinality,
                                                       modus='orphan')
                continue
            card_parent = [
                node_attrs[edge[0]]['cardinality'] for edge in incoming_nodes
            ]
            attrs['cpd'] = CPD.create_random_child(cardinality, card_parent)
        self.nodes = self.networx.nodes(data=True)

    def create_learning_data(self):
        """Fill ``self.learning_data`` with one row list per BEN and WORLD node."""
        self.get_my_colors()
        self.learning_data = {}
        ben_nodes = [x for x in self.nodes if "BEN" in x[0]]
        world_nodes = [x for x in self.nodes if "WORLD" in x[0]]
        for i, node in enumerate(ben_nodes):
            self.learning_data.update({node[0]: self.colors_table[i].tolist()})
        for node in world_nodes:
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            # The digit contained in the node name selects the CPD row.
            for hue in range(0, 3):
                if str(hue) in node[0]:
                    self.learning_data.update(
                        {node[0]: reshaped_cpd[hue, :].tolist()})

    def do_it(self):
        """Evaluate the candidate topologies, draw the outcome, return the results.

        :return: ``self.results``, a list of ``[entropy, error]`` entries
        """
        self.networx_fixed, self.dictionary, self.header = self._util.get_network(
        )
        self.networx = self.networx_fixed.copy()
        self.networx_test = self.networx_fixed.copy()
        print('Dictionary : ', self.dictionary)

        # Constructing all possible topologies; the threshold restrains the
        # number: 0 -> all possible topologies, 100 -> only the fully
        # connected topology.  At 50 only topologies with an
        # entropy >= 0.5 are considered.
        possible_topologies = self._lat.get_possible_topologies(treshold=50)
        print("Possible topologies : ", len(possible_topologies))
        entropy = 0
        count = 0  # TEMPORARY

        # Walking through all topologies.
        for topology in possible_topologies:
            # TEMPORARY: only loops 200..350 are actually evaluated.
            if self.loop < 200 or self.loop > 350:
                self.loop += 1
                count += 1
                continue
            entropy = topology[1]
            if entropy == 0:
                continue  # safeguard
            print('Loop *-> ', self.loop + 1, ' of ', len(possible_topologies))
            topo = topology[0]
            self.networx = self.networx_fixed.copy()

            # For each topology construct the edges and refresh the dummy
            # CPDs (their shape depends on the number of incoming nodes).
            self.add_edges(topo)
            self.add_dummy_cpds()
            self.nodes = self.networx.nodes(data=True)
            self.create_learning_data()

            # Convert the DiGraph to pgmpy and check it.
            self.pgmpy = self._util.translate_digraph_to_pgmpy(
                self.networx.copy())
            self.pgmpy.check_model()

            # Let pgmpy estimate the CPDs from the learning data.
            self.estimate_parameters()

            # Testing the constructed topology.
            self.test_topology(entropy)
            count += 1
        print('Check -> number of processed topologies in loop : ', count)

        # The methods have to be completed to cope with the general case
        # (BENS, MEMS, LANS, MOTORs, WORLDs); for the moment only BENs and
        # WORLDs are assumed.
        self.draw()
        self.draw_xy()
        return self.results

    def draw_xy(self):
        """Scatter-plot entropy against error for all recorded results."""
        x = []
        y = []
        s = []
        color = []
        for i, result in enumerate(self.results):
            x.append(result[0])
            y.append(result[1])
            # NOTE(review): best_topology[3] is a loop counter while ``i`` is
            # an index into ``self.results``; they only coincide when no loop
            # was skipped -- confirm which point should be highlighted.
            if i == self.best_topology[3]:
                s.append(60)
                color.append("r")
            else:
                s.append(20)
                color.append("b")
        plt.scatter(x, y, s=s, c=color, alpha=0.5)
        plt.xlabel("Complexity of topology")
        plt.ylabel("Average error over all colors")
        plt.show()

    def draw(self):
        """Draw the best topology found so far (TO REMOVE LATER)."""
        plt.figure(figsize=(10, 5))
        pos = nx.circular_layout(self.best_topology[2], scale=2)
        nx.draw(self.best_topology[2],
                pos,
                node_size=1200,
                node_color='lightblue',
                linewidths=0.25,
                font_size=10,
                font_weight='bold',
                with_labels=True)
        plt.text(1, 1, 'Topology nr. : ' + str(self.best_topology[3]))
        plt.show()
# cancer_model.check_model() # data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]}) # model = BayesianModel([('A', 'C'), ('B', 'C')]) # estimator = BayesianEstimator(model, data) # cpd_C = estimator.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=[2, 4]) # model.add_cpds(cpd_C) model.fit(train, estimator=BayesianEstimator, prior_type="BDeu") # default equivalent_sample_size=5 # pseudo_counts = {'D': [300, 700], 'I': [500, 500], 'G': [800, 200], 'L': [500, 500], 'S': [400, 600]} # model.fit(data, estimator=BayesianEstimator, prior_type='dirichlet', pseudo_counts=pseudo_counts) #输出节点信息 print(model.nodes()) #输出依赖关系 print(model.edges()) #查看某节点概率分布 print(model.get_cpds('Pclass').values) from pgmpy.inference import VariableElimination model_infer = VariableElimination(model) q = model_infer.query(variables=['Survived'], evidence={'Fare': 0}) print(q) ''' +------------+-----------------+ | Survived | phi(Survived) | +============+=================+ | Survived_0 | 0.6341 |
('TQ', 'DFT' + str(i)), ('DI' + str(i), 'RD' + str(i)), ('DFT' + str(i), 'RD' + str(i)), ('RD' + str(i), 'DFO' + str(i)), ('OU', 'DFO' + str(i))] list_edges += [('RD0', 'DI1'), ('RD1', 'DI2'), ('DPQ', 'DI0'), ('C', 'DI0')] model.add_edges_from(list_edges) model.fit(data, estimator_type = BayesianEstimator, prior_type = "BDeu", equivalent_sample_size = 10) for edge in model.edges(): print(edge) print("\n") infer = VariableElimination(model) nodes = model.nodes() Distribution = {} for key in pr.keys(): Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)] nodes.remove(key) print('pr done') for key in nodes: Distribution[key] = infer.query([key], evidence = pr)[key].values print('done' + key) print(Distribution['DPQ']) plt.subplot(4, 4, 1) plt.bar([1,2,3,4,5], Distribution['DPQ']) plt.xticks([1.5,2.5,3.5,4.5,5.5], ['very low','low','medium','high','very high'])
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-level query methods of ``BayesianModel``.

    Covers moralization, ancestor lookup, local/global independencies,
    I-map checks, immoralities, I-equivalence, copying and node removal.
    """

    def setUp(self):
        # Bare DAG used by the moralization and local-independence tests.
        self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])

        # Small "student" network with a CPD attached to every node.
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        cpd_diff = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        cpd_intel = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        cpd_grade = TabularCPD(
            'grade', 3,
            values=[[0.1] * 6, [0.1] * 6, [0.8] * 6],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3])
        self.G1.add_cpds(cpd_diff, cpd_intel, cpd_grade)

        # DAG used by the ancestor tests.
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        moralized = self.G.moralize()
        self.assertListEqual(sorted(moralized.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        # The moral graph is undirected, so accept either orientation.
        allowed = [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')]
        for u, v in moralized.edges():
            self.assertTrue((u, v) in allowed or (v, u) in allowed)

    def test_moral_graph_with_edge_present_over_parents(self):
        dag = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'),
                             ('b', 'c'), ('a', 'b')])
        moralized = dag.moralize()
        self.assertListEqual(sorted(moralized.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        allowed = [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')]
        for u, v in moralized.edges():
            self.assertTrue((u, v) in allowed or (v, u) in allowed)

    def test_get_ancestors_of_success(self):
        of_g = self.G2._get_ancestors_of('g')
        of_d = self.G2._get_ancestors_of('d')
        of_i_and_l = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(of_g, {'d', 'i', 'g'})
        self.assertEqual(of_d, {'d'})
        self.assertEqual(of_i_and_l, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        # 'h' is not a node of G2.
        with self.assertRaises(ValueError):
            self.G2._get_ancestors_of('h')

    def test_local_independencies(self):
        local = self.G.local_independencies
        self.assertEqual(local('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(local('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(local('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(local('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(local('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        # Chain and fork: X and Z are independent given Y.
        x_z_given_y = Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y'))
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), x_z_given_y)
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), x_z_given_y)
        # Collider: X and Z are marginally independent.
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        joint_values = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048,
            0.004, 0.004, 0.032, 0.04, 0.04, 0.32,
            0.024, 0.024, 0.192, 0.016, 0.016, 0.128,
        ]
        variables = ['diff', 'intel', 'grade']
        cards = [2, 3, 3]
        JPD = JointProbabilityDistribution(variables, cards, joint_values)
        fac = DiscreteFactor(variables, cards, joint_values)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is rejected by is_imap.
        with self.assertRaises(TypeError):
            self.G1.is_imap(fac)

    def test_get_immoralities(self):
        first = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(first.get_immoralities(), {('w', 'x'), ('w', 'z')})
        second = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(second.get_immoralities(), {('w', 'x'), ('w', 'z')})
        third = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                               ('w', 'y'), ('w', 'x')])
        self.assertEqual(third.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        base = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        # Comparing against a non-Bayesian model is a type error.
        self.assertRaises(TypeError, base.is_iequivalent, MarkovModel())
        model_a = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        model_b = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(model_a.is_iequivalent(model_b))
        model_c = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(model_c.is_iequivalent(model_b))

    def test_copy(self):
        clone = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(clone.edges()))
        # CPDs must be copied, not shared.
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(clone.get_cpds('diff')))

        # Mutating the original must leave the copy untouched.
        self.G1.remove_cpds('diff')
        replacement = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(replacement)
        self.assertNotEqual(self.G1.get_cpds('diff'), clone.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(clone.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), ['grade', 'intel'])
        # The CPD of a removed node is no longer retrievable.
        with self.assertRaises(ValueError):
            self.G1.get_cpds('diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), ['intel'])
        for removed in ('diff', 'grade'):
            self.assertRaises(ValueError, self.G1.get_cpds, removed)

    def tearDown(self):
        del self.G
        del self.G1
def bayesnet():
    """
    Build, print, query and draw a small demo Bayesian network with pgmpy.

    The network is assembled from three families of CPDs keyed by a
    "semantic type" (``cpd.semtype``):

    * ``'name'``  -- variables ``Ni``, ``Nj``, ``Nk`` (uniform over
      ``name_nice``),
    * ``'match'`` -- variables ``Aij``, ``Ajk``, ``Aki`` conditioned on the
      two corresponding name variables,
    * ``'score'`` -- variables ``Sij``, ``Sjk``.

    After building the model the function prints every CPD, runs
    variable-elimination queries for several evidence assignments, prints
    the model's independencies, saves a drawing of the graph to ``foo.png``
    and opens it with ``ut.startfile``.

    Side effects: several locals (``semtype2_nice``, ``var2_cpd``,
    ``score_nice``, ``name_nice``, ``score_basis``, ``nid_basis``) are
    exported into the module namespace via ``globals()``.

    NOTE(review): ``np`` and ``ut`` are used but not imported in this
    function; presumably they are module-level imports of numpy and utool
    -- confirm.

    References:
        https://class.coursera.org/pgm-003/lecture/17
        http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
        http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
        http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
        https://github.com/pgmpy/pgmpy.git
        http://pgmpy.readthedocs.org/en/latest/
        http://nipy.bic.berkeley.edu:5000/download/11
    """
    # --- Disabled experiment: explicit enumeration of the event space ---
    # import operator as op
    # # Enumerate all possible events
    # varcard_list = list(map(op.attrgetter('variable_card'), cpd_list))
    # _esdat = list(ut.iprod(*map(range, varcard_list)))
    # _escol = list(map(op.attrgetter('variable'), cpd_list))
    # event_space = pd.DataFrame(_esdat, columns=_escol)

    # # Custom compression of event space to inspect a specific graph
    # def compress_space_flags(event_space, var1, var2, var3, cmp12_):
    #     """
    #     var1, var2, cmp_ = 'Lj', 'Lk', op.eq
    #     """
    #     import vtool as vt
    #     data = event_space
    #     other_cols = ut.setdiff_ordered(data.columns.tolist(), [var1, var2, var3])
    #     case_flags12 = cmp12_(data[var1], data[var2]).values
    #     # case_flags23 = cmp23_(data[var2], data[var3]).values
    #     # case_flags = np.logical_and(case_flags12, case_flags23)
    #     case_flags = case_flags12
    #     case_flags = case_flags.astype(np.int64)
    #     subspace = np.hstack((case_flags[:, None], data[other_cols].values))
    #     sel_ = vt.unique_row_indexes(subspace)
    #     flags = np.logical_and(mask, case_flags)
    #     return flags

    # # Build special cases
    # case_same = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.eq)]
    # case_diff = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.ne)]
    # special_cases = [
    #     case_same,
    #     case_diff,
    # ]

    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    from pgmpy.inference import BeliefPropagation  # NOQA
    from pgmpy.inference import VariableElimination  # NOQA

    # Human-readable state labels for each semantic type; the position in
    # each list is the integer state used by the CPDs.
    name_nice = ['n1', 'n2', 'n3']
    score_nice = ['low', 'high']
    match_nice = ['diff', 'same']
    num_names = len(name_nice)
    num_scores = len(score_nice)
    nid_basis = list(range(num_names))
    score_basis = list(range(num_scores))

    # semtype -> list of state labels
    semtype2_nice = {
        'score': score_nice,
        'name': name_nice,
        'match': match_nice,
    }
    # variable name -> CPD; filled in incrementally as CPDs are created
    var2_cpd = {
    }
    # Export to module scope (e.g. for interactive inspection after the call).
    globals()['semtype2_nice'] = semtype2_nice
    globals()['var2_cpd'] = var2_cpd

    # All ordered pairs of name ids, and a boolean mask marking the pairs
    # whose two ids are equal.
    name_combo = np.array(list(ut.iprod(nid_basis, nid_basis)))
    combo_is_same = name_combo.T[0] == name_combo.T[1]

    def get_expected_scores_prob(level1, level2):
        # Per name pair: `level1` where the names match, `1 - level2` where
        # they differ.
        part1 = combo_is_same * level1
        part2 = (1 - combo_is_same) * (1 - (level2))
        expected_scores_level = part1 + part2
        return expected_scores_level

    # def make_cpd():

    def name_cpd(aid):
        # Uniform prior over the names for annotation `aid` (variable 'N<aid>').
        from pgmpy.factors import TabularCPD
        cpd = TabularCPD(
            variable='N' + aid,
            variable_card=num_names,
            values=[[1.0 / num_names] * num_names])
        cpd.semtype = 'name'
        return cpd

    name_cpds = [name_cpd('i'), name_cpd('j'), name_cpd('k')]
    var2_cpd.update(dict(zip([cpd.variable for cpd in name_cpds], name_cpds)))
    # `if True` / `else` below are hard-coded switches between alternative
    # model layouts; only the `if True` branches run.
    if True:
        num_same_diff = 2
        # Row 0 is get_expected_scores_prob(0, 0): 1 where the names differ.
        # Row 1 is get_expected_scores_prob(1, 1): 1 where the names match.
        samediff_measure = np.array([
            # get_expected_scores_prob(.12, .2),
            # get_expected_scores_prob(.88, .8),
            get_expected_scores_prob(0, 0),
            get_expected_scores_prob(1, 1),
        ])
        # Normalize each column so it sums to one.
        samediff_vals = (samediff_measure / samediff_measure.sum(axis=0)).tolist()

        def samediff_cpd(aid1, aid2):
            # Match variable 'A<aid1><aid2>' conditioned on both name variables.
            cpd = TabularCPD(
                variable='A' + aid1 + aid2,
                variable_card=num_same_diff,
                values=samediff_vals,
                evidence=['N' + aid1, 'N' + aid2],  # [::-1],
                evidence_card=[num_names, num_names])  # [::-1])
            cpd.semtype = 'match'
            return cpd
        samediff_cpds = [samediff_cpd('i', 'j'), samediff_cpd('j', 'k'), samediff_cpd('k', 'i')]
        var2_cpd.update(dict(zip([cpd.variable for cpd in samediff_cpds], samediff_cpds)))

        if True:
            def score_cpd(aid1, aid2):
                # Score variable 'S<aid1><aid2>' conditioned on the match
                # variable and both name variables.
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]
                evidence_cpds = [var2_cpd[key] for key in evidence]
                evidence_nice = [semtype2_nice[cpd.semtype] for cpd in evidence_cpds]
                evidence_card = list(map(len, evidence_nice))
                # Each state is a tuple of labels in the order of `evidence`:
                # (match label, name label, name label).
                evidence_states = list(ut.iprod(*evidence_nice))
                variable_basis = semtype2_nice[semtype]

                variable_values = []
                for mystate in variable_basis:
                    row = []
                    for state in evidence_states:
                        # NOTE(review): given the evidence order above,
                        # state[0] is a match label ('diff'/'same') and
                        # state[1], state[2] are name labels, so
                        # `state[0] == state[1]` and `state[2] == 'same'`
                        # look like they can never be true and `val` would
                        # always be 1. The indices may be off (perhaps
                        # state[1] == state[2] and state[0] == 'same' were
                        # intended) -- confirm before relying on this CPD.
                        if state[0] == state[1]:
                            if state[2] == 'same':
                                val = .2 if mystate == 'low' else .8
                            else:
                                val = 1
                                # val = .5 if mystate == 'low' else .5
                        elif state[0] != state[1]:
                            if state[2] == 'same':
                                val = .5 if mystate == 'low' else .5
                            else:
                                val = 1
                                # val = .9 if mystate == 'low' else .1
                        row.append(val)
                    variable_values.append(row)

                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,  # [::-1],
                    evidence_card=evidence_card)  # [::-1])
                cpd.semtype = semtype
                return cpd
        else:
            # Disabled alternative: score depends on the match variable only.
            score_values = [
                [.8, .1],
                [.2, .9],
            ]

            def score_cpd(aid1, aid2):
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=num_scores,
                    values=score_values,
                    evidence=['A' + aid1 + aid2],  # [::-1],
                    evidence_card=[num_same_diff])  # [::-1])
                cpd.semtype = 'score'
                return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds + samediff_cpds
    else:
        # Disabled alternative: no match variables; score depends directly
        # on the two name variables.
        score_measure = np.array([get_expected_scores_prob(level1, level2)
                                  for level1, level2 in
                                  zip(np.linspace(.1, .9, num_scores),
                                      np.linspace(.2, .8, num_scores))])
        score_values = (score_measure / score_measure.sum(axis=0)).tolist()

        def score_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='S' + aid1 + aid2,
                variable_card=num_scores,
                values=score_values,
                evidence=['N' + aid1, 'N' + aid2],
                evidence_card=[num_names, num_names])
            cpd.semtype = 'score'
            return cpd
        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds
        pass

    # Derive the graph structure from the CPDs: one edge from each evidence
    # variable to the variable it conditions.
    input_graph = []
    for cpd in cpd_list:
        if cpd.evidence is not None:
            for evar in cpd.evidence:
                input_graph.append((evar, cpd.variable))
    name_model = BayesianModel(input_graph)
    name_model.add_cpds(*cpd_list)

    var2_cpd.update(dict(zip([cpd.variable for cpd in cpd_list], cpd_list)))
    globals()['var2_cpd'] = var2_cpd

    varnames = [cpd.variable for cpd in cpd_list]

    # --- PRINT CPDS ---

    cpd = score_cpds[0]

    def print_cpd(cpd):
        # Print the CPD as a DataFrame: rows are the variable's state
        # labels, columns are comma-joined evidence state labels.
        print('CPT: %r' % (cpd,))
        index = semtype2_nice[cpd.semtype]
        if cpd.evidence is None:
            columns = ['None']
        else:
            basis_lists = [semtype2_nice[var2_cpd[ename].semtype] for ename in cpd.evidence]
            columns = [','.join(x) for x in ut.iprod(*basis_lists)]
        data = cpd.get_cpd()
        print(pd.DataFrame(data, index=index, columns=columns))

    for cpd in name_model.get_cpds():
        print('----')
        print(cpd._str('phi'))
        print_cpd(cpd)

    # --- INFERENCE ---

    Ni = name_cpds[0]

    # Evidence template: Ni fixed to state 0, every score variable ranging
    # over all of its states.
    event_space_combos = {}
    event_space_combos[Ni.variable] = 0  # Set ni to always be Fred
    for cpd in cpd_list:
        if cpd.semtype == 'score':
            event_space_combos[cpd.variable] = list(range(cpd.variable_card))
    evidence_dict = ut.all_dict_combinations(event_space_combos)

    # Query about name of annotation k given different event space params

    def pretty_evidence(evidence):
        # Render each evidence item as '<var>=<state label>'.
        return [key + '=' + str(semtype2_nice[var2_cpd[key].semtype][val])
                for key, val in evidence.items()]

    def print_factor(factor):
        # Print the factor's values keyed by labelled joint assignments.
        # Currently unused: its only call site in try_query is commented out.
        row_cards = factor.cardinality
        row_vars = factor.variables
        values = factor.values.reshape(np.prod(row_cards), 1).flatten()
        # col_cards = 1
        # col_vars = ['']
        basis_lists = list(zip(*list(ut.iprod(*[range(c) for c in row_cards]))))
        nice_basis_lists = []
        for varname, basis in zip(row_vars, basis_lists):
            cpd = var2_cpd[varname]
            _nice_basis = ut.take(semtype2_nice[cpd.semtype], basis)
            nice_basis = ['%s=%s' % (varname, val) for val in _nice_basis]
            nice_basis_lists.append(nice_basis)
        row_lbls = [', '.join(sorted(x)) for x in zip(*nice_basis_lists)]
        print(ut.repr3(dict(zip(row_lbls, values)), precision=3, align=True, key_order_metric='-val'))

    # name_belief = BeliefPropagation(name_model)
    name_belief = VariableElimination(name_model)

    import pgmpy
    import six  # NOQA

    def try_query(evidence):
        # Query every variable not fixed by `evidence` and print the
        # resulting per-variable factors side by side.
        print('--------')
        query_vars = ut.setdiff_ordered(varnames, list(evidence.keys()))
        evidence_str = ', '.join(pretty_evidence(evidence))
        probs = name_belief.query(query_vars, evidence)
        factor_list = probs.values()
        joint_factor = pgmpy.factors.factor_product(*factor_list)
        print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ')')
        # print(six.text_type(joint_factor))
        factor = joint_factor  # NOQA
        # print_factor(factor)
        # import utool as ut
        print(ut.hz_str([(f._str(phi_or_p='phi')) for f in factor_list]))

    for evidence in evidence_dict:
        try_query(evidence)

    # Two hand-picked cases: all match variables in state 1, then all in
    # state 0, with Ni fixed to state 0 in both.
    evidence = {'Aij': 1, 'Ajk': 1, 'Aki': 1, 'Ni': 0}
    try_query(evidence)

    evidence = {'Aij': 0, 'Ajk': 0, 'Aki': 0, 'Ni': 0}
    try_query(evidence)

    globals()['score_nice'] = score_nice
    globals()['name_nice'] = name_nice
    globals()['score_basis'] = score_basis
    globals()['nid_basis'] = nid_basis

    print('Independencies')
    print(name_model.get_independencies())
    print(name_model.local_independencies([Ni.variable]))

    # --- Disabled experiment: queries over the `special_cases` event spaces ---
    # name_belief = BeliefPropagation(name_model)
    # # name_belief = VariableElimination(name_model)
    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         probs = name_belief.query(['Lk'], evidence)
    #         factor = probs['Lk']
    #         probs = factor.values
    #         evidence_ = evidence.copy()
    #         evidence_['Li'] = name_nice[evidence['Li']]
    #         evidence_['Lj'] = name_nice[evidence['Lj']]
    #         evidence_['Sij'] = score_nice[evidence['Sij']]
    #         evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #         nice2_prob = ut.odict(zip(name_nice, probs.tolist()))
    #         ut.print_python_code('P(Lk | {evidence}) = {cpt}'.format(
    #             evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #             cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #         ))

    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.drop('Lj', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         query_vars = ['Lk', 'Lj']
    #         probs = name_belief.query(query_vars, evidence)
    #         for queryvar in query_vars:
    #             factor = probs[queryvar]
    #             print(factor._str('phi'))
    #             probs = factor.values
    #             evidence_ = evidence.copy()
    #             evidence_['Li'] = name_nice[evidence['Li']]
    #             evidence_['Sij'] = score_nice[evidence['Sij']]
    #             evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #             nice2_prob = ut.odict(zip([queryvar + '=' + x for x in name_nice], probs.tolist()))
    #             ut.print_python_code('P({queryvar} | {evidence}) = {cpt}'.format(
    #                 query_var=query_var,
    #                 evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #                 cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #             ))

    # --- Draw model ---

    import plottool as pt
    import networkx as netx
    fig = pt.figure()  # NOQA
    fig.clf()
    ax = pt.gca()

    # Rebuild the model as a plain networkx DiGraph for layout and drawing.
    netx_nodes = [(node, {}) for node in name_model.nodes()]
    netx_edges = [(etup[0], etup[1], {}) for etup in name_model.edges()]
    netx_graph = netx.DiGraph()
    netx_graph.add_nodes_from(netx_nodes)
    netx_graph.add_edges_from(netx_edges)

    # pos = netx.graphviz_layout(netx_graph)
    pos = netx.pydot_layout(netx_graph, prog='dot')
    netx.draw(netx_graph, pos=pos, ax=ax, with_labels=True)

    # Save the drawing and open it through utool.
    pt.plt.savefig('foo.png')
    ut.startfile('foo.png')
# Read the attribute (column) names: they are the first row of the names file.
# The file is opened in a `with` block so the handle is always closed, and
# with newline='' as the csv module documentation recommends.
with open('data7_names.csv', 'r', newline='') as names_file:
    lines = list(csv.reader(names_file))
attributes = lines[0]

# Read the Cleveland heart disease data; '?' marks a missing value.
heartDisease = pd.read_csv('data7_heart.csv', names=attributes)
heartDisease = heartDisease.replace('?', np.nan)

# Model the Bayesian network. Each tuple is a directed edge (parent, child).
# The original list contained ('sex', 'trestbps') twice; a directed graph
# stores an edge only once, so the duplicate is dropped.
model = BayesianModel([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol'),
])

print('\nBayesian Network Nodes are: ')
print('\t', model.nodes())
print('\nBayesian Network Edges are:')
print('\t', model.edges())

# Learn the CPDs from the data with maximum-likelihood estimation.
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inference with the fitted network.
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Compute the distribution of heart disease given age = 28.
print('\n1.Probability of HeartDisease given Age=28')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
# NOTE(review): indexing the result by variable name assumes query() returns
# a mapping of variable -> factor -- confirm against the installed pgmpy.
print(q['heartdisease'])
# Compare the hand-written junction tree algorithm (`jta`) with pgmpy's
# variable elimination on the same directed model `DGM`.
UGM = DGM.to_markov_model()
jtree = UGM.to_junction_tree()

evidence = {'A': 1}
marginal = jta(UGM, jtree, evidence.items())

# print() is called with a single argument throughout, so the output is the
# same under Python 2 and Python 3 (the original print statements are a
# SyntaxError on Python 3).
print("Results of the implemented JTA")
for m in marginal:
    print(m)

print("\n=======================================\n")

print("Results of the Variable Elimination from pgmpy")
inference = VariableElimination(DGM)
# Query every node returned by get_different(nodes, evidence), one at a time.
for v in get_different(DGM.nodes(), evidence):
    print(inference.query(variables=[v], evidence=evidence)[v])

# visualization part
# nx.draw_circular(DGM, with_labels=True, node_color="white", node_size=1000)
# plt.draw()
# plt.show()
# nx.draw_circular(UGM, with_labels=True, node_color="white", node_size=1000)
# plt.draw()
# plt.show()
# nx.draw_circular(jtree, with_labels=True, node_color="white", node_shape='s', node_size=8000)
# plt.draw()
# plt.show()
from pgmpy.models import BayesianModel from pgmpy.factors.discrete import TabularCPD from pgmpy.inference import VariableElimination cancer_model = BayesianModel([('Pollution', 'Cancer'), ('Smoker', 'Cancer'), ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea')]) print('Bayesian network models are :') print('\t', cancer_model.nodes()) print('Bayesian edges are:') print('\t', cancer_model.edges()) cpd_poll = TabularCPD(variable='Pollution', variable_card=2, values=[[0.9], [0.1]]) cpd_smoke = TabularCPD(variable='Smoker', variable_card=2, values=[[0.3], [0.7]]) cpd_cancer = TabularCPD(variable='Cancer', variable_card=2, values=[[0.03, 0.05, 0.001, 0.02], [0.97, 0.95, 0.999, 0.98]], evidence=['Smoker', 'Pollution'], evidence_card=[2, 2]) cpd_xray = TabularCPD(variable='Xray', variable_card=2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=['Cancer'], evidence_card=[2]) cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2, values=[[0.65, 0.3], [0.35, 0.7]],
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-level query methods of ``BayesianModel``.

    Covers moralization, ancestor lookup, cardinalities, local/global
    independencies, I-map checks, Markov blankets, immoralities,
    I-equivalence, copying and node removal.
    """

    def setUp(self):
        # Bare DAG used by the moralization and local-independence tests.
        self.G = BayesianModel([("a", "d"), ("b", "d"), ("d", "e"), ("b", "c")])

        # Small "student" network with a CPD attached to every node.
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        cpd_diff = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        cpd_intel = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        cpd_grade = TabularCPD(
            "grade",
            3,
            values=[[0.1] * 6, [0.1] * 6, [0.8] * 6],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.G1.add_cpds(cpd_diff, cpd_intel, cpd_grade)

        # DAG used by the ancestor tests.
        self.G2 = BayesianModel([("d", "g"), ("g", "l"), ("i", "g"), ("i", "l")])

    def test_moral_graph(self):
        moralized = self.G.moralize()
        self.assertListEqual(sorted(moralized.nodes()), ["a", "b", "c", "d", "e"])
        # The moral graph is undirected, so accept either orientation.
        allowed = [("a", "b"), ("a", "d"), ("b", "c"), ("d", "b"), ("e", "d")]
        for u, v in moralized.edges():
            self.assertTrue((u, v) in allowed or (v, u) in allowed)

    def test_moral_graph_with_edge_present_over_parents(self):
        dag = BayesianModel(
            [("a", "d"), ("d", "e"), ("b", "d"), ("b", "c"), ("a", "b")]
        )
        moralized = dag.moralize()
        self.assertListEqual(sorted(moralized.nodes()), ["a", "b", "c", "d", "e"])
        allowed = [("a", "b"), ("c", "b"), ("d", "a"), ("d", "b"), ("d", "e")]
        for u, v in moralized.edges():
            self.assertTrue((u, v) in allowed or (v, u) in allowed)

    def test_get_ancestors_of_success(self):
        of_g = self.G2._get_ancestors_of("g")
        of_d = self.G2._get_ancestors_of("d")
        of_i_and_l = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(of_g, {"d", "i", "g"})
        self.assertEqual(of_d, {"d"})
        self.assertEqual(of_i_and_l, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        # "h" is not a node of G2.
        with self.assertRaises(ValueError):
            self.G2._get_ancestors_of("h")

    def test_get_cardinality(self):
        expected = {"diff": 2, "intel": 3, "grade": 3}
        self.assertDictEqual(self.G1.get_cardinality(), expected)

    def test_get_cardinality_with_node(self):
        self.assertEqual(self.G1.get_cardinality("diff"), 2)
        self.assertEqual(self.G1.get_cardinality("intel"), 3)
        self.assertEqual(self.G1.get_cardinality("grade"), 3)

    def test_local_independencies(self):
        local = self.G.local_independencies
        self.assertEqual(local("a"), Independencies(["a", ["b", "c"]]))
        self.assertEqual(local("c"), Independencies(["c", ["a", "d", "e"], "b"]))
        self.assertEqual(local("d"), Independencies(["d", "c", ["b", "a"]]))
        self.assertEqual(local("e"), Independencies(["e", ["c", "b", "a"], "d"]))
        self.assertEqual(local("b"), Independencies(["b", "a"]))
        self.assertEqual(self.G1.local_independencies("grade"), Independencies())

    def test_get_independencies(self):
        # Chain and fork: X and Z are independent given Y.
        x_z_given_y = Independencies(("X", "Z", "Y"), ("Z", "X", "Y"))
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        self.assertEqual(chain.get_independencies(), x_z_given_y)
        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        self.assertEqual(fork.get_independencies(), x_z_given_y)
        # Collider: X and Z are marginally independent.
        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        self.assertEqual(
            collider.get_independencies(),
            Independencies(("X", "Z"), ("Z", "X")),
        )

    def test_is_imap(self):
        joint_values = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048,
            0.004, 0.004, 0.032, 0.04, 0.04, 0.32,
            0.024, 0.024, 0.192, 0.016, 0.016, 0.128,
        ]
        variables = ["diff", "intel", "grade"]
        cards = [2, 3, 3]
        JPD = JointProbabilityDistribution(variables, cards, joint_values)
        fac = DiscreteFactor(variables, cards, joint_values)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is rejected by is_imap.
        with self.assertRaises(TypeError):
            self.G1.is_imap(fac)

    def test_markov_blanet(self):
        dag = DAG(
            [
                ("x", "y"),
                ("z", "y"),
                ("y", "w"),
                ("y", "v"),
                ("u", "w"),
                ("s", "v"),
                ("w", "t"),
                ("w", "m"),
                ("v", "n"),
                ("v", "q"),
            ]
        )
        # The expected blanket of "y" holds its parents (x, z), its
        # children (w, v) and the children's other parents (u, s).
        self.assertEqual(
            set(dag.get_markov_blanket("y")),
            {"s", "w", "x", "u", "z", "v"},
        )

    def test_get_immoralities(self):
        first = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertEqual(first.get_immoralities(), {("w", "x"), ("w", "z")})
        second = BayesianModel([("x", "y"), ("z", "y"), ("z", "x"), ("w", "y")])
        self.assertEqual(second.get_immoralities(), {("w", "x"), ("w", "z")})
        third = BayesianModel(
            [("x", "y"), ("z", "y"), ("x", "z"), ("w", "y"), ("w", "x")]
        )
        self.assertEqual(third.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        base = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        # Comparing against a non-Bayesian model is a type error.
        self.assertRaises(TypeError, base.is_iequivalent, MarkovModel())
        model_a = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        model_b = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(model_a.is_iequivalent(model_b))
        model_c = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(model_c.is_iequivalent(model_b))

    def test_copy(self):
        clone = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(clone.edges()))
        # CPDs must be copied, not shared.
        self.assertNotEqual(
            id(self.G1.get_cpds("diff")), id(clone.get_cpds("diff"))
        )

        # Mutating the original must leave the copy untouched.
        self.G1.remove_cpds("diff")
        replacement = TabularCPD("diff", 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(replacement)
        self.assertNotEqual(self.G1.get_cpds("diff"), clone.get_cpds("diff"))

        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(clone.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(clone.edges()))

    def test_remove_node(self):
        self.G1.remove_node("diff")
        self.assertEqual(sorted(self.G1.nodes()), ["grade", "intel"])
        # The CPD of a removed node is no longer retrievable.
        with self.assertRaises(ValueError):
            self.G1.get_cpds("diff")

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(["diff", "grade"])
        self.assertEqual(sorted(self.G1.nodes()), ["intel"])
        for removed in ("diff", "grade"):
            self.assertRaises(ValueError, self.G1.get_cpds, removed)

    def tearDown(self):
        del self.G
        del self.G1
class TestBaseModelCreation(unittest.TestCase):
    """Tests for constructing a ``BayesianModel`` and adding nodes and edges.

    ``assertListEqual`` requires both arguments to be real ``list``
    instances. On networkx >= 2.0, ``nodes()`` and ``edges()`` return view
    objects and ``predecessors()`` returns an iterator, so those results
    are wrapped in ``list(...)`` before comparison. The wrapping is a no-op
    on networkx 1.x, where they already are lists.
    """

    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        # Only checks that construction with non-string nodes does not raise.
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(list(self.G.nodes()), ['a'])

    def test_add_node_nonstring(self):
        # Only checks that a non-string node is accepted.
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Only checks that non-string nodes are accepted.
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(list(self.G.edges()), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Only checks that an edge between non-string nodes is accepted.
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        # c -> a would close the cycle a -> c -> a.
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Only checks that edges between non-string nodes are accepted.
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.g.predecessors('a')), [])
        self.assertListEqual(list(self.g.predecessors('b')), ['a'])
        self.assertListEqual(list(self.g.predecessors('c')), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.G.predecessors('a')), [])
        self.assertListEqual(list(self.G.predecessors('b')), ['a'])
        self.assertListEqual(list(self.G.predecessors('c')), ['b'])

    def tearDown(self):
        del self.G
class TestGibbsSampling(unittest.TestCase):
    """Tests for ``GibbsSampling`` on a Bayesian model and a Markov model.

    Uses ``assertEqual`` rather than the ``assertEquals`` alias, which was
    deprecated and then removed in Python 3.12. Node containers are wrapped
    in ``list(...)`` before ``assertListEqual`` because networkx >= 2.0
    returns view objects from ``nodes()``.
    """

    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3,
                               [[0.3, 0.05, 0.9, 0.5],
                                [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = DiscreteFactor(['C', 'B'], [4, 3],
                                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        # Sampler under test, built from the Bayesian model.
        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model
        del self.gibbs

    # Decorators apply bottom-up, so `init` is the MarkovChain.__init__
    # mock and `get_kernel` is the kernel-builder mock.
    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_bayesian_model',
           autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_markov_model',
           autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables),
                             list(self.bayesian_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities,
                             {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables),
                             list(self.markov_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities,
                             {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        # Sampled states must stay within each variable's cardinality.
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        # With no stored state and no start state, sample() is expected to
        # ask random_state() for one.
        self.gibbs.state = None
        random_state.return_value = [
            State('diff', 0), State('intel', 0), State('grade', 0)
        ]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = list(gen)
        self.assertEqual(len(samples), 2)
        # Each generated sample carries exactly one state per variable.
        self.assertEqual(
            {samples[0][0].var, samples[0][1].var, samples[0][2].var},
            {'diff', 'intel', 'grade'})
        self.assertEqual(
            {samples[1][0].var, samples[1][1].var, samples[1][2].var},
            {'diff', 'intel', 'grade'})

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = list(gen)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
class Utilities(object):
    """Read, write and convert a Bayesian network stored as a peepo json file.

    The instance keeps three views of one network: ``pgmpy_object`` (a
    ``BayesianModel``), ``networkx_object`` (a ``DiGraph`` whose nodes carry
    ``cardinality`` and ``cpd`` attributes) and the json file at ``self.file``.
    ``dictionary`` is a list of ``(user name, standardized name)`` tuples used
    to translate node names between the json file and the graph objects.
    """

    def __init__(self, file):
        """Resolve the json path for *file* and create empty network objects.

        :param file: file name without path or extension
        """
        self.keywords = ['BENS', 'MEMS', 'LANS', 'MOTOR', 'WORLD']
        self.standard_nodes = {
            'RONS': {
                'BENS': [],
                'MEMS': []
            },
            'LANS': {
                'LANS': []
            },
            'LENS': {
                'MOTOR': [],
                'WORLD': []
            }
        }
        self.file = file
        # Replaces self.file by the full path inside project_repository.
        self.get_json_path(file)
        self.pgmpy_object = BayesianModel()
        self.networkx_object = nx.DiGraph()
        self.header = ''
        self.dictionary = []

    def get_nodes_in_family(self, family, attributes=False):
        """Return the names of the networkx nodes whose name contains *family*.

        :param family: substring to look for (e.g. 'BENS')
        :param attributes: unused, kept for interface compatibility
        :return: list of matching node names
        """
        nw_nodes = self.networkx_object.nodes()
        # When the node container is 2-dimensional each entry is a
        # (name, attributes) pair and only the name is of interest.
        nw_dim = np.asarray(nw_nodes).ndim
        nodes = []
        for node in nw_nodes:
            if nw_dim > 1:
                node = node[0]
            if family in node:
                nodes.append(node)
        return nodes

    @staticmethod
    def check_json_path(directory):
        """Create the ``project_repository`` directory under *directory* if missing.

        :param directory: the parent directory of the repository
        :type directory: string
        :rtype: none
        """
        os.makedirs(os.path.join(directory, 'project_repository'),
                    exist_ok=True)

    def get_json_path(self, file):
        """Store in ``self.file`` the full json path for *file*.

        Walks up from the directory of this source file (at most six levels)
        until the path no longer contains the ``peepo/peepo`` package
        directory, makes sure ``project_repository`` exists there and builds
        ``<common>/project_repository/<file>.json``.

        :param file: file name without path or extension
        :type file: string
        """
        levels = 5
        package_marker = os.path.join('peepo', 'peepo')
        common = os.path.dirname(os.path.realpath(__file__))
        for _ in range(levels + 1):
            common = os.path.dirname(common)
            if package_marker not in common:
                break
        Utilities.check_json_path(common)
        self.file = str(
            os.path.join(common, 'project_repository', file + '.json'))
        print('in get_json_path :', self.file)

    def save_json(self, astring):
        """Write *astring* to ``self.file`` with line breaks before known keys.

        ``json.dumps`` offers little control over the layout, so the string is
        flattened first and a line feed (plus tabs) is inserted in front of
        each well-known keyword and each opening brace.

        :param astring: the serialized json content
        :type astring: string
        :rtype: void
        """
        # Remove all line feeds written by the dump method.
        astring = astring.replace('\n', '')
        # Order matters: the brace rule must run last so the braces receive
        # their own line break after the keyword rules have been applied.
        layout_rules = (
            ('"Identification', '\n"Identification'),
            ('"Date', '\n"Date'),
            ('"Description', '\n"Description'),
            ('"Train_from', '\n"Train_from'),
            ('"Frozen', '\n"Frozen'),
            ('"Nodes', '\n\n"Nodes'),
            ('"RONS', '\n\t\t"RONS'),
            ('"BENS', '\n\t\t\t"BENS'),
            ('"MEMS', '\n\t\t\t"MEMS'),
            ('"LANS', '\n\t\t"LANS'),
            ('"LENS', '\n\t\t"LENS'),
            ('"MOTOR', '\n\t\t\t"MOTOR'),
            ('"WORLD', '\n\t\t\t"WORLD'),
            ('"Edges', '\n\n"Edges'),
            ('"CPDs', '\n\n"CPDs'),
            ('{', '\n\t\t{'),
        )
        for keyword, laid_out in layout_rules:
            astring = astring.replace(keyword, laid_out)
        # The context manager closes the file even if a write fails.
        with open(str(self.file), "w") as text_file:
            text_file.write(astring)
            text_file.write('\n')

    def translation(self, astring, from_man_to_machine):
        """Translate a node name using ``self.dictionary``.

        :param astring: the node name to translate
        :param from_man_to_machine: 0 to translate a user given name into the
            standardized name, 1 for the other way around
        :return: the corresponding node name
        :raises KeyError: if *astring* is not present in the dictionary
        :type astring: string
        :rtype: string
        """
        source, target = (1, 0) if from_man_to_machine == 1 else (0, 1)
        for item in self.dictionary:
            if item[source] == astring:
                return item[target]
        # Fail loudly instead of returning an unrelated entry.
        raise KeyError("no translation found for node name %r" % (astring,))

    def clean_edge_list(self, edge_array, parent):
        """Return *edge_array* without the entries equal to *parent*."""
        return [a for a in edge_array if a != parent]

    def clean_parent_list(self, parent_array, child):
        """Return the first element of each pair whose first element is not *child*."""
        return [a[0] for a in parent_array if a[0] != child]

    def get_edges(self):
        """Group the edges of ``self.pgmpy_object`` by parent.

        :return: dictionary mapping str(parent) to the list of str(child)
        :rtype: dictionary
        """
        edges = dict()
        for t in self.pgmpy_object.edges():
            # setdefault keys on the stringified name, so parents that are not
            # strings keep all of their children.
            edges.setdefault(str(t[0]), []).append(str(t[1]))
        return edges

    def get_nodes_and_attributes(self):
        """List the nodes of ``self.pgmpy_object`` with cardinality and CPD.

        :return: list of tuples ``(str(node), {'cardinality': int,
            'cpd': nested list of floats})``
        :rtype: array of tuples
        """
        nod_and_attributes = []
        for node in self.pgmpy_object.nodes():
            cpd = self.pgmpy_object.get_cpds(node).values.astype(float)
            # CPDs with more than one parent have more than two dimensions;
            # flatten everything but the first axis into columns.
            if cpd.ndim > 2:
                shape = cpd.shape
                col = int(np.prod(shape) / shape[0])
                cpd = cpd.reshape(shape[0], col)
            nod_and_attributes.append((str(node), {
                'cardinality': int(self.pgmpy_object.get_cardinality(node)),
                'cpd': cpd.tolist()
            }))
        return nod_and_attributes

    def translate_pgmpy_to_digraph(self):
        """Rebuild ``self.networkx_object`` from ``self.pgmpy_object``.

        Nodes are added with their ``cardinality`` and ``cpd`` attributes,
        edges are copied as they are. Returns nothing.
        """
        self.networkx_object = nx.DiGraph()
        edges = self.pgmpy_object.edges()
        nodes_and_attributes = self.get_nodes_and_attributes()
        self.networkx_object.add_nodes_from(nodes_and_attributes)
        self.networkx_object.add_edges_from(edges)
        return

    def update_networkx(self, networkx, dic, header):
        """Replace the stored networkx graph, name dictionary and header."""
        self.header = header
        self.dictionary = dic
        self.networkx_object = networkx

    def update_pgmpy(self, pgmpy, dic, header):
        """Replace the stored pgmpy model, name dictionary and header."""
        self.header = header
        self.dictionary = dic
        self.pgmpy_object = pgmpy

    def save_pgmpy_network(self):
        """Save ``self.pgmpy_object`` in the json file."""
        self.translate_pgmpy_to_digraph()
        self.save_network()
        return

    def translate_digraph_to_pgmpy(self, digraf):
        """Build ``self.pgmpy_object`` from the networkx graph *digraf*.

        :param digraf: graph whose nodes carry 'cardinality' and 'cpd'
        :return: the resulting pgmpy model
        """
        self.pgmpy_object, x, y = self.get_pgmpy_network(from_object=True,
                                                         digraph=digraf)
        return self.pgmpy_object

    def save_network(self):
        """Save ``self.networkx_object`` in the json file.

        Node names are translated back to the user given names. The 'Date'
        entry of the header is set to the current time.
        """
        data = self.get_empty_canvas()
        data["header"] = self.header
        nw_nodes = self.networkx_object.nodes(data=True)
        nw_edges = self.networkx_object.edges()
        nodes = copy.deepcopy(self.standard_nodes)
        # Family (top level json key) each keyword is stored under.
        family_of = {
            'BENS': 'RONS',
            'MEMS': 'RONS',
            'LANS': 'LANS',
            'MOTOR': 'LENS',
            'WORLD': 'LENS'
        }
        edges = []
        cpds = []
        # Edges: one {parent: [children]} entry per node that has children.
        for node in nw_nodes:
            node_name = node[0]
            childs = [
                self.translation(edge[1], 1) for edge in nw_edges
                if edge[0] == node_name
            ]
            if childs:
                edges.append({self.translation(node_name, 1): childs})
        # Nodes and CPDs.
        for node in nw_nodes:
            node_name = node[0]
            cardinality = node[1]['cardinality']
            cpd = node[1]['cpd']
            for pseudonym in self.keywords:
                if pseudonym in node_name and pseudonym in family_of:
                    node_name_ = self.translation(node_name, 1)
                    nodes[family_of[pseudonym]][pseudonym].append(
                        [node_name_, cardinality])
            cpds.append({self.translation(node_name, 1): cpd})
        data['Nodes'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        data['header']['Date'] = datetime.datetime.now().strftime("%c")
        self.save_json(json.dumps(data))
        return

    def get_pgmpy_network(self, from_object=False, digraph=None):
        """Build ``self.pgmpy_object`` from the json file or from *digraph*.

        CAUTION: no check is performed on the constructed model; this has to
        be done in the calling module.

        :param from_object: when False the network is read with
            ``get_network()``, otherwise *digraph* is used
        :param digraph: networkx graph whose nodes carry 'cardinality' and
            'cpd' attributes (only used when *from_object* is True)
        :return: the pgmpy model, the name dictionary and the header
        """
        self.pgmpy_object = BayesianModel()
        if not from_object:
            network, dictionary, header = self.get_network()
        else:
            network = digraph
        nw_nodes = network.nodes(data=True)
        nw_edges = network.edges()
        # Adding nodes and edges.
        for node in nw_nodes:
            node_name = node[0]
            self.pgmpy_object.add_node(node_name)
            for edge in nw_edges:
                if edge[0] == node_name:
                    self.pgmpy_object.add_edge(node_name, edge[1])
        # Cardinality lookup built from the node data, so no version specific
        # node accessor of networkx is needed.
        cardinalities = {node[0]: node[1]['cardinality'] for node in nw_nodes}
        # Adding the CPDs.
        for node in nw_nodes:
            parent_nodes = self.clean_parent_list(network.in_edges(node[0]),
                                                  node[0])
            cpd = node[1]['cpd']
            cardinality_node = node[1]['cardinality']
            cardinality_parents = [
                cardinalities[parent] for parent in parent_nodes
            ]
            # Nodes without parents use the CPD call without evidence.
            if len(cardinality_parents) == 0:
                self.pgmpy_object.add_cpds(
                    TabularCPD(variable=node[0],
                               variable_card=cardinality_node,
                               values=[cpd]))
                continue
            table = TabularCPD(variable=node[0],
                               variable_card=cardinality_node,
                               values=cpd,
                               evidence=parent_nodes,
                               evidence_card=np.asarray(cardinality_parents))
            self.pgmpy_object.add_cpds(table)
        return self.pgmpy_object, self.dictionary, self.header

    def get_network(self):
        """Read ``self.file`` into ``self.networkx_object``.

        Nodes are renamed to ``<keyword>_<index>`` and ``self.dictionary`` is
        rebuilt as a list of ``(user name, standardized name)`` tuples.

        :return: the networkx graph, the name dictionary and the header
        :raises KeyError: if an edge or CPD refers to an undeclared node
        """
        self.dictionary = []
        self.networkx_object = nx.DiGraph()
        with open(self.file) as f:
            data = f.read()
        # Remove the layout characters inserted by save_json.
        data = data.replace('\n', '').replace('\t', '')
        data = json.loads(data)
        self.header = data['header']
        # Feeding the graph with the nodes.
        for key in data['Nodes'].keys():
            for secondkey in data['Nodes'][key].keys():
                for c, n in enumerate(data['Nodes'][key][secondkey]):
                    node = secondkey + "_" + str(c)
                    # Keyword attributes are accepted by every networkx
                    # version, a positional attribute dict is not.
                    self.networkx_object.add_node(node,
                                                  cardinality=n[1],
                                                  cpd=[])
                    self.dictionary.append((n[0], node))
        # Feeding the graph with the edges.
        edges = []
        for pair in data['Edges']:
            for parent, children in pair.items():
                for child in children:
                    parent_ = self.translation(parent, 0)
                    child_ = self.translation(child, 0)
                    edges.append((parent_, child_))
        self.networkx_object.add_edges_from(edges)
        # Feeding the graph with the CPDs as node attributes.
        for node in data['CPDs']:
            for parent, cpd in node.items():
                node_ = self.translation(parent, 0)
                # add_node on an existing node only updates its attributes.
                self.networkx_object.add_node(node_, cpd=cpd)
        return self.networkx_object, self.dictionary, self.header

    def create_json_file(self, **kwargs):
        """EXAMPLE: create a json file for a network defined within the code.

        Only ``description``, ``train_from`` and ``frozen`` are read from
        *kwargs*. NOTE(review): nodes, edges and CPDs are hard-coded example
        values; 'BENS', 'MEMS', 'LANS', 'MOTORS', 'WORLD', 'Edges' and 'CPDs'
        passed as keyword arguments are ignored, as they always were.

        :rtype: void
        """
        description = kwargs.get('description', '')
        train_from = kwargs.get('train_from', '')
        frozen = kwargs.get('frozen', False)
        data = self.get_empty_canvas()
        # The 3 next items are for tracking purpose only.
        data["header"]['Identification'] = self.file
        data["header"]['Date'] = datetime.datetime.now().strftime("%c")
        data["header"]['Description'] = description
        # A file containing possible training data (OPTIONAL).
        data["header"]['Train_from'] = train_from
        # Frozen tells whether or not the model can be considered as final.
        data["header"]['Frozen'] = frozen
        # Node names and their cardinality; names can be any valid string.
        bens = [['pooping', 2], ['peeing', 2], ['constipated', 2]]
        mems = [['havenotoiletpaper', 2]]
        lans = [['diarhea', 2], ['happypoop', 2]]
        motors = [['asshole1', 2], ['asshole2', 2]]
        world = [['toilet1', 2], ['toilet2', 2], ['garden1', 2],
                 ['garden2', 2], ['doctor', 2]]
        # Edges as {parent: [children]}; leave empty when the structure has
        # to be found by peepo.
        edges = [
            {'pooping': ['toilet1', 'diarhea', 'happypoop']},
            {'peeing': ['toilet2', 'garden1', 'garden2']},
            {'constipated': ['doctor']},
            {'havenotoiletpaper': ['garden1', 'garden2']},
            {'diarhea': ['toilet1', 'doctor', 'asshole1', 'asshole2']},
            {'happypoop': ['garden1', 'garden2', 'asshole1', 'asshole2']},
        ]
        # CPDs as {node: table}.
        cpds = [
            {'pooping': [0.5, 0.5]},
            {'peeing': [0.2, 0.8]},
            {'constipated': [0.9, 0.1]},
            {'havenotoiletpaper': [0.6, 0.4]},
            {'happypoop': [[0.3, 0.8], [0.7, 0.2]]},
            {'diarhea': [[0.8, 0.3], [0.2, 0.7]]},
            {'toilet1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]},
            {'asshole1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]},
            {'asshole2': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]},
            {'toilet2': [[0.5, 0.5], [0.5, 0.5]]},
            {'doctor': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]},
            {
                'garden1': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                            [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
            },
            {
                'garden2': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                            [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
            },
        ]
        # Feeding the data.
        data["Nodes"]['RONS']['BENS'] = bens
        data["Nodes"]['RONS']['MEMS'] = mems
        data["Nodes"]['LANS']['LANS'] = lans
        data["Nodes"]['LENS']['MOTOR'] = motors
        data["Nodes"]['LENS']['WORLD'] = world
        data["Edges"] = edges
        data["CPDs"] = cpds
        self.save_json(json.dumps(data))
        print("Json file for - ", self.file, " - created")

    def create_json_template(self):
        """Write a dummy filled 'Template' json file in project_repository.

        Useful when the template has been deleted or corrupted. Note that
        ``self.file`` points to the template afterwards.

        :rtype: void
        """
        self.get_json_path("Template")
        data = self.get_empty_canvas()
        data['header']['Identification'] = self.file
        # Filling some dummies to facilitate the user.
        a_node = ['*', 0]
        an_edge = {'*': ['&', '&', '&']}
        a_cpd = {'*': [[0, 0, 0], [0, 0, 0]]}
        nodes = []
        edges = []
        cpds = []
        for _ in range(0, 3):
            nodes.append(a_node)
            edges.append(an_edge)
            cpds.append(a_cpd)
        data['Nodes']['RONS']['BENS'] = nodes
        data['Nodes']['RONS']['MEMS'] = nodes
        data['Nodes']['LANS']['LANS'] = nodes
        data['Nodes']['LENS']['MOTOR'] = nodes
        data['Nodes']['LENS']['WORLD'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        self.save_json(json.dumps(data))
        print("Empty template created")

    def get_empty_canvas(self):
        """Return the empty structure shared by all json creating methods.

        :return: a dictionary with the structure of the json file
        :rtype: dictionary
        """
        return {
            'header': {
                'Identification': '',
                'Date': '',
                'Description': '',
                'Frozen': '',
                'Train_from': ''
            },
            'Nodes': {
                'RONS': {
                    'BENS': [],
                    'MEMS': []
                },
                'LANS': {
                    'LANS': []
                },
                'LENS': {
                    'MOTOR': [],
                    'WORLD': []
                }
            },
            'Edges': [],
            'CPDs': []
        }
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph related methods of BayesianModel."""

    def setUp(self):
        """Create three small models; only G1 carries CPDs."""
        self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'),
                                ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_values = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                        [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                        [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]]
        grade_cpd = TabularCPD('grade',
                               3,
                               values=grade_values,
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'),
                                 ('i', 'l')])

    def test_moral_graph(self):
        """Every moral graph edge is an expected one, in either direction."""
        expected = [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'),
                    ('e', 'd')]
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            flipped = (edge[1], edge[0])
            self.assertTrue(edge in expected or flipped in expected)

    def test_moral_graph_with_edge_present_over_parents(self):
        """Moralizing works when the parents are already connected."""
        expected = [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'),
                    ('d', 'e')]
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'),
                           ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            flipped = (edge[1], edge[0])
            self.assertTrue(edge in expected or flipped in expected)

    def test_get_ancestors_of_success(self):
        """Ancestors are returned for single nodes and for lists of nodes."""
        of_g = self.G2._get_ancestors_of('g')
        of_d = self.G2._get_ancestors_of('d')
        of_i_and_l = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(of_g, {'d', 'i', 'g'})
        self.assertEqual(of_d, {'d'})
        self.assertEqual(of_i_and_l, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        """An unknown node raises ValueError."""
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        """Local independencies of each node of G and of 'grade' in G1."""
        local = self.G.local_independencies
        self.assertEqual(local('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(local('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(local('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(local('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(local('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        """Independencies of a chain, a fork and a collider."""
        x_z_given_y = Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y'))
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), x_z_given_y)
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), x_z_given_y)
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        """is_imap accepts a joint distribution and rejects a plain factor."""
        variables = ['diff', 'intel', 'grade']
        cardinalities = [2, 3, 3]
        val = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
            0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128
        ]
        JPD = JointProbabilityDistribution(variables, cardinalities, val)
        fac = DiscreteFactor(variables, cardinalities, val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        """Immoralities of three variants of a four node model."""
        first = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                               ('w', 'y')])
        self.assertEqual(first.get_immoralities(), {('w', 'x'), ('w', 'z')})
        second = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'),
                                ('w', 'y')])
        self.assertEqual(second.get_immoralities(), {('w', 'x'), ('w', 'z')})
        third = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                               ('w', 'y'), ('w', 'x')])
        self.assertEqual(third.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        """is_iequivalent rejects a MarkovModel and compares two DAGs."""
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        """A copy is not affected by later changes of the original."""
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        original_cpd_id = id(self.G1.get_cpds('diff'))
        copied_cpd_id = id(model_copy.get_cpds('diff'))
        self.assertNotEqual(original_cpd_id, copied_cpd_id)

        # Replace a CPD on the original only.
        self.G1.remove_cpds('diff')
        self.G1.add_cpds(TabularCPD('diff', 2, values=[[0.3], [0.7]]))
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        # Remove a node from the original only.
        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        """Removing a node also makes its CPD unavailable."""
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        """Removing several nodes makes their CPDs unavailable."""
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        for removed in ('diff', 'grade'):
            self.assertRaises(ValueError, self.G1.get_cpds, removed)

    def tearDown(self):
        del self.G
        del self.G1
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class have numbers and must be called in order.
    '''

    def __init__(self, pnh, gh):
        '''
        Constructor.
        pnh - object providing the data extractor, device, priority,
              dataframe, file writer and file suffix (through its getters)
        gh  - general handler, used later to look up device locations
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        '''
        (4) Method that builds the structure of the data
        -----------------
        Parameters:
        method          : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method : K2, bic, bdeu (ignored by the constraint technique)
        log             - "True" if you want to print debug information in the console
        -----------------
        Raises ValueError for an unknown method, or for an unknown scoring
        method when the chosen method needs one.
        '''
        if method not in ("scoring_approx", "scoring_exhaustive",
                          "constraint"):
            raise ValueError("Unknown structure learning method: " +
                             repr(method))
        if method == "constraint":
            est = ConstraintBasedEstimator(self.data)
        else:
            # Select the scoring method for the local search of the structure
            if scoring_method == "K2":
                scores = K2Score(self.data)
            elif scoring_method == "bic":
                scores = BicScore(self.data)
            elif scoring_method == "bdeu":
                scores = BdeuScore(self.data)
            else:
                raise ValueError("Unknown scoring method: " +
                                 repr(scoring_method))
            if method == "scoring_approx":
                est = HillClimbSearch(self.data, scores)
            else:
                est = ExhaustiveSearch(self.data, scores)

        self.best_model = est.estimate()
        # REMOVE all nodes not connected to anything else
        self.eliminate_isolated_nodes()

        for edge in self.best_model.edges():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        ''' (5) Estimates the parameters of the found network '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " +
                                   str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " +
                                   str(self.variables_names))

        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(str(cpd))

    def inference(self, variables, evidence, mode="auto", log=True):
        '''
        (6) Computes the inference over some variables of the network (given some evidence)
        mode = "auto"   : every node is queried with all its parents set to
                          value 1 (variables and evidence are ignored)
        mode = "manual" : the given variables are queried with the given evidence
        '''
        inference = VariableElimination(self.best_model)

        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)

        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)

    def _activation_probability(self, inference, target, given):
        ''' Probability (rounded to 2 decimals) of target = 1 given that given = 1 '''
        phi_query = inference.query([target], {given: 1})
        return round(phi_query[target].values[1], 2)

    def draw_network(self, label_choice, location_choice, location, log):
        '''
        (7) Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" if we want to show single label, "double" for double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2)
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:
            devices = self.variables_names
            device_location = dict()  # For H0
            device_locationH1 = dict()  # temp for H1
            allDevicesLocations = self.general_handler.get_device_locations()
            for d in devices:
                device_location[d] = allDevicesLocations[d][0]
                device_locationH1[d] = allDevicesLocations[d][1]
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                name = "cluster_" + loc
                loc_subgraphs[loc] = gv.Digraph(name)
                # Label with name to be visualized in the image
                loc_subgraphs[loc].graph_attr['label'] = loc

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                # add the node to the right subgraph
                loc_subgraphs[locationH0].node(
                    node,
                    style='filled',
                    fillcolor=location_colorH1[locationH1])
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges (one inference engine serves all the edges)
        inference = VariableElimination(self.best_model)
        for edge in self.best_model.edges():
            label = ""

            # Inference for first label and color of edges
            value = self._activation_probability(inference, edge[1], edge[0])

            if label_choice == "single":
                label = str(value)

            if label_choice == "double":
                # Inference for second label
                value_inv = self._activation_probability(
                    inference, edge[0], edge[1])
                label = str(value) + "|" + str(value_inv)

            if value >= 0.75:
                bn_graph.edge(edge[0], edge[1], color="red", label=label)
            else:
                bn_graph.edge(edge[0], edge[1], color="black", label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            if location_choice:
                locat = "_H0H1"
            else:
                locat = ""
            imgPath = '../../output/' + self.device_considered + '_' + self.priority_considered + locat
        bn_graph.render(imgPath)
        os.remove(imgPath)  # remove the source code generated by graphviz

    def data_info(self, selection, log):
        '''
        (9) Prints or logs some extra information about the data or the network
        selection - container of the numbers (1 to 4) of the sections to produce
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)",
                newline=True)
            for written, dr in enumerate(self.rankedDevices):
                if written == 20:
                    break
                self.file_writer.write_txt(dr[0] + "             \t" +
                                           str(dr[1]) + "\t" + str(dr[2]))

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            # create the markov model from the BN
            self.markov = self.best_model.to_markov_model()
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                node_pydot = pydot.Node(node)
                nice_graph.add_node(node_pydot)
            for edge in self.markov.edges():
                edge_pydot = pydot.Edge(edge[0], edge[1], color="black")
                nice_graph.add_edge(edge_pydot)
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:",
                                       newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(str(factor))

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = list(self.best_model.nodes())
            inference = VariableElimination(self.best_model)
            for position, node1 in enumerate(nodes):
                for node2 in nodes[position + 1:]:
                    # probability of direct activation (node1=1 -> node2)
                    prob1 = self._activation_probability(
                        inference, node2, node1)
                    # probability of inverse activation (node2=1 -> node1)
                    prob2 = self._activation_probability(
                        inference, node1, node2)

                    # color is None when no arc has to be drawn; direct tells
                    # whether the arc goes from node1 to node2.
                    color = None
                    direct = True
                    if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:
                        color = "red"
                    elif prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
                        color = "red"
                        direct = False
                    elif prob1 >= 0.75 and prob2 >= 0.75:
                        # NOTE(review): looks unreachable - two values in
                        # [0.75, 1] differ by at most 0.25, so the first
                        # branch already matches. Kept as in the original.
                        color = "orange"
                        direct = prob1 >= prob2
                    elif prob1 >= 0.55 and prob2 >= 0.55:
                        color = "black"
                        direct = prob1 >= prob2

                    if color is None:
                        continue
                    self.fix_node_presence([node1, node2], nice_graph)
                    if direct:
                        double_label = str(prob1) + "|" + str(prob2)
                        nice_graph.add_edge(
                            pydot.Edge(node1,
                                       node2,
                                       color=color,
                                       label=double_label))
                    else:
                        double_label = str(prob2) + "|" + str(prob1)
                        nice_graph.add_edge(
                            pydot.Edge(node2,
                                       node1,
                                       color=color,
                                       label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' +
                                     self.device_considered + '_' +
                                     self.priority_considered +
                                     '-inference_network.png')

    def fix_node_presence(self, nodes, pydot_graph):
        ''' Adds the list of nodes to the graph, if they are not already present '''
        # Names are compared as pydot stores them: the graph holds Node
        # objects, so a plain name is never "in" get_nodes().
        present = {known.get_name() for known in pydot_graph.get_nodes()}
        for node in nodes:
            candidate = pydot.Node(node)
            if candidate.get_name() not in present:
                pydot_graph.add_node(candidate)
                present.add(candidate.get_name())

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph.
        Raises DataError when no node is left.
        '''
        # Removing isolated nodes never changes the edges: collect once.
        connected = set()
        for edge in self.best_model.edges():
            connected.update(edge)
        # Iterate over a copy since nodes are removed inside the loop.
        for nodeX in list(self.best_model.nodes()):
            if nodeX not in connected:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)
        if len(self.best_model.nodes()) == 0:
            raise DataError(
                "No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value
        (WORKS WITH MAX 10 DIFFERENT LOCATIONS, IndexError otherwise)
        '''
        # NOTE(review): 'Red' appears twice, so the 3rd and the 6th location
        # share a colour - confirm whether this is intended.
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Red', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]
        palette = iter(system_color)
        location_color = dict()  # key = location; value = color
        for dev, loc in device_location.items():
            if loc not in location_color:
                try:
                    location_color[loc] = next(palette)
                except StopIteration:
                    raise IndexError(
                        "more than %d different locations: no colour left" %
                        len(system_color))
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
values=[[0.998], [0.002]]) cpd_alarm = TabularCPD(variable='Alarm', variable_card=2, values=[[0.999, 0.71, 0.06, 0.05], [0.001, 0.29, 0.94, 0.95]], evidence=['Burglary', 'Earthquake'], evidence_card=[2, 2]) cpd_johncalls = TabularCPD(variable='JohnCalls', variable_card=2, values=[[0.95, 0.1], [0.05, 0.9]], evidence=['Alarm'], evidence_card=[2]) cpd_marycalls = TabularCPD(variable='MaryCalls', variable_card=2, values=[[0.1, 0.7], [0.9, 0.3]], evidence=['Alarm'], evidence_card=[2]) # Associating the parameters with the model structure alarm_model.add_cpds(cpd_burglary, cpd_earthquake, cpd_alarm, cpd_johncalls, cpd_marycalls) #new cell alarm_model.check_model() #new cell alarm_model.nodes() #new cell alarm_model.edges() #new cell alarm_model.local_independencies('Burglary') #new cell alarm_model.local_independencies('JohnCalls')
class TestBaseModelCreation(unittest.TestCase):
    """Structure-building tests for ``BayesianModel``.

    Covers construction from an edge list, adding nodes and edges one at a
    time or in bulk, rejection of self-loops and cycles, and the parents
    reported by ``predecessors``.

    ``nodes()``, ``edges()`` and ``predecessors()`` are converted with
    ``list()`` before being handed to ``assertListEqual``: on networkx >= 2.0
    they return views/iterators rather than lists, and ``assertListEqual``
    requires real lists. The conversion is a no-op on older versions.
    """

    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        # Smoke test: construction with non-string node labels must not raise.
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(list(self.G.nodes()), ['a'])

    def test_add_node_nonstring(self):
        # Smoke test: must not raise.
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Smoke test: must not raise.
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(list(self.G.edges()), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Smoke test: must not raise.
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                 ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Smoke test: must not raise.
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.g.predecessors('a')), [])
        self.assertListEqual(list(self.g.predecessors('b')), ['a'])
        self.assertListEqual(list(self.g.predecessors('c')), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.G.predecessors('a')), [])
        self.assertListEqual(list(self.G.predecessors('b')), ['a'])
        self.assertListEqual(list(self.G.predecessors('c')), ['b'])

    def tearDown(self):
        del self.G
class TestGibbsSampling(unittest.TestCase):
    """Tests for ``GibbsSampling`` built from Bayesian and Markov models.

    Uses ``assertEqual`` throughout (the ``assertEquals`` alias is deprecated
    and was removed in Python 3.12) and converts ``nodes()`` to a list before
    ``assertListEqual`` so the comparison also works when ``nodes()`` returns
    a view instead of a list.
    """

    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3,
                               [[0.3, 0.05, 0.9, 0.5],
                                [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = Factor(['C', 'B'], [4, 3],
                           [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_bayesian_model',
           autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_markov_model',
           autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables),
                             list(self.bayesian_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities,
                             {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables),
                             list(self.markov_model.nodes()))
        self.assertDictEqual(gibbs.cardinalities,
                             {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state",
           autospec=True)
    def test_sample_less_arg(self, random_state):
        # With no stored state and no start state given, sample() is expected
        # to obtain one from random_state() exactly once.
        self.gibbs.state = None
        random_state.return_value = [State('diff', 0), State('intel', 0),
                                     State('grade', 0)]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual(
            {samples[0][0].var, samples[0][1].var, samples[0][2].var},
            {'diff', 'intel', 'grade'})
        self.assertEqual(
            {samples[1][0].var, samples[1][1].var, samples[1][2].var},
            {'diff', 'intel', 'grade'})

    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state",
           autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
# Bayesian network for students
from pgmpy.models import BayesianModel

# Variables of the student network and the directed dependencies between them.
student_nodes = ['difficulty', 'intelligence', 'grade', 'sat', 'letter']
student_edges = [
    ('difficulty', 'grade'),
    ('intelligence', 'grade'),
    ('intelligence', 'sat'),
    ('grade', 'letter'),
]

model = BayesianModel()

# Register the nodes first and show them.
model.add_nodes_from(student_nodes)
print(model.nodes())

# Then connect them and show the resulting edges.
model.add_edges_from(student_edges)
print(model.edges())
class TestBaseModelCreation(unittest.TestCase):
    """Checks how a ``BayesianModel`` graph is built up.

    Exercises the edge-list constructor, single and bulk node/edge insertion,
    the ValueError raised for self-loops and cycles, and the parent lists
    exposed through ``predecessors``.
    """

    def setUp(self):
        self.G = BayesianModel()

    def tearDown(self):
        del self.G

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        expected_nodes = ["a", "b", "c"]
        expected_edges = [["a", "b"], ["b", "c"]]
        self.assertListEqual(sorted(self.g.nodes()), expected_nodes)
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             expected_edges)

    def test_class_init_with_data_nonstring(self):
        # Only checks that integer labels are accepted without an exception.
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node("a")
        self.assertListEqual(list(self.G.nodes()), ["a"])

    def test_add_node_nonstring(self):
        # Only checks that an integer label is accepted.
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        labels = ["a", "b", "c", "d"]
        self.G.add_nodes_from(labels)
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        # Only checks that integer labels are accepted.
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        # First edge creates both of its endpoints.
        self.G.add_edge("d", "e")
        self.assertListEqual(sorted(self.G.nodes()), ["d", "e"])
        self.assertListEqual(list(self.G.edges()), [("d", "e")])

        # Second edge between nodes that were added up front.
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edge("a", "b")
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Only checks that integer labels are accepted.
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        with self.assertRaises(ValueError):
            self.G.add_edge("a", "a")

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([("a", "b"), ("a", "c")])
        with self.assertRaises(ValueError):
            self.G.add_edge("c", "a")

    def test_add_edges_from_string(self):
        first_batch = [("a", "b"), ("b", "c")]
        self.G.add_edges_from(first_batch)
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["b", "c"]])

        self.G.add_nodes_from(["d", "e", "f"])
        second_batch = [("d", "e"), ("e", "f")]
        self.G.add_edges_from(second_batch)
        self.assertListEqual(sorted(self.G.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        all_edges = [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             hf.recursive_sorted(all_edges))

    def test_add_edges_from_nonstring(self):
        # Only checks that integer labels are accepted.
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        with self.assertRaises(ValueError):
            self.G.add_edges_from([("a", "a")])

    def test_add_edges_from_result_cycle(self):
        cyclic_edges = [("a", "b"), ("b", "c"), ("c", "a")]
        with self.assertRaises(ValueError):
            self.G.add_edges_from(cyclic_edges)

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        self.assertListEqual(list(self.g.predecessors("a")), [])
        self.assertListEqual(list(self.g.predecessors("b")), ["a"])
        self.assertListEqual(list(self.g.predecessors("c")), ["b"])

    def test_update_node_parents(self):
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(list(self.G.predecessors("a")), [])
        self.assertListEqual(list(self.G.predecessors("b")), ["a"])
        self.assertListEqual(list(self.G.predecessors("c")), ["b"])
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

# Generating random data for two coin tossing examples
# 1000 rows x 2 columns of integers drawn from {0, 1} (``high`` is exclusive),
# labelled 'X' and 'Y'.
raw_data = np.random.randint(low=0, high=2, size=(1000, 2))
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
print(data)

# NOTE(review): the model is created with no nodes or edges before fit() is
# called; confirm that fit() actually learns CPDs for 'X' and 'Y' in this
# state, or whether the nodes have to be added first.
coin_model = BayesianModel()
coin_model.fit(data, estimator=BayesianEstimator)

# Bare expressions: their values are only displayed when this is run in an
# interactive session; as a plain script they produce no output.
coin_model.get_cpds()
coin_model.nodes()
coin_model.edges()
def estimate(self, start=None, tabu_length=0, max_indegree=None):
    """
    Greedy local hill-climbing search for a `BayesianModel` structure with
    a (locally) optimal score.

    Beginning at `start`, each round asks `self._legal_operations` for the
    candidate modifications (edge addition '+', edge removal '-', edge
    'flip') together with their score deltas and applies the one with the
    largest delta. The search stops as soon as no candidate improves the
    score by at least 1e-8. Only the structure is estimated, no
    parametrization.

    Parameters
    ----------
    start: BayesianModel instance or None
        Starting point of the search. When None (the default) a model that
        contains all variables and no edges is used. A supplied model is
        modified in place and is the object that gets returned.

    tabu_length: int
        Number of most recent modifications whose reversal is forbidden
        during the search, which enforces a wider exploration of the
        search space. Default value: 0, i.e. no modification is ever
        forbidden.

    max_indegree: int or None
        Passed on to `self._legal_operations`; if not None, only models in
        which every node has at most `max_indegree` parents are searched.
        Defaults to None.

    Returns
    -------
    model: `BayesianModel` instance
        A `BayesianModel` at a (local) score maximum.

    Raises
    ------
    ValueError
        If `start` is neither None nor a `BayesianModel` whose node set
        equals the variables of the data set.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from pgmpy.estimators import HillClimbSearch, BicScore
    >>> # create data sample with 9 random variables:
    ... data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 9)), columns=list('ABCDEFGHI'))
    >>> # add 10th dependent variable
    ... data['J'] = data['A'] * data['B']
    >>> est = HillClimbSearch(data, scoring_method=BicScore(data))
    >>> best_model = est.estimate()
    >>> sorted(best_model.nodes())
    ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    >>> best_model.edges()
    [('B', 'J'), ('A', 'J')]
    >>> # search a model with restriction on the number of parents:
    >>> est.estimate(max_indegree=1).edges()
    [('J', 'A'), ('B', 'J')]
    """
    min_improvement = 1e-8
    variables = self.state_names.keys()

    if start is None:
        # Default starting point: every variable present, no edges.
        start = BayesianModel()
        start.add_nodes_from(variables)
    elif not (isinstance(start, BayesianModel)
              and set(start.nodes()) == set(variables)):
        raise ValueError(
            "'start' should be a BayesianModel with the same variables as the data set, or 'None'."
        )

    model = start
    tabu = []
    while True:
        # Pick the candidate with the strictly largest positive delta;
        # on ties the first one yielded wins.
        chosen, gain = None, 0
        for candidate, delta in self._legal_operations(
                model, tabu, max_indegree):
            if delta > gain:
                chosen, gain = candidate, delta

        # Local maximum reached: nothing improves the score noticeably.
        if chosen is None or gain < min_improvement:
            return model

        kind = chosen[0]
        if kind == '+':
            model.add_edge(*chosen[1])
            forbidden = ('-', chosen[1])
        elif kind == '-':
            model.remove_edge(*chosen[1])
            forbidden = ('+', chosen[1])
        elif kind == 'flip':
            source, target = chosen[1]
            model.remove_edge(source, target)
            model.add_edge(target, source)
            forbidden = chosen
        else:
            # Unknown operation kinds leave model and tabu list untouched.
            continue

        # Newest entry first; keep only the last `tabu_length` entries.
        tabu = ([forbidden] + tabu)[:tabu_length]