def Hybrid(dataset: pd.DataFrame):
    """Learn a network structure with a hybrid search (MMPC skeleton + hill climbing).

    The MMPC step produces an undirected skeleton; hill climbing is then run
    repeatedly, restricted to the skeleton's edges, with a growing iteration
    budget so the BDeu score can be tracked as a function of ``max_iter``.

    Parameters
    ----------
    dataset : pd.DataFrame
        Data handed to the pgmpy estimators.

    Returns
    -------
    tuple
        ``(edges, [iter_list, eval_list])`` where ``edges`` are the edges of the
        model learned with the largest iteration budget, ``iter_list`` holds the
        budgets (powers of two, 2**0 .. 2**19) and ``eval_list`` the BDeu score
        obtained for each budget.
    """
    from pgmpy.estimators import MmhcEstimator
    from pgmpy.estimators import HillClimbSearch
    from pgmpy.estimators import BDeuScore
    from pgmpy.models import BayesianModel

    mmhc = MmhcEstimator(dataset)
    # Per the original author's note: mmpc() accepts a ``significance_level``
    # (default 0.01), the tolerated Type 1 error probability of the
    # independence tests. A lower value accepts fewer dependencies and
    # therefore yields a sparser skeleton.
    skeleton = mmhc.mmpc()
    print("Part 1) Skeleton: ", skeleton.edges())

    # One scorer serves both the search and the evaluation; the original built
    # two identical BDeuScore objects.
    bdeu = BDeuScore(dataset, equivalent_sample_size=5)
    # Use hill climb search to orient the edges.
    hc = HillClimbSearch(dataset, scoring_method=bdeu)

    # The white list does not depend on the iteration budget, so build it once.
    white_list = skeleton.to_directed().edges()

    # Record the score reached for each iteration budget.
    iter_list = [2**i for i in range(20)]
    eval_list = []
    for iteration in iter_list:
        DAG_connection = hc.estimate(tabu_length=10,
                                     white_list=white_list,
                                     max_iter=iteration)
        model = BayesianModel(DAG_connection.edges())
        # Score once and reuse the value (the original scored the model twice).
        score = bdeu.score(model)
        print(score)
        eval_list.append(score)

    print("Part 2) Model: ", model.edges())
    return model.edges(), [iter_list, eval_list]
def drawGraph(model: BayesianModel, nodeColor: Color = LIGHT_CORNF, edgeColor: Color = CHERRY) -> gz.Digraph:
    """Draw the structure of ``model`` by delegating to ``edgesToGraph``.

    ``model.edges()`` yields a NetworkX edge view, so the pairs are copied into
    a plain list before being handed over together with the two colours.
    """
    edge_pairs: List[Tuple[Name, Name]] = [pair for pair in model.edges()]
    return edgesToGraph(edges=edge_pairs, nodeColor=nodeColor, edgeColor=edgeColor)
def get_model(self):
    """
    Build a BayesianModel from the parsed ProbModelXML network in ``self.probnet``.

    Nodes, edges and one TabularCPD per potential are created, then every
    variable/edge property found in ``self.probnet`` is copied onto the
    corresponding node/edge attribute dict.

    Return
    ---------------
    model: an instance of BayesianModel.

    Raises
    ------
    ValueError: if the network type is not "BayesianNetwork".

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    if self.probnet.get("type") != "BayesianNetwork":
        raise ValueError("Please specify only Bayesian Network.")

    variable_info = self.probnet["Variables"]
    edge_info = self.probnet["edges"]

    model = BayesianModel()
    model.add_nodes_from(variable_info.keys())
    model.add_edges_from(edge_info.keys())

    tabular_cpds = []
    for potential in self.probnet["Potentials"]:
        # The first listed variable is the CPD's own variable; the value stored
        # under it is the list of its evidence variables.
        target = list(potential["Variables"].keys())[0]
        n_states = len(variable_info[target]["States"])
        parents = potential["Variables"][target]
        parent_cards = [len(variable_info[parent]["States"]) for parent in parents]
        flat = np.array([float(token) for token in potential["Values"].split()])
        # One row per state of the target variable.
        table = flat.reshape((n_states, flat.size // n_states))
        tabular_cpds.append(
            TabularCPD(target, n_states, table, parents, parent_cards)
        )
    model.add_cpds(*tabular_cpds)

    for node in model.nodes():
        for prop_name, prop_value in variable_info[node].items():
            model.nodes[node][prop_name] = prop_value

    # networkx 1.x exposes edge attributes through ``edge``; other versions
    # are addressed through ``adj``.
    edge_attrs = model.edge if nx.__version__.startswith("1") else model.adj
    for src, dst in model.edges():
        for prop_name, prop_value in edge_info[(src, dst)].items():
            edge_attrs[src][dst][prop_name] = prop_value
    return model
def create_model_and_inference():
    """Build a Bayesian network from ``dependencies.csv`` and return it with an inference engine.

    The dependency table is walked recursively starting at
    ``'myproximus-usage'``; the collected (source, target) pairs are reversed to
    form the network's edges. The model is then fitted on randomly generated
    binary placeholder data (11 rows), drawn with ``draw_network`` and wrapped
    in a ``VariableElimination`` object.

    Returns
    -------
    tuple
        ``(model, VariableElimination(model))``.
    """
    n_samples = 11      # number of random observation rows (columns '0'..'10')
    n_random = 64       # length of each random 0/1 vector, as in the original

    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        """Depth-first walk: append (source, target) for each dependency of ``source``."""
        source_df = df[df['Column2'] == source]
        # Columns from index 3 onwards are read as references into 'Column1'.
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                # Skip a dependency whose reverse edge is already recorded.
                if (target, source) not in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    # Reverse direction: edges point from the dependency to its dependant.
    edges = [(t[1], t[0]) for t in edges]
    nodes = set(itertools.chain.from_iterable(edges))

    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]
    # Placeholder observations: one random binary column per sample, created in
    # the same order ('0', '1', ..., '10') as the original copy-pasted lines.
    for sample_idx in range(n_samples):
        nodes_df[str(sample_idx)] = pd.DataFrame(
            data=np.random.randint(0, 2, size=n_random).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            # pgmpy rejects edges that would create a loop with ValueError;
            # anything else (e.g. KeyboardInterrupt) must not be swallowed.
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")
    draw_network(model.nodes(), model.edges(), {}, [])
    return model, VariableElimination(model)
def test_estimate_from_independencies(self):
    """Check structures recovered by ``estimate_from_independencies``."""
    # Case 1: structure estimated from an explicit (closed) independence set.
    assertions = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D']).closure()
    estimated = ConstraintBasedEstimator.estimate_from_independencies("ABCD", assertions)
    expected_edges = {('B', 'D'), ('A', 'D'), ('C', 'D')}
    self.assertSetEqual(set(estimated.edges()), expected_edges)

    # Case 2: round trip through the independencies of a known model. Either
    # the original edges or the variant with B-D reversed is accepted.
    reference = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    recovered = ConstraintBasedEstimator.estimate_from_independencies(
        reference.nodes(), reference.get_independencies())
    flipped_edges = {('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')}
    matches_reference = set(recovered.edges()) == set(reference.edges())
    self.assertTrue(matches_reference or set(recovered.edges()) == flipped_edges)
def test_estimate_from_independencies(self):
    """Check structures recovered by ``estimate_from_independencies``."""
    # Case 1: structure estimated from an explicit (closed) independence set.
    closed = Independencies(["B", "C"], ["A", ["B", "C"], "D"]).closure()
    first = ConstraintBasedEstimator.estimate_from_independencies("ABCD", closed)
    self.assertSetEqual(
        set(first.edges()),
        {("B", "D"), ("A", "D"), ("C", "D")},
    )

    # Case 2: round trip through the independencies of a known model. Either
    # the original edges or the variant with B-D reversed is accepted.
    known = BayesianModel([("A", "C"), ("B", "C"), ("B", "D"), ("C", "E")])
    second = ConstraintBasedEstimator.estimate_from_independencies(
        known.nodes(), known.get_independencies())
    reversed_variant = {("B", "C"), ("A", "C"), ("C", "E"), ("D", "B")}
    if set(second.edges()) == set(known.edges()):
        outcome = True
    else:
        outcome = set(second.edges()) == reversed_variant
    self.assertTrue(outcome)
def test_estimate_from_independencies(self):
    """Check structures recovered by ``estimate_from_independencies``."""
    estimate = ConstraintBasedEstimator.estimate_from_independencies

    # Case 1: structure estimated from an explicit (closed) independence set.
    independencies = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
    independencies = independencies.closure()
    model_a = estimate("ABCD", independencies)
    self.assertSetEqual(set(model_a.edges()),
                        {('B', 'D'), ('A', 'D'), ('C', 'D')})

    # Case 2: round trip through the independencies of a known model. Either
    # the original edges or the variant with B-D reversed is accepted.
    source_model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
    model_b = estimate(source_model.nodes(), source_model.get_independencies())
    alternative = {('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')}
    self.assertTrue(
        set(model_b.edges()) == set(source_model.edges())
        or set(model_b.edges()) == alternative)
def Hill_Climbing(dataset: pd.DataFrame):
    """Learn a network structure with plain hill climbing and track its BDeu score.

    Hill climbing is run once per iteration budget (powers of two,
    2**0 .. 2**19) and the BDeu score of each resulting model is recorded.

    Parameters
    ----------
    dataset : pd.DataFrame
        Data handed to the pgmpy estimators.

    Returns
    -------
    tuple
        ``(edges, [iter_list, eval_list])`` where ``edges`` are the edges of the
        model learned with the largest budget, ``iter_list`` the budgets and
        ``eval_list`` the matching BDeu scores.
    """
    from pgmpy.estimators import HillClimbSearch
    from pgmpy.estimators import BDeuScore
    from pgmpy.models import BayesianModel

    # One scorer serves both the search and the evaluation; the original built
    # two identical BDeuScore objects.
    bdeu = BDeuScore(dataset, equivalent_sample_size=5)
    hc = HillClimbSearch(dataset, scoring_method=bdeu)

    iter_list = [2**i for i in range(20)]
    eval_list = []
    for iteration in iter_list:
        DAG_connection = hc.estimate(tabu_length=10, max_iter=iteration)
        model = BayesianModel(DAG_connection.edges())
        # Score once and reuse the value (the original scored the model twice).
        score = bdeu.score(model)
        print(score)
        eval_list.append(score)
    return model.edges(), [iter_list, eval_list]
def get_model(self):
    """
    Returns the model instance of the ProbModel.

    Nodes are taken from the declared variables (so variables without any
    edge are kept), one TabularCPD is built per potential, and every
    variable/edge property in ``self.probnet`` is copied onto the matching
    node/edge attribute dict.

    Node and edge attributes are written through ``model.nodes[...]`` and
    ``model.adj[...]``; the ``node`` / ``edge`` accessors used previously were
    removed from networkx.

    Return
    ---------------
    model: an instance of BayesianModel.

    Raises
    ------
    ValueError: if the network type is not "BayesianNetwork".

    Examples
    -------
    >>> reader = ProbModelXMLReader()
    >>> reader.get_model()
    """
    if self.probnet.get('type') != "BayesianNetwork":
        raise ValueError("Please specify only Bayesian Network.")

    model = BayesianModel()
    # Add every declared variable first: building the graph from the edges
    # alone would silently drop isolated variables and make add_cpds fail for
    # their CPDs.
    model.add_nodes_from(self.probnet['Variables'].keys())
    model.add_edges_from(self.probnet['edges'].keys())

    tabular_cpds = []
    cpds = self.probnet['Potentials']
    for cpd in cpds:
        # The first listed variable is the CPD's own variable; the value stored
        # under it is the list of its evidence variables.
        var = list(cpd['Variables'].keys())[0]
        states = self.probnet['Variables'][var]['States']
        evidence = cpd['Variables'][var]
        evidence_card = [len(self.probnet['Variables'][evidence_var]['States'])
                         for evidence_var in evidence]
        arr = list(map(float, cpd['Values'].split()))
        values = np.array(arr)
        # One row per state of ``var``.
        values = values.reshape((len(states), values.size // len(states)))
        tabular_cpds.append(TabularCPD(var, len(states), values, evidence, evidence_card))
    model.add_cpds(*tabular_cpds)

    for var in model.nodes():
        for prop_name, prop_value in self.probnet['Variables'][var].items():
            model.nodes[var][prop_name] = prop_value

    for edge in model.edges():
        for prop_name, prop_value in self.probnet['edges'][edge].items():
            model.adj[edge[0]][edge[1]][prop_name] = prop_value
    return model
def bayesnet():
    """
    Build, print, query and draw a small demo Bayesian network.

    The network has name variables ``Ni``/``Nj``/``Nk`` (3 states each), match
    variables ``Aij``/``Ajk``/``Aki`` (diff/same) conditioned on pairs of names,
    and score variables ``Sij``/``Sjk`` (low/high) conditioned on a match
    variable and its two names. The function prints every CPD, runs
    variable-elimination queries for several evidence assignments, prints the
    model's independencies, and finally renders the graph to ``foo.png``.

    Side effects: writes ``semtype2_nice``, ``var2_cpd``, ``score_nice``,
    ``name_nice``, ``score_basis`` and ``nid_basis`` into module globals, saves
    ``foo.png`` and opens it via ``ut.startfile``.

    References:
        https://class.coursera.org/pgm-003/lecture/17
        http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
        http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
        http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
        https://github.com/pgmpy/pgmpy.git
        http://pgmpy.readthedocs.org/en/latest/
        http://nipy.bic.berkeley.edu:5000/download/11
    """
    # NOTE(review): a commented-out experiment used to sit here. It enumerated
    # the full event space into a DataFrame and compressed it into
    # "case_same" / "case_diff" special cases. It referenced an undefined
    # name (`mask`) and was never executed, so only this note is kept.
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    from pgmpy.inference import BeliefPropagation  # NOQA
    from pgmpy.inference import VariableElimination  # NOQA

    # Human-readable state labels for each semantic type of variable.
    name_nice = ['n1', 'n2', 'n3']
    score_nice = ['low', 'high']
    match_nice = ['diff', 'same']
    num_names = len(name_nice)
    num_scores = len(score_nice)
    nid_basis = list(range(num_names))
    score_basis = list(range(num_scores))

    # Maps a CPD's ``semtype`` tag to the labels of its states.
    semtype2_nice = {
        'score': score_nice,
        'name': name_nice,
        'match': match_nice,
    }
    # Maps variable name -> CPD; filled in as CPDs are created below.
    var2_cpd = {
    }
    # Exposed as module globals (presumably for interactive inspection).
    globals()['semtype2_nice'] = semtype2_nice
    globals()['var2_cpd'] = var2_cpd

    # All ordered pairs of name ids, and a flag marking pairs with equal ids.
    name_combo = np.array(list(ut.iprod(nid_basis, nid_basis)))
    combo_is_same = name_combo.T[0] == name_combo.T[1]

    def get_expected_scores_prob(level1, level2):
        # ``level1`` where the two names match, ``1 - level2`` where they differ.
        part1 = combo_is_same * level1
        part2 = (1 - combo_is_same) * (1 - (level2))
        expected_scores_level = part1 + part2
        return expected_scores_level

    # def make_cpd():

    def name_cpd(aid):
        # Uniform prior over the names for annotation ``aid``.
        from pgmpy.factors import TabularCPD
        cpd = TabularCPD(
            variable='N' + aid,
            variable_card=num_names,
            values=[[1.0 / num_names] * num_names])
        cpd.semtype = 'name'
        return cpd

    name_cpds = [name_cpd('i'), name_cpd('j'), name_cpd('k')]
    var2_cpd.update(dict(zip([cpd.variable for cpd in name_cpds], name_cpds)))
    # The ``if True`` switches below select between alternative model variants;
    # the ``else`` branches are currently dead code.
    if True:
        num_same_diff = 2
        samediff_measure = np.array([
            # get_expected_scores_prob(.12, .2),
            # get_expected_scores_prob(.88, .8),
            get_expected_scores_prob(0, 0),
            get_expected_scores_prob(1, 1),
        ])
        # Normalise each column so it sums to one.
        samediff_vals = (samediff_measure / samediff_measure.sum(axis=0)).tolist()

        def samediff_cpd(aid1, aid2):
            # Match variable A<aid1><aid2> conditioned on the two name variables.
            cpd = TabularCPD(
                variable='A' + aid1 + aid2,
                variable_card=num_same_diff,
                values=samediff_vals,
                evidence=['N' + aid1, 'N' + aid2],  # [::-1],
                evidence_card=[num_names, num_names])  # [::-1])
            cpd.semtype = 'match'
            return cpd
        samediff_cpds = [samediff_cpd('i', 'j'), samediff_cpd('j', 'k'), samediff_cpd('k', 'i')]
        var2_cpd.update(dict(zip([cpd.variable for cpd in samediff_cpds], samediff_cpds)))

        if True:
            def score_cpd(aid1, aid2):
                # Score variable S<aid1><aid2> conditioned on the match variable
                # and both names; the table is filled row by row below.
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]
                evidence_cpds = [var2_cpd[key] for key in evidence]
                evidence_nice = [semtype2_nice[cpd.semtype] for cpd in evidence_cpds]
                evidence_card = list(map(len, evidence_nice))
                evidence_states = list(ut.iprod(*evidence_nice))
                variable_basis = semtype2_nice[semtype]

                variable_values = []
                for mystate in variable_basis:
                    row = []
                    for state in evidence_states:
                        # NOTE(review): ``state`` follows the order of
                        # ``evidence`` (match, name1, name2), so state[0] is a
                        # match label and state[2] a name label; the
                        # comparisons below look like they assume a different
                        # order - confirm intent.
                        if state[0] == state[1]:
                            if state[2] == 'same':
                                val = .2 if mystate == 'low' else .8
                            else:
                                val = 1
                                # val = .5 if mystate == 'low' else .5
                        elif state[0] != state[1]:
                            if state[2] == 'same':
                                val = .5 if mystate == 'low' else .5
                            else:
                                val = 1
                                # val = .9 if mystate == 'low' else .1
                        row.append(val)
                    variable_values.append(row)

                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,  # [::-1],
                    evidence_card=evidence_card)  # [::-1])
                cpd.semtype = semtype
                return cpd
        else:
            # Alternative: score depends on the match variable only.
            score_values = [
                [.8, .1],
                [.2, .9],
            ]

            def score_cpd(aid1, aid2):
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=num_scores,
                    values=score_values,
                    evidence=['A' + aid1 + aid2],  # [::-1],
                    evidence_card=[num_same_diff])  # [::-1])
                cpd.semtype = 'score'
                return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds + samediff_cpds
    else:
        # Alternative: no match variables; scores depend directly on the names.
        score_measure = np.array([get_expected_scores_prob(level1, level2)
                                  for level1, level2 in
                                  zip(np.linspace(.1, .9, num_scores),
                                      np.linspace(.2, .8, num_scores))])
        score_values = (score_measure / score_measure.sum(axis=0)).tolist()

        def score_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='S' + aid1 + aid2,
                variable_card=num_scores,
                values=score_values,
                evidence=['N' + aid1, 'N' + aid2],
                evidence_card=[num_names, num_names])
            cpd.semtype = 'score'
            return cpd
        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds
        pass

    # Derive the graph structure from the CPDs: one edge per (evidence -> variable).
    input_graph = []
    for cpd in cpd_list:
        if cpd.evidence is not None:
            for evar in cpd.evidence:
                input_graph.append((evar, cpd.variable))
    name_model = BayesianModel(input_graph)
    name_model.add_cpds(*cpd_list)

    var2_cpd.update(dict(zip([cpd.variable for cpd in cpd_list], cpd_list)))
    globals()['var2_cpd'] = var2_cpd

    varnames = [cpd.variable for cpd in cpd_list]

    # --- PRINT CPDS ---

    cpd = score_cpds[0]

    def print_cpd(cpd):
        # Print the CPD as a labelled DataFrame: rows are the variable's states,
        # columns are the combinations of evidence states.
        print('CPT: %r' % (cpd,))
        index = semtype2_nice[cpd.semtype]
        if cpd.evidence is None:
            columns = ['None']
        else:
            basis_lists = [semtype2_nice[var2_cpd[ename].semtype] for ename in cpd.evidence]
            columns = [','.join(x) for x in ut.iprod(*basis_lists)]
        data = cpd.get_cpd()
        print(pd.DataFrame(data, index=index, columns=columns))

    for cpd in name_model.get_cpds():
        print('----')
        print(cpd._str('phi'))
        print_cpd(cpd)

    # --- INFERENCE ---

    Ni = name_cpds[0]

    # Evidence grid: Ni is pinned to state 0 and every score variable ranges
    # over all of its states.
    event_space_combos = {}
    event_space_combos[Ni.variable] = 0  # pin Ni to state index 0
    for cpd in cpd_list:
        if cpd.semtype == 'score':
            event_space_combos[cpd.variable] = list(range(cpd.variable_card))
    evidence_dict = ut.all_dict_combinations(event_space_combos)

    # Query the remaining variables given different evidence assignments

    def pretty_evidence(evidence):
        # Render evidence as 'Var=label' strings using the nice state labels.
        return [key + '=' + str(semtype2_nice[var2_cpd[key].semtype][val])
                for key, val in evidence.items()]

    def print_factor(factor):
        # Print a factor as {labelled assignment: value}. Only referenced from
        # a commented-out call in try_query.
        row_cards = factor.cardinality
        row_vars = factor.variables
        values = factor.values.reshape(np.prod(row_cards), 1).flatten()
        # col_cards = 1
        # col_vars = ['']
        basis_lists = list(zip(*list(ut.iprod(*[range(c) for c in row_cards]))))
        nice_basis_lists = []
        for varname, basis in zip(row_vars, basis_lists):
            cpd = var2_cpd[varname]
            _nice_basis = ut.take(semtype2_nice[cpd.semtype], basis)
            nice_basis = ['%s=%s' % (varname, val) for val in _nice_basis]
            nice_basis_lists.append(nice_basis)
        row_lbls = [', '.join(sorted(x)) for x in zip(*nice_basis_lists)]
        print(ut.repr3(dict(zip(row_lbls, values)), precision=3, align=True, key_order_metric='-val'))

    # name_belief = BeliefPropagation(name_model)
    name_belief = VariableElimination(name_model)

    import pgmpy
    import six  # NOQA

    def try_query(evidence):
        # Query every variable not fixed by ``evidence`` and print the
        # resulting per-variable factors side by side.
        print('--------')
        query_vars = ut.setdiff_ordered(varnames, list(evidence.keys()))
        evidence_str = ', '.join(pretty_evidence(evidence))
        probs = name_belief.query(query_vars, evidence)
        factor_list = probs.values()
        joint_factor = pgmpy.factors.factor_product(*factor_list)
        print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ')')
        # print(six.text_type(joint_factor))
        factor = joint_factor  # NOQA
        # print_factor(factor)
        # import utool as ut
        print(ut.hz_str([(f._str(phi_or_p='phi')) for f in factor_list]))

    for evidence in evidence_dict:
        try_query(evidence)

    # Two hand-picked cases: all match variables at index 1, then all at index 0.
    evidence = {'Aij': 1, 'Ajk': 1, 'Aki': 1, 'Ni': 0}
    try_query(evidence)

    evidence = {'Aij': 0, 'Ajk': 0, 'Aki': 0, 'Ni': 0}
    try_query(evidence)

    globals()['score_nice'] = score_nice
    globals()['name_nice'] = name_nice
    globals()['score_basis'] = score_basis
    globals()['nid_basis'] = nid_basis

    print('Independencies')
    print(name_model.get_independencies())
    print(name_model.local_independencies([Ni.variable]))

    # NOTE(review): two commented-out loops used to sit here. They iterated the
    # (also commented-out) `special_cases` and queried 'Lk' / 'Lj' with
    # BeliefPropagation. They depended on names that no longer exist in this
    # function (`special_cases`, `query_var`), so only this note is kept.

    # --- DRAW MODEL ---

    import plottool as pt
    import networkx as netx
    fig = pt.figure()  # NOQA
    fig.clf()
    ax = pt.gca()

    # Copy the model's nodes/edges into a plain DiGraph for drawing.
    netx_nodes = [(node, {}) for node in name_model.nodes()]
    netx_edges = [(etup[0], etup[1], {}) for etup in name_model.edges()]
    netx_graph = netx.DiGraph()
    netx_graph.add_nodes_from(netx_nodes)
    netx_graph.add_edges_from(netx_edges)

    # pos = netx.graphviz_layout(netx_graph)
    pos = netx.pydot_layout(netx_graph, prog='dot')
    netx.draw(netx_graph, pos=pos, ax=ax, with_labels=True)

    pt.plt.savefig('foo.png')
    ut.startfile('foo.png')
# Model1: structure x4 -> {x1, x6}, x1 -> x2 -> x5, x6 -> x3.
model1 = BayesianModel([('x1', 'x2'), ('x2', 'x5'), ('x4', 'x1'), ('x4', 'x6'), ('x6', 'x3')])

# Register every CPD exactly once. The original called add_cpds twice with the
# same six CPDs, which only re-registered tables that were already attached.
cpd1 = [p_21, p_52, p_14, p_64, p_36, p4]
model1.add_cpds(*cpd1)

print("------------------------------------------")
print("Edges of model1:", model1.edges())
print("Checking Model1:", model1.check_model())
print("------------------------------------------")

# Generate data for model1 by forward sampling.
inference = BayesianModelSampling(model1)
data = inference.forward_sample(size=3000, return_type='dataframe')
print("Data for model1:")
print(data)

# Score the structure against the sampled data.
k2 = K2Score(data)
print('Model1 K2 Score: ' + str(k2.score(model1)))

# Inference: marginal of x3 via variable elimination.
from pgmpy.inference import VariableElimination
infer = VariableElimination(model1)
print("Inference of x3:")
print(infer.query(['x3'])['x3'])
# ###################################################################################
from pgmpy.models import BayesianModel

model = BayesianModel()

# Add nodes to the empty Bayesian model.
# ------------------------------------------------------ ( Traffic Accident -> traffic_jam )
# ------------------------------------------------------ ( Heavy Rain       -> traffic_jam )
model.add_nodes_from(['rain', 'traffic_jam'])
model.add_edge('rain', 'traffic_jam')

# If an edge is added without adding its nodes first, the missing node is
# added automatically. Example:
model.add_edge('accident', 'traffic_jam')
model.nodes()
# expected: ['accident', 'rain', 'traffic_jam']
model.edges()
# expected: [('rain', 'traffic_jam'), ('accident', 'traffic_jam')]  (two edges)

# Each node has an associated CPD.
# TabularCPD is taken from pgmpy.factors.discrete; the original imported from
# a non-existent `pgmpy.factor` module.
from pgmpy.factors.discrete import TabularCPD

cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]])
# One column per (rain, accident) combination; each column sums to 1.
# The keyword was misspelled `evdience`, which raises TypeError.
cpd_traffic_jam = TabularCPD('traffic_jam', 2,
                             [[0.9, 0.6, 0.7, 0.1],
                              [0.1, 0.4, 0.3, 0.9]],
                             evidence=['rain', 'accident'],
                             evidence_card=[2, 2])

# Associate each CPD with the model.
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.get_cpds()
# expected (addresses vary):
#   [<TabularCPD representing P(rain: 2) at ...>,
#    <TabularCPD representing P(accident: 2) at ...>,
#    ...]
from pgmpy.readwrite.BIF import BIFWriter
import pandas as pd
import numpy as np
from time import time
import graphviz as gv
import os

# Load the data, keep rows whose total is below 200, and binarise every value
# greater than 1.
train = pd.read_csv('../msnbcWithHeader.csv', sep=',')
row_totals = train.sum(axis=1)
train = train[row_totals < 200]
train[train > 1] = 1

# Structure learning (hill climbing scored with BIC), then parameter fitting.
train_start = time()
bic = BicScore(train)
hc = HillClimbSearch(train, scoring_method=bic)
best_model = hc.estimate(prog_bar=True)
edges = best_model.edges()
model = BayesianModel(edges)
model.fit(train, estimator=BayesianEstimator, prior_type="BDeu")
variables = model.nodes()
print(model.edges())
train_end = time() - train_start
print(f"train time {train_end}")

# Render the learned structure with graphviz.
my_graph = gv.Digraph(format='png')
for node in variables:
    my_graph.node(node)
for tail, head in edges:
    my_graph.edge(tail, head)
filename = my_graph.render('../graph', view=True)
def main():
    """Learn a 9-variable network, duplicate it, join both copies via 'same', fit and evaluate.

    Steps:
      1. Load the features through ``PGM_t``.
      2. Run hill climbing (BDeu score) over the first nine feature columns,
         starting from a hand-wired initial model.
      3. Clone the learned structure with the last character of every node name
         replaced by ``'b'``, and connect both copies to a ``'same'`` node.
      4. Fit CPDs on the training set, print them, and report the accuracy
         returned by ``chk_accuracy`` for the two ``same`` classes.
    """
    andPGM = PGM_t()
    print('loading features..')
    train_set, test_set = andPGM.load_features()
    print('loading features.. Done')

    # Bayesian network of 19 nodes, 9*2 variables of network given
    # Initial incomplete Bayesian model connected manually based on intuition
    print('Generating model.. ')
    initialModel = BayesianModel({})
    initialModel.add_nodes_from(andPGM.img_features.columns[1:10].tolist())
    initialModel.add_edges_from([('f6_a', 'f2_a'),
                                 ('f3_a', 'f4_a'),
                                 ('f5_a', 'f9_a'),
                                 ('f4_a', 'f7_a')])

    # Use hill climb search algorithm to find network structure of initial 9 nodes
    features_9 = andPGM.img_features.iloc[0:, 1:10]
    hc = HillClimbSearch(data=features_9,
                         scoring_method=BdeuScore(
                             features_9,
                             equivalent_sample_size=0.1 * len(andPGM.img_features)),
                         state_names=andPGM.states_9)
    # Get best estimated structure
    best_model = hc.estimate(start=initialModel)
    # Edges in the acquired graph
    print('model of 9 var: ', best_model.edges())

    # Create a clone of the generated structure: same edges, node names with
    # their last character replaced by 'b'.
    clone_model = BayesianModel({})
    for edge in best_model.edges():
        new_edge = [edge[0][:-1] + 'b', edge[1][:-1] + 'b']
        clone_model.add_edges_from([new_edge])

    # Join together the original and clone network through node 'same'.
    # edges() returns a view on networkx >= 2, and views do not support `+`,
    # so both are materialised as lists before concatenating.
    multinetModel = BayesianModel({})
    multinetModel.add_edges_from(list(best_model.edges()) + list(clone_model.edges()))
    multinetModel.add_node('same')
    multinetModel.add_edge('f5_a', 'same')
    multinetModel.add_edge('f9_a', 'same')
    multinetModel.add_edge('f5_b', 'same')
    multinetModel.add_edge('f9_b', 'same')
    print('Generating model.. Done')
    # Edges in the final structure
    print('Final model: ', multinetModel.edges())

    print('Fit data into model..')
    # Fit the data to the model to generate CPDs (fit's default estimator).
    multinetModel.fit(data=train_set, state_names=andPGM.states_all)
    print('Fit data into model.. Done')
    print('CPDs generated: ')
    cpds = multinetModel.get_cpds()
    for cpd in cpds:
        print(cpd)

    # Inference using Variable Elimination
    print('Start inference..')
    inference = VariableElimination(multinetModel)
    # Split by label value; the variable names follow the original author's
    # convention (0 -> "same", 1 -> "not same").
    train_set_same = train_set[train_set['same'] == 0]
    train_set_not_same = train_set[train_set['same'] == 1]

    # Accuracy of positive inferences
    acc_same = andPGM.chk_accuracy(
        train_set_same, inference,
        variables=train_set_same.columns[0:9].tolist(),
        evidence=train_set_same.columns[9:19].tolist())
    print('accuracy of positives ', acc_same)

    # Accuracy of negative inferences
    acc_nt_same = andPGM.chk_accuracy(
        train_set_not_same, inference,
        variables=train_set_not_same.columns[0:9].tolist(),
        evidence=train_set_not_same.columns[9:19].tolist())
    print('accuracy of negatives', acc_nt_same)
def _task4_fit_and_score(model, data, features, index, description):
    """Fit K2-prior CPDs on ``model``, record it in ``task4_bms`` and return its K2 score."""
    estimator = BayesianEstimator(model, data)
    for fx in features:
        model.add_cpds(estimator.estimate_cpd(fx, prior_type="K2"))
    task4_bms.append(model)
    print(" Model " + str(index) + ": " + description + " is : " + str(model.edges()))
    # The score is computed on 1000 forward samples drawn from the fitted model.
    samples = BayesianModelSampling(model).forward_sample(size=1000)
    score = K2Score(samples).score(model)
    print(" Model " + str(index) + ": K2 Accuracy Score is " + str(score))
    return score


def task4():
    """Build five candidate Bayesian models, score each with K2 and keep the best.

    Model 1 is learned with hill climbing; models 2-5 are hand-specified
    structures. Every model is fitted with a K2 prior, appended to the
    module-level ``task4_bms`` list and scored. The model with the highest
    score is stored in the global ``task4_best_bm``.
    """
    global andRawData, task4_best_bm
    features = ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9']
    andRawData_temp = pd.DataFrame(andRawData.values, columns=features)

    # Hand-specified structures (models 2..5) with the label used in the output.
    manual_structures = [
        ([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'), ('f1', 'f7'), ('f5', 'f3'),
          ('f9', 'f8'), ('f1', 'f6'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f2')],
         "Manual Model based on HillClimbSearch"),
        ([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'), ('f5', 'f7'), ('f5', 'f3'),
          ('f9', 'f8'), ('f1', 'f2'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f2')],
         "Manual Model based on HillClimbSearch"),
        ([('f3', 'f4'), ('f4', 'f9'), ('f5', 'f7'), ('f5', 'f3'), ('f1', 'f2'),
          ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f8')],
         "Manual Model based on HillClimbSearch"),
        # Model 5 is the intuition-based structure; its label previously said
        # "HillClimbSearch" by copy-paste.
        ([('f3', 'f4'), ('f4', 'f9'), ('f4', 'f7'), ('f1', 'f2'), ('f8', 'f5'),
          ('f9', 'f6'), ('f9', 'f8')],
         "Manual Model based on Intuition"),
    ]

    # Models evaluated by THIS call, so the best one is selected correctly even
    # if ``task4_bms`` already holds entries from an earlier call.
    models = []
    k2Scores = []

    # Model 1: structure learned through hill climbing.
    est = HillClimbSearch(andRawData_temp, scoring_method=K2Score(andRawData_temp))
    model_temp = est.estimate()
    models.append(model_temp)
    k2Scores.append(_task4_fit_and_score(
        model_temp, andRawData_temp, features, 1, "Model through HillClimbSearch"))

    # Models 2..5: manual structures, built and evaluated one at a time.
    for index, (structure, description) in enumerate(manual_structures, start=2):
        model_temp = BayesianModel(structure)
        models.append(model_temp)
        k2Scores.append(_task4_fit_and_score(
            model_temp, andRawData_temp, features, index, description))

    best_index = k2Scores.index(max(k2Scores))
    task4_best_bm = models[best_index]
    print(" Best Bayesian Model with the highest accuracy score is thus Model " + str(1 + best_index))
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Structure: Pollution and Smoker cause Cancer; Cancer causes Xray and Dyspnoea.
cancer_model = BayesianModel([('Pollution', 'Cancer'),
                              ('Smoker', 'Cancer'),
                              ('Cancer', 'Xray'),
                              ('Cancer', 'Dyspnoea')])
print('Bayesian network models are :')
print('\t', cancer_model.nodes())
print('Bayesian edges are:')
print('\t', cancer_model.edges())

# Conditional probability tables; every column sums to 1.
cpd_poll = TabularCPD(variable='Pollution', variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker', variable_card=2,
                       values=[[0.3], [0.7]])
cpd_cancer = TabularCPD(variable='Cancer', variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
cpd_xray = TabularCPD(variable='Xray', variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'], evidence_card=[2])
# The `evidence` argument was missing here although `evidence_card` and a
# two-column table were given; Dyspnoea's only parent in the graph is Cancer.
cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'], evidence_card=[2])

# Associating the parameters with the model structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model.
print(cancer_model.check_model())

# Check d-separations. This is only meant for those interested. You do not
# need to understand this to do the project.
print(cancer_model.is_active_trail('Pollution', 'Smoker'))
print(cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer']))
print(cancer_model.local_independencies('Xray'))
print(cancer_model.get_independencies())

# Print model information
print(cancer_model.edges())
print(cancer_model.nodes())
print(cancer_model.get_cpds())

# Doing exact inference using Variable Elimination
cancer_infer = VariableElimination(cancer_model)

# Query
print(cancer_infer.query(variables=['Dyspnoea'], evidence={'Cancer': 0}))
# NOTE(review): this call was cut off after `'Pollution': 0`; it is closed
# here in the minimal way - confirm against the full script.
print(cancer_infer.query(variables=['Cancer'],
                         evidence={'Smoker': 0, 'Pollution': 0}))
# Read the attribute names; the first CSV row holds the column names.
# A context manager closes the file (the original leaked the handle), and
# newline='' is what the csv module expects for files passed to csv.reader.
with open('data7_names.csv', 'r', newline='') as names_file:
    lines = list(csv.reader(names_file))
attributes = lines[0]

# Read Cleveland Heart disease data; '?' marks a missing value.
heartDisease = pd.read_csv('data7_heart.csv', names=attributes)
heartDisease = heartDisease.replace('?', np.nan)

# Model Bayesian Network (the duplicated ('sex', 'trestbps') edge is listed once).
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'), ('exang', 'trestbps'),
                       ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'thalach'),
                       ('heartdisease', 'chol')])
print('\nBayesian Network Nodes are: ')
print('\t', model.nodes())
print('\nBayesian Network Edges are:')
print('\t', model.edges())

# Learning CPDs using Maximum Likelihood Estimators
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inferencing with Bayesian Network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Computing the probability of heart disease given age.
print('\n1.Probability of HeartDisease given Age=28')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])

# Computing the probability of heart disease given cholesterol.
print('\n2. Probability of HeartDisease given chol (Cholestoral) =100')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-query and mutation methods of ``BayesianModel``."""

    def setUp(self):
        # Structure-only DAG used by the moralisation and local-independence tests.
        self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])

        # Model with CPDs attached, used by the I-map, copy and removal tests.
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # Structure-only DAG used by the ancestor tests.
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def _assert_undirected_edges(self, graph, expected_edges):
        """Assert that ``graph`` has exactly ``expected_edges``, ignoring endpoint order.

        The original loops only checked that every produced edge was in the
        expected list, so a graph with missing edges would still have passed.
        """
        self.assertEqual({frozenset(edge) for edge in graph.edges()},
                         {frozenset(edge) for edge in expected_edges})

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancestors_g = self.G2._get_ancestors_of('g')
        ancestors_d = self.G2._get_ancestors_of('d')
        ancestors_il = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancestors_g, {'d', 'i', 'g'})
        self.assertEqual(ancestors_d, {'d'})
        self.assertEqual(ancestors_il, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        # 'h' is not a node of G2.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'),
                         Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'),
                         Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain DiscreteFactor is expected to be rejected with TypeError.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # The copy must hold its own CPD objects, not references to the originals.
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        # Replacing a CPD on the original must not show up in the copy.
        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'), model_copy.get_cpds('diff'))

        # Removing a node from the original must not show up in the copy.
        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
        # G2 is created in setUp as well; release it alongside the others.
        del self.G2
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

# Structure: rain -> sprinkler, rain -> wetGrass, sprinkler -> wetGrass.
test_model = BayesianModel([
    ('rain', 'sprinkler'),
    ('rain', 'wetGrass'),
    ('sprinkler', 'wetGrass'),
])

# Prior over the root node.
cpd_rain = TabularCPD(
    variable='rain',
    variable_card=2,
    values=[[0.2], [0.8]],
)

# wetGrass conditioned on (rain, sprinkler): one column per parent configuration.
cpd_wetGrass = TabularCPD(
    variable='wetGrass',
    variable_card=2,
    values=[(0, 0.8, 0.9, 0.99),
            (1, 0.2, 0.1, 0.01)],
    evidence=['rain', 'sprinkler'],
    evidence_card=[2, 2],
)

# sprinkler conditioned on rain.
cpd_sprinkler = TabularCPD(
    variable='sprinkler',
    variable_card=2,
    values=[(0.4, 0.01),
            (0.6, 0.99)],
    evidence=['rain'],
    evidence_card=[2],
)

test_model.add_cpds(cpd_rain, cpd_sprinkler, cpd_wetGrass)

# Results of these three calls are not used by the script.
test_model.edges()
test_model.nodes()
test_model.check_model()
# print('hello')

# Query wetGrass with rain fixed to state 1 and print the first entry of
# the resulting factor's value array.
test_infer = VariableElimination(test_model)
q = test_infer.query(variables=['wetGrass'], evidence={'rain': 1})
result = q['wetGrass']
string = str(q['wetGrass'])
print(result.values[0])
# print(type(string))
# NOTE(review): the excerpt ends with an opening triple quote whose contents
# are not visible here; confirm nothing that follows was meant to be enclosed.
# Work on a random subset of at most 2000 rows.  The size is capped at
# row_size because random.sample raises ValueError when asked for more
# items than the population contains.
random_indices = sample(range(row_size), min(2000, row_size))
smallDF = data.iloc[random_indices, :]

# Pseudocounts are given (1,1) for uniform
PseudoCounts = {productName: [1, 1] for productName in smallDF.columns}

print('Existing network not found')
est = ConstraintBasedEstimator(smallDF)

# Structure learning, timed with wall-clock time.
print('Starting to estimate the model structure, might take a while...')
start = time.time()
model = est.estimate(significance_level=0.05)
end = time.time()
print('Time spent to estimate model structure {0}'.format(end - start))
print('Edges of the model:')
print(model.edges())

# Parameter learning with a Dirichlet prior built from the pseudo counts above.
print('Starting to estimate model parameters..')
start = time.time()
model.fit(smallDF,
          estimator=BayesianEstimator,
          prior_type='dirichlet',
          pseudo_counts=PseudoCounts)
end = time.time()
print('Time spent to estimete the model parameters {0}'.format(end - start))

# Save edge, node, CPD information
Edges = model.edges()
Nodes = model.nodes()
CPD = model.get_cpds()

# Despite the .txt suffix this file holds a binary pickle (opened in "wb").
with open("Edges.txt", "wb") as fp:
    pickle.dump(Edges, fp)
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-query and mutation methods of ``BayesianModel``."""

    def setUp(self):
        # Structure-only DAG used by the moralisation and local-independence tests.
        self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])

        # Model with CPDs attached, used by the I-map, copy and removal tests.
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # Structure-only DAG used by the ancestor tests.
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def _assert_undirected_edges(self, graph, expected_edges):
        """Assert that ``graph`` has exactly ``expected_edges``, ignoring endpoint order.

        The original loops only checked that every produced edge was in the
        expected list, so a graph with missing edges would still have passed.
        """
        self.assertEqual({frozenset(edge) for edge in graph.edges()},
                         {frozenset(edge) for edge in expected_edges})

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancestors_g = self.G2._get_ancestors_of('g')
        ancestors_d = self.G2._get_ancestors_of('d')
        ancestors_il = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancestors_g, {'d', 'i', 'g'})
        self.assertEqual(ancestors_d, {'d'})
        self.assertEqual(ancestors_il, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        # 'h' is not a node of G2.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'),
                         Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'),
                         Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain DiscreteFactor is expected to be rejected with TypeError.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # The copy must hold its own CPD objects, not references to the originals.
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        # Replacing a CPD on the original must not show up in the copy.
        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'), model_copy.get_cpds('diff'))

        # Removing a node from the original must not show up in the copy.
        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
        # G2 is created in setUp as well; release it alongside the others.
        del self.G2
# data_size = len(data)
model = BayesianModel()

# Three repeated slices (suffix 0..2) with the same internal wiring; TQ and
# OU feed every slice.  Edge order is kept as in the original so node
# insertion order is unchanged.
list_edges = []
for i in range(3):
    suffix = str(i)
    list_edges += [('DI' + suffix, 'DFT' + suffix),
                   ('TQ', 'DFT' + suffix),
                   ('DI' + suffix, 'RD' + suffix),
                   ('DFT' + suffix, 'RD' + suffix),
                   ('RD' + suffix, 'DFO' + suffix),
                   ('OU', 'DFO' + suffix)]
# Links between consecutive slices, plus the two parents of the first DI node.
list_edges += [('RD0', 'DI1'), ('RD1', 'DI2'), ('DPQ', 'DI0'), ('C', 'DI0')]
model.add_edges_from(list_edges)

# NOTE(review): other snippets in this file pass this argument as
# `estimator=`; confirm which keyword the installed pgmpy version expects.
model.fit(data,
          estimator_type=BayesianEstimator,
          prior_type="BDeu",
          equivalent_sample_size=10)

for edge in model.edges():
    print(edge)
print("\n")

infer = VariableElimination(model)

# Take a mutable copy of the node collection: on networkx >= 2, nodes()
# returns a read-only view that has no remove() method.
nodes = list(model.nodes())

Distribution = {}
# Evidence nodes get a 5-entry indicator vector: 1 at index pr[key], 0 elsewhere.
for key in pr.keys():
    Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
    nodes.remove(key)
print('pr done')

# Every remaining node is queried given the evidence in pr.
for key in nodes:
    Distribution[key] = infer.query([key], evidence=pr)[key].values
    print('done' + key)
values=[[0.998], [0.002]]) cpd_alarm = TabularCPD(variable='Alarm', variable_card=2, values=[[0.999, 0.71, 0.06, 0.05], [0.001, 0.29, 0.94, 0.95]], evidence=['Burglary', 'Earthquake'], evidence_card=[2, 2]) cpd_johncalls = TabularCPD(variable='JohnCalls', variable_card=2, values=[[0.95, 0.1], [0.05, 0.9]], evidence=['Alarm'], evidence_card=[2]) cpd_marycalls = TabularCPD(variable='MaryCalls', variable_card=2, values=[[0.1, 0.7], [0.9, 0.3]], evidence=['Alarm'], evidence_card=[2]) # Associating the parameters with the model structure alarm_model.add_cpds(cpd_burglary, cpd_earthquake, cpd_alarm, cpd_johncalls, cpd_marycalls) #new cell alarm_model.check_model() #new cell alarm_model.nodes() #new cell alarm_model.edges() #new cell alarm_model.local_independencies('Burglary') #new cell alarm_model.local_independencies('JohnCalls')
class TestBaseModelCreation(unittest.TestCase):
    """Tests for constructing a ``BayesianModel`` and adding nodes and edges.

    Results of ``nodes()``, ``edges()`` and ``predecessors()`` are wrapped in
    ``list(...)`` before ``assertListEqual``: on networkx >= 2 those calls
    return views/iterators rather than lists, and ``assertListEqual`` rejects
    non-list arguments.
    """

    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        # Only checks that construction with non-string nodes does not raise.
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(list(self.G.nodes()), ['a'])

    def test_add_node_nonstring(self):
        # Only checks that adding a non-string node does not raise.
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(list(self.G.edges()), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        # c -> a would close the cycle a -> c -> a.
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.g.predecessors('a')), [])
        self.assertListEqual(list(self.g.predecessors('b')), ['a'])
        self.assertListEqual(list(self.g.predecessors('c')), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.G.predecessors('a')), [])
        self.assertListEqual(list(self.G.predecessors('b')), ['a'])
        self.assertListEqual(list(self.G.predecessors('c')), ['b'])

    def tearDown(self):
        del self.G
class Utilities(object): def __init__(self, file): ''' no object creation -> opportune ?''' self.keywords = ['BENS', 'MEMS', 'LANS', 'MOTOR', 'WORLD'] self.standard_nodes = { 'RONS': { 'BENS': [], 'MEMS': [] }, 'LANS': { 'LANS': [] }, 'LENS': { 'MOTOR': [], 'WORLD': [] } } self.file = file self.get_json_path(file) self.pgmpy_object = BayesianModel() self.networkx_object = nx.DiGraph() self.header = '' self.dictionary = [] def get_nodes_in_family(self, family, attributes=False): nw_nodes = self.networkx_object.nodes() nw_dim = np.asarray(nw_nodes).ndim nodes = [] for i, node in enumerate(nw_nodes): if nw_dim > 1: node = node[0] if family in node: nodes.append(node) return nodes def check_json_path(directory): """ Checks whether the necessary project_repository directory exists. If not, creates it :param directory: the mother directory to search from downwards :type directory: string :rtype : none """ if not os.path.exists(directory + '\project_repository\\'): os.makedirs(directory + '\project_repository\\') def get_json_path(self, file): """ Creates a string containing the full path for the filename passed so it will be saved in the project_repository directory :param filename: filename without path or extension :return: a full path for the file :type filename :string :rtype : string """ levels = 5 common = os.path.dirname(os.path.realpath(__file__)) for i in range(levels + 1): common = os.path.dirname(common) if 'peepo\peepo' not in common: break Utilities.check_json_path(common) self.file = str(common + '\project_repository\\' + file + '.json') print('in get_json_path :', self.file) def save_json(self, astring): """ This helping function is only needed to have the json file formatted in a user friendly way as the "dump" method does not provide a lot of possibilities to get it "pretty" :param file :the ull path of the json file :param astring: the name of the string containing the whole information :return: void :type file: string :type astring : string :rtype : 
void """ text_file = open(str(self.file), "w") '''remove all LF written by the dump method''' astring = re.sub('\n', '', astring) '''For keywords -> insert LF and tabs''' astring = re.sub('\"Identification', '\n\"Identification', astring) astring = re.sub('\"Date', '\n\"Date', astring) astring = re.sub('\"Description', '\n\"Description', astring) astring = re.sub('\"Train_from', '\n\"Train_from', astring) astring = re.sub('\"Frozen', '\n\"Frozen', astring) astring = re.sub('\"Nodes', '\n\n\"Nodes', astring) astring = re.sub('\"RONS', '\n\t\t\"RONS', astring) astring = re.sub('\"BENS', '\n\t\t\t\"BENS', astring) astring = re.sub('\"MEMS', '\n\t\t\t\"MEMS', astring) astring = re.sub('\"LANS', '\n\t\t\"LANS', astring) astring = re.sub('\"LENS', '\n\t\t\"LENS', astring) astring = re.sub('\"MOTOR', '\n\t\t\t\"MOTOR', astring) astring = re.sub('\"WORLD', '\n\t\t\t\"WORLD', astring) astring = re.sub('\"Edges', '\n\n\"Edges', astring) astring = re.sub('\"CPDs', '\n\n\"CPDs', astring) astring = re.sub('{', '\n\t\t{', astring) text_file.write(astring) text_file.write('\n') text_file.close() def translation(self, astring, from_man_to_machine): """ Given an array of tuples (a,b) in dictionary, returns the second element of the tuple where astring was found Is used to not loose the users node names as peepo generates standardized names for the corresponding node :param dictionary:an array of tuples -> is created in the method : get_network(file) :param astring: the name of the node passsed by the user :param from_man_to_machine: an integer -> 0 when we want the translation for the user give name to the standardized name, 1 the other way around :return: the corresponding standardized node name :type dictionary: np.array :type astring : string :rtype : string """ source = 0 target = 1 if from_man_to_machine == 1: source = 1 target = 0 for index, item in enumerate(self.dictionary): if item[source] == astring: break return item[target] def clean_edge_list(self, edge_array, parent): 
'''the get functions for the edges, both in networx as pgmpy contain the parent name this function removes it from the list''' cleaned_list = [] for a in edge_array: if a != parent: cleaned_list.append(a) return cleaned_list def clean_parent_list(self, parent_array, child): '''the get functions for the edges, both in networx as pgmpy contain the parent name this function removes it from the list''' cleaned_list = [] for i, a in enumerate(parent_array): if a[0] != child: cleaned_list.append(a[0]) return cleaned_list def get_edges(self): """ Creates a dictionary with a node as a key and an array with its child as value (the methods get_child give generally a list of tuples (parent,child) :param pgmpy_object: the pgmpy network :return: a dictionary with the edges of all the node :type fpgmpy_object:adress :rtype :dictionary """ edg = self.pgmpy_object.edges() edges = dict() [ edges[str(t[0])].append(str(t[1])) if t[0] in list(edges.keys()) else edges.update({str(t[0]): [str(t[1])]}) for t in edg ] return edges def get_nodes_and_attributes(self): """ Creates an array of tuple with a node as element 0 and a dictionary with cardinalities and cpd as key's and the key cardinality returns an int the key cpd a 2 dimensional matrix :param pgmpy_object: the pgmpy network :return: array of tuple with a node as element 0 and a dictionary with cardinalities and cpd as key's :type :pgmpy_object:adress :rtype :array of tuples """ nodes = self.pgmpy_object.nodes() nod_and_attributes = [] [ nod_and_attributes.append((str(node), { 'cardinality': int(self.pgmpy_object.get_cardinality(node)), 'cpd': self.pgmpy_object.get_cpds(node).values.astype(float) })) for i, node in enumerate(nodes) ] #need to reshape the cpds when more than 1 parent for i, node in enumerate(nod_and_attributes): shape = nod_and_attributes[i][1]['cpd'].shape dimension = nod_and_attributes[i][1]['cpd'].ndim if dimension > 2: col = int(np.prod(shape) / shape[0]) nod_and_attributes[i][1]['cpd'] = 
nod_and_attributes[i][1][ 'cpd'].reshape(shape[0], col) nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][ 'cpd'].tolist() return nod_and_attributes def translate_pgmpy_to_digraph(self): """ Converts a pgmpy network into a networkx network :param pgmpy_object: the pgmpy network :return networkx : networkx network :type :pgmpy_object:adress :rtype :networkx:adress """ self.networkx_object = nx.DiGraph() edges = self.pgmpy_object.edges() nodes_and_attributes = self.get_nodes_and_attributes() self.networkx_object.add_nodes_from(nodes_and_attributes) self.networkx_object.add_edges_from(edges) return def update_networkx(self, networkx, dic, header): self.header = header self.dictionary = dic self.networkx_object = networkx def update_pgmpy(self, pgmpy, dic, header): self.header = header self.dictionary = dic self.pgmpy_object = pgmpy def save_pgmpy_network(self): """ Saves the passed pgmpy_object class object in a json file """ self.translate_pgmpy_to_digraph() self.save_network() return def translate_digraph_to_pgmpy(self, digraf): """ Converts a pgmpy network into a networkx network :param pgmpy_object: the pgmpy network :return networkx : networkx network :type :pgmpy_object:adress :rtype :networkx:adress """ self.pgmpy_object, x, y = self.get_pgmpy_network(from_object=True, digraph=digraf) return self.pgmpy_object def translate_pgmpy_to_digraph(self): """ Converts a pgmpy network into a networkx network :param pgmpy_object: the pgmpy network :return networkx : networkx network :type :pgmpy_object:adress :rtype :networkx:adress """ self.networkx_object = nx.DiGraph() edges = self.pgmpy_object.edges() nodes_and_attributes = self.get_nodes_and_attributes() self.networkx_object.add_nodes_from(nodes_and_attributes) self.networkx_object.add_edges_from(edges) return def save_network(self): """ Saves the passed networkx class object in a json file """ data = self.get_empty_canvas() data["header"] = self.header nw_nodes = self.networkx_object.nodes(data=True) 
nw_edges = self.networkx_object.edges() keywords = self.keywords nodes = copy.deepcopy( self.standard_nodes ) #{'RONS': {'BENS': [], 'MEMS': []}, 'LANS': {'LANS': []}, 'LENS': {'MOTOR': [], 'WORLD': []}} edges = [] cpds = [] '''adding edges''' for i, node in enumerate(nw_nodes): node_name = node[0] childs = [] for k, edge in enumerate(nw_edges): if edge[0] == node_name: childs.append(self.translation(edge[1], 1)) if len(childs) != 0: edges.append({self.translation(node_name, 1): childs}) for i, node in enumerate(nw_nodes): node_name = node[0] cardinality = node[1]['cardinality'] cpd = node[1]['cpd'] for pseudonym in keywords: if pseudonym in node_name: node_name_ = self.translation(node_name, 1) if pseudonym == 'BENS' or pseudonym == 'MEMS': nodes['RONS'][pseudonym].append( [node_name_, cardinality]) if pseudonym == 'LANS': nodes['LANS'][pseudonym].append( [node_name_, cardinality]) if pseudonym == 'MOTOR' or pseudonym == 'WORLD': nodes['LENS'][pseudonym].append( [node_name_, cardinality]) cpds.append({self.translation(node_name, 1): cpd}) data['Nodes'] = nodes data['Edges'] = edges data['CPDs'] = cpds data['header']['Date'] = datetime.datetime.now().strftime("%c") self.save_json(json.dumps(data)) return def get_pgmpy_network(self, from_object=False, digraph=None): """ Reads the passed json file and translates it's content to the passed pgmpy class object - uses the get_network(file) to read the json file in a networkx format and translate this to pgmpy - Creates a dictionary for the nodes in the form of an array of tuples : [(names defines by user, standard name)] :param file: : filename without path or extension :pgmp_object : the pgmpy object which will be completed :return: a dictionary as an array of tuples and the header of the json file :type file : string :type pgmp_object : pgmpy class object :rtype : array of tuples, dictionary CAUTION : the method does not perform a check() on the constructed DAG ! 
-> has to be done in the calling module """ self.pgmpy_object = BayesianModel() if not (from_object): network, dictionary, header = self.get_network() else: network = digraph nw_nodes = network.nodes(data=True) nw_edges = network.edges() '''adding nnodes and edges''' for i, node in enumerate(nw_nodes): node_name = node[0] self.pgmpy_object.add_node(node_name) for k, edge in enumerate(nw_edges): if edge[0] == node_name: self.pgmpy_object.add_edge(node_name, edge[1]) '''add cpd's''' for i, node in enumerate(nw_nodes): parent_nodes = network.in_edges(node[0]) parent_nodes = self.clean_parent_list(parent_nodes, node[0]) cpd = node[1]['cpd'] ''' find the cardinality of the node ''' cardinality_node = node[1]['cardinality'] """ cardinality card of parents has to be determined""" cardinality_parents = [] for i, nod in enumerate(parent_nodes): cardinality_parents.append(network.node[nod]['cardinality']) ''' Depending on the place in the BN and/or the number of parents the PGMPY CPD methods have another call''' if len(cardinality_parents) == 0: self.pgmpy_object.add_cpds( TabularCPD(variable=node[0], variable_card=cardinality_node, values=[cpd])) continue table = TabularCPD(variable=node[0], variable_card= cardinality_node, values=cpd, \ evidence=parent_nodes,\ evidence_card=np.asarray(cardinality_parents)) self.pgmpy_object.add_cpds(table) '''------TO DELETE-------------''' # pgmpy_object.check_model() # draw_network(pgmpy_object) '''-----------------------------''' return self.pgmpy_object, self.dictionary, self.header def get_network(self): """ Reads the passed json file and translate it's content in a networkx class object - The nodes in the object are renamed so they have a standardized signature - Creates a dictionary for the nodes in the form of an array of tuples : [(names defines by user, standard name)] :param file: : filename without path or extension :return: a networkx class object, dictionary as an array of tuples and the header of the json file :type file : 
string :rtype : networkx class object, array of tuples, dictionary """ self.dictionary = [] self.networkx_object = nx.DiGraph() with open(self.file) as f: data = f.read() '''Remove possible non informative characters''' data = re.sub('\n', '', data) data = re.sub('\t', '', data) data = json.loads(data) self.header = data['header'] '''Feeding G with the nodes''' cardinality = {} for key in data['Nodes'].keys(): for secondkey in data['Nodes'][key].keys(): for c, n in enumerate(data['Nodes'][key][secondkey]): node = secondkey + "_" + str(c) self.networkx_object.add_node(node, { 'cardinality': n[1], 'cpd': [] }) self.dictionary.append((n[0], node)) cardinality.update( {node: n[1]} ) #this contains the cardinality of each node with the node name as dictionary entry '''Feeding G with the edges''' edges = [] for j, pair in enumerate(data['Edges']): for parent in pair.keys(): for child in data['Edges'][j][parent]: parent_ = self.translation(parent, 0) child_ = self.translation(child, 0) edges.append((parent_, child_)) np.ravel(edges) self.networkx_object.add_edges_from(edges) '''Feeding G with the CPD's as nodes attributes''' for j, node in enumerate(data['CPDs']): for parent, cpd in node.items(): node_ = self.translation(parent, 0) self.networkx_object.node[node_]['cpd'] = cpd '''TO REMOVE LATER''' # plt.figure(figsize=(10, 5)) # pos = nx.circular_layout(G, scale=2) # node_labels = nx.get_node_attributes(G, 'cpd') # nx.draw(G, pos, node_size=1200, node_color='lightblue', # linewidths=0.25, font_size=10, font_weight='bold', with_labels=True) # plt.show() return self.networkx_object, self.dictionary, self.header def create_json_file(self, **kwargs): """ EWAMPLE : A helping method if the user prefers to create the BN within the code :param case_name: the file name without path or extension where the json file will be saved :param : **kwargs takes the following variables: description = kwargs.get('description', '') train_from = kwargs.get('train_from', '') cpds = 
kwargs.get('CPDs', []) bens = kwargs.get('BENS',[]) mems = kwargs.get('MEMS', []) lans = kwargs.get('LANS', []) motors = kwargs.get('MOTORS', []) world = kwargs.get('WORLD', []) edges = kwargs.get('Edges', []) frozen = kwargs.get('frozen',False) . . . :return: void :type case_name : string :type : . . . :rtype : void """ description = kwargs.get('description', '') train_from = kwargs.get('train_from', '') cpds = kwargs.get('CPDs', []) bens = kwargs.get('BENS', []) mems = kwargs.get('MEMS', []) lans = kwargs.get('LANS', []) motors = kwargs.get('MOTORS', []) world = kwargs.get('WORLD', []) edges = kwargs.get('Edges', []) frozen = kwargs.get('frozen', False) #json_tab_file_write = JSONTabIndentFileWriter( Case_name,5a) data = self.get_empty_canvas() ''' - the 3 next items are for tracking purpose only, not fundamentally necessary''' data["header"]['Identification'] = self.file data["header"]['Date'] = datetime.datetime.now().strftime("%c") data["header"]['Description'] = description ''' - the next item gives a file containing possible training data (OPTIONAL)''' data["header"]['Train_from'] = train_from ''' Frozen tells whether or not the model can be considered as final i.e. is there still "training" needed''' data["header"]['Frozen'] = frozen ''' - the 5 next lines tells how much nodes and their names + cardinality the model will start with the names can be any valid python string''' bens = [['pooping', 2], ['peeing', 2], ['constipated', 2]] mems = [['havenotoiletpaper', 2]] lans = [['diarhea', 2], ['happypoop', 2]] motors = [['asshole1', 2], ['asshole2', 2]] world = [['toilet1', 2], ['toilet2', 2], ['garden1', 2], ['garden2', 2], ['doctor', 2]] ''' - the next items describe the edges as a dictionary -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs''' edges = [] ''' !! 
in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty''' edges.append({'pooping': ['toilet1', 'diarhea', 'happypoop']}) edges.append({'peeing': ['toilet2', 'garden1', 'garden2']}) edges.append({'constipated': ['doctor']}) edges.append({'havenotoiletpaper': ['garden1', 'garden2']}) edges.append( {'diarhea': ['toilet1', 'doctor', 'asshole1', 'asshole2']}) edges.append( {'happypoop': ['garden1', 'garden2', 'asshole1', 'asshole2']}) ''' - the next items describe the CPD's as a dictionary -> the dictionary entry is the corresponding node''' cpds = [] cpds.append({'pooping': [0.5, 0.5]}) cpds.append({'peeing': [0.2, 0.8]}) cpds.append({'constipated': [0.9, 0.1]}) cpds.append({'havenotoiletpaper': [0.6, 0.4]}) cpds.append({'happypoop': [[0.3, 0.8], [0.7, 0.2]]}) cpds.append({'diarhea': [[0.8, 0.3], [0.2, 0.7]]}) cpds.append({'toilet1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]}) cpds.append({'asshole1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]}) cpds.append({'asshole2': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]}) cpds.append({'toilet2': [[0.5, 0.5], [0.5, 0.5]]}) cpds.append({'doctor': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]}) cpds.append({ 'garden1': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5], [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]] }) cpds.append({ 'garden2': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5], [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]] }) ''' - feeding the data''' data["Nodes"]['RONS']['BENS'] = bens data["Nodes"]['RONS']['MEMS'] = mems data["Nodes"]['LANS']['LANS'] = lans data["Nodes"]['LENS']['MOTOR'] = motors data["Nodes"]['LENS']['WORLD'] = world data["Edges"] = edges data["CPDs"] = cpds ''' dumping to CASENAME file in jason format''' self.save_json(json.dumps(data)) print("Json file for - ", self.file, " - created") def create_json_template(self): """ A helping method if the jason template in the project_repository ditectory has been deleted or corrupted :param : void :return: void :type : void 
:rtype : void """ self.get_json_path( "Template" ) # creates the right path in which case_name will be saved data = self.get_empty_canvas() data['header']['Identification'] = self.file '''Filling some dummies to facilitate the user''' a_node = ['*', 0] an_edge = {'*': ['&', '&', '&']} a_cpd = {'*': [[0, 0, 0], [0, 0, 0]]} nodes = [] edges = [] cpds = [] for i in range(0, 3): nodes.append(a_node) edges.append(an_edge) cpds.append(a_cpd) data['Nodes']['RONS']['BENS'] = nodes data['Nodes']['RONS']['MEMS'] = nodes data['Nodes']['LANS']['LANS'] = nodes data['Nodes']['LENS']['MOTOR'] = nodes data['Nodes']['LENS']['WORLD'] = nodes data['Edges'] = edges data['CPDs'] = cpds ''' dumping to CASENAME file in jason format''' # with open(case_name, 'w') as f: # json.dump(data, f, separators = (",",":")) self.save_json(json.dumps(data)) print("Empty template created") def get_empty_canvas(self): """ This method creates a json canvas which will be used for the several json creating method :param : void :return: a dictionary with the structure of the json file :type : non :rtype : dictionary """ data = { 'header': { 'Identification': '', 'Date': '', 'Description': '', 'Frozen': '', 'Train_from': '' }, 'Nodes': {}, 'Edges': [], 'CPDs': [] } ''' - the 5 next lines tells how much nodes and their names the model will start with the names can be any valid python string''' bens = [] mems = [] lans = [] motors = [] world = [] ''' - the next items describe the edges as a dictionary -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs !! 
in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty''' edges = [] ''' - the next items describe the CPD's as a dictionary -> the dictionary entry is the corresponding node''' cpds = [] ''' - feeding the data''' data['Nodes'] = { 'RONS': { 'BENS': bens, 'MEMS': mems }, 'LANS': { 'LANS': lans }, 'LENS': { 'MOTOR': motors, 'WORLD': world } } data['Edges'] = edges data['CPDs'] = cpds return data
('staff_of_d_g', 'is_continuous_g'), ('staff_of_d_g', 'exit_stroke_d_g'), ('staff_of_d_g', 'is_lowercase_g'), ('staff_of_d_g', 'slantness_g'), ('staff_of_d_g', 'entry_stroke_a_g'), ('word_formation_g', 'dimension_g'), ('word_formation_g', 'staff_of_a_g'), ('word_formation_g', 'size_g'), ('word_formation_g', 'staff_of_d_g'), ('word_formation_g', 'constancy_g'), ('constancy_g', 'staff_of_a_g'), ('constancy_g', 'letter_spacing_g'), ('constancy_g', 'dimension_g'), ('dimension_f', 'output'), ('dimension_g', 'output')]) print(verify_model.edges()) print(verify_model.nodes()) fields = ['left', 'right', 'label'] seen_dat = pd.read_csv('dataset_seen_training_siamese_seen.csv', usecols=fields) seen_dat['pen_pressure_f'] = '' seen_dat['letter_spacing_f'] = '' seen_dat['size_f'] = '' seen_dat['dimension_f'] = '' seen_dat['is_lowercase_f'] = '' seen_dat['is_continuous_f'] = '' seen_dat['slantness_f'] = '' seen_dat['tilt_f'] = '' seen_dat['entry_stroke_a_f'] = ''
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class have numbers and must be called in order.
    '''

    # Number of ranked devices announced by the header written in data_info().
    _MAX_RANKED_DEVICES = 20

    def __init__(self, pnh, gh):
        '''
        Constructor
        -----------------
        Parameters:
        pnh : handler providing the data extractor, dataframe, file writer,
              device, priority and file suffix used by this instance
        gh  : general handler, queried for the device locations when drawing
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        '''
        (4) Method that builds the structure of the data
        -----------------
        Parameters:
        method          : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method  : K2, bic, bdeu (ignored by the "constraint" method)
        log             - "True" if you want to print debug information in the console
        -----------------
        Raises:
        ValueError if "method" is unknown, or if a scoring-based method is
        requested together with an unknown "scoring_method".
        '''
        # Select the scoring method for the local search of the structure
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)
        else:
            # Only an error when the selected method actually needs a score.
            scores = None

        # Select the actual method
        if method in ("scoring_approx", "scoring_exhaustive"):
            if scores is None:
                raise ValueError("Unknown scoring method: " +
                                 str(scoring_method))
            if method == "scoring_approx":
                est = HillClimbSearch(self.data, scores)
            else:
                est = ExhaustiveSearch(self.data, scores)
        elif method == "constraint":
            est = ConstraintBasedEstimator(self.data)
        else:
            raise ValueError("Unknown structure learning method: " +
                             str(method))

        self.best_model = est.estimate()
        # REMOVE all nodes not connected to anything else
        self.eliminate_isolated_nodes()

        for edge in self.best_model.edges():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        '''
        (5) Estimates the parameters of the found network.
        One CPD per node is estimated with a K2 prior, attached to the model,
        logged and written to the output file.
        '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " +
                                   str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " +
                                   str(self.variables_names))

        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(cpd.__str__())

    def inference(self, variables, evidence, mode="auto", log=True):
        '''
        (6) Computes the inference over some variables of the network (given some evidence)
        -----------------
        Parameters:
        variables, evidence : query arguments, used only when mode == "manual"
        mode : "auto"   - query every node with all of its parents set to 1;
                          results are logged and written to the file
               "manual" - run the single query given by the caller;
                          results are logged only
        '''
        inference = VariableElimination(self.best_model)

        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        # NOTE(review): this line is written for "manual" mode as well,
        # although it only describes "auto" mode - confirm this is intended.
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                variables = [node]
                evidence = dict()
                for p in self.best_model.get_parents(node):
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)
        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)

    def draw_network(self, label_choice, location_choice, location, log):
        '''
        (7) Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices
            in the graph.
        label_choice = "single" if we want to show single label, "double" for
            double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2)
            (currently not read by this method)
        log = currently not read by this method
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:
            # One lookup serves every device.
            all_locations = self.general_handler.get_device_locations()
            device_location = dict()  # For H0
            device_locationH1 = dict()  # temp for H1
            for d in self.variables_names:
                device_location[d] = all_locations[d][0]
                device_locationH1[d] = all_locations[d][1]
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                loc_subgraphs[loc] = gv.Digraph("cluster_" + loc)
                # Label with name to be visualized in the image
                loc_subgraphs[loc].graph_attr['label'] = loc

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                # add the node to the right subgraph (H0), coloured by H1
                loc_subgraphs[locationH0].node(
                    node,
                    style='filled',
                    fillcolor=location_colorH1[locationH1])
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges. The inference engine does not depend on the
        # edge, so it is built once (and only if there is an edge to label).
        edges = list(self.best_model.edges())
        inference = VariableElimination(self.best_model) if edges else None
        for parent, child in edges:
            # Inference for first label and color of edges
            value = round(
                self._probability_when_active(inference, child, parent), 2)
            label = ""
            if label_choice == "single":
                label = str(value)
            if label_choice == "double":
                # Inference for second label
                value_inv = round(
                    self._probability_when_active(inference, parent, child),
                    2)
                label = str(value) + "|" + str(value_inv)

            color = "red" if value >= 0.75 else "black"
            bn_graph.edge(parent, child, color=color, label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            locat = "_H0H1" if location_choice else ""
            imgPath = ('../../output/' + self.device_considered + '_' +
                       self.priority_considered + locat)
        bn_graph.render(imgPath)
        os.remove(imgPath)  # remove the source code generated by graphviz

    def data_info(self, selection, log):
        '''
        (9) Prints or logs some extra information about the data or the network
        -----------------
        Parameters:
        selection : container of the section numbers to produce
            1 - device ranking, 2 - edges of the network,
            3 - markov network, 4 - inference network
        log : "True" if you want to print debug information in the console
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)",
                newline=True)
            for rank, dr in enumerate(self.rankedDevices):
                # Stop after the announced maximum (the previous counter
                # stopped one device early).
                if rank >= self._MAX_RANKED_DEVICES:
                    break
                self.file_writer.write_txt(dr[0] + "             \t" +
                                           str(dr[1]) + "\t" + str(dr[2]))

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            # create the markov model from the BN
            self.markov = self.best_model.to_markov_model()
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                nice_graph.add_node(pydot.Node(node))
            for edge in self.markov.edges():
                nice_graph.add_edge(
                    pydot.Edge(edge[0], edge[1], color="black"))
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:",
                                       newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(factor.__str__())

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = list(self.best_model.nodes())
            inference = VariableElimination(self.best_model)
            # Visit every unordered pair of nodes exactly once.
            for pos, node1 in enumerate(nodes, start=1):
                for node2 in nodes[pos:]:
                    # probability of direct activation (node1=1 -> node2)
                    prob1 = round(
                        self._probability_when_active(inference, node2,
                                                      node1), 2)
                    # probability of inverse activation (node2=1 -> node1)
                    prob2 = round(
                        self._probability_when_active(inference, node1,
                                                      node2), 2)

                    choice = self._pick_inference_edge(prob1, prob2)
                    if choice is None:
                        continue
                    forward, color = choice
                    self.fix_node_presence([node1, node2], nice_graph)
                    if forward:
                        source, target = node1, node2
                        double_label = str(prob1) + "|" + str(prob2)
                    else:
                        source, target = node2, node1
                        double_label = str(prob2) + "|" + str(prob1)
                    nice_graph.add_edge(
                        pydot.Edge(source,
                                   target,
                                   color=color,
                                   label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' +
                                     self.device_considered + '_' +
                                     self.priority_considered +
                                     '-inference_network.png')

    @staticmethod
    def _probability_when_active(inference, target, given):
        ''' Returns entry 1 of the queried distribution of "target", with "given" set to 1 as evidence. '''
        phi_query = inference.query([target], {given: 1})
        return phi_query[target].values[1]

    @staticmethod
    def _pick_inference_edge(prob1, prob2):
        '''
        Decides whether the inference network gets an arc for a pair of nodes.
        prob1 is the (rounded) direct activation probability node1 -> node2,
        prob2 the inverse one. Returns None for "no arc", otherwise a tuple
        (forward, color) where forward is True for an arc node1 -> node2 and
        False for node2 -> node1.
        '''
        if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:
            return True, "red"
        if prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
            return False, "red"
        # NOTE(review): this "orange" case can never be reached - when both
        # probabilities are >= 0.75 their difference is at most 0.25, so the
        # first test above already matched. Kept as is because the intended
        # ordering of the rules is not clear from the code.
        if prob1 >= 0.75 and prob2 >= 0.75:
            return prob1 >= prob2, "orange"
        if prob1 >= 0.55 and prob2 >= 0.55:
            return prob1 >= prob2, "black"
        return None

    def fix_node_presence(self, nodes, pydot_graph):
        ''' Adds the list of nodes to the graph, if they are not already present '''
        for node in nodes:
            candidate = pydot.Node(node)
            # Look the node up by the name pydot stores it under; comparing
            # the raw name against the Node objects of get_nodes() never
            # matches, which added a duplicate on every call.
            if not pydot_graph.get_node(candidate.get_name()):
                pydot_graph.add_node(candidate)

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph.
        Raises DataError when no node is left afterwards.
        '''
        # Removing an isolated node never changes the edges, so the set of
        # connected nodes can be computed once up front.
        connected = set()
        for edge in self.best_model.edges():
            connected.update(edge)

        # Iterate over a snapshot: the graph is modified inside the loop.
        for nodeX in list(self.best_model.nodes()):
            if nodeX not in connected:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)

        # len() works whether nodes() returns a list or a view.
        if len(self.best_model.nodes()) == 0:
            raise DataError(
                "No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value
        (WORKS WITH MAX 10 DIFFERENT LOCATIONS: an 11th location raises IndexError)
        '''
        # NOTE(review): 'Red' appears twice, so the 3rd and 6th locations
        # share a colour - confirm whether another colour was intended.
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Red', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]
        location_color = dict()  # key = location; value = color
        for loc in device_location.values():
            if loc not in location_color:
                # Colours are handed out in palette order, one per location.
                location_color[loc] = system_color[len(location_color)]
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
exam_cpd = TabularCPD( variable='Exam', variable_card=2, values=[[0.95,0.2], [0.05,0.8]], evidence=['Musicianship'], evidence_card=[2]) #print(rating_cpd) #print(difficulty_cpd) #print(musicianship_cpd) #print(letter_cpd) #print(exam_cpd) print(music_model.edges()) #Add the CPDS to the model music_model.add_cpds(difficulty_cpd,musicianship_cpd,letter_cpd,exam_cpd,rating_cpd) #print(music_model.get_cpds()) print(music_model.check_model()) #Create object to perform inference on model music_infer = VariableElimination(music_model) #Probability Musicianship m_1 = music_infer.query(variables=['Musicianship']) print(m_1['Musicianship'])
#coding: utf-8 from pgmpy.models import BayesianModel from pgmpy.factors.discrete import TabularCPD #定义模型结构 cancer_model = BayesianModel([ ('PT', 'HO'), #('party', 'hangover'), ('HO', 'SA'),#'hangover', 'smell-alcohol'), ('HO', 'HA'), #('hangover', 'headache'), ('BT', 'HA'),#'brain-tumor', 'headache'), ('BT', 'PX') #('brain-tumor', 'pos-xray'), ]) for e in cancer_model.edges(): print(e) #添加概率 cpd_party = TabularCPD(variable='PT', variable_card=2, values=[[0.8], [0.2]]) cpd_braintumor = TabularCPD(variable='BT', variable_card=2, values=[[0.999], [0.001]]) cpd_hangover = TabularCPD(variable='HO', variable_card=2, values=[[1.000, 0.300], [0.000, 0.700]], evidence=['PT'], evidence_card=[2]) cpd_smellalcohol = TabularCPD(variable='SA', variable_card=2, values=[[0.900, 0.200], [0.100, 0.800]], evidence=['HO'], evidence_card=[2])
# Bayesian network for students from pgmpy.models import BayesianModel model = BayesianModel() # Add nodes model.add_nodes_from(['difficulty', 'intelligence', 'grade', 'sat', 'letter']) print(model.nodes()) # Add edges model.add_edges_from([('difficulty', 'grade'), ('intelligence', 'grade'), ('intelligence', 'sat'), ('grade', 'letter')]) print(model.edges())
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-level query methods of ``BayesianModel``.

    Fixtures built in ``setUp``:

    * ``G``  - structure only: a->d, b->d, d->e, b->c
    * ``G1`` - diff->grade, intel->grade, with CPDs attached
    * ``G2`` - structure only: d->g, g->l, i->g, i->l
    """

    def setUp(self):
        self.G = BayesianModel([("a", "d"), ("b", "d"), ("d", "e"),
                                ("b", "c")])
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            values=[
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([("d", "g"), ("g", "l"), ("i", "g"),
                                 ("i", "l")])

    @staticmethod
    def _undirected(edges):
        """Return *edges* as a set of frozensets so edge orientation is ignored."""
        return {frozenset(edge) for edge in edges}

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        expected_edges = [("a", "b"), ("a", "d"), ("b", "c"), ("d", "b"),
                          ("e", "d")]
        # Compare whole edge sets: checking only that each produced edge is
        # expected would also pass for a moral graph with missing edges.
        self.assertSetEqual(self._undirected(moral_graph.edges()),
                            self._undirected(expected_edges))

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([("a", "d"), ("d", "e"), ("b", "d"), ("b", "c"),
                           ("a", "b")])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        expected_edges = [("a", "b"), ("c", "b"), ("d", "a"), ("d", "b"),
                          ("d", "e")]
        self.assertSetEqual(self._undirected(moral_graph.edges()),
                            self._undirected(expected_edges))

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of("g")
        ancestors2 = self.G2._get_ancestors_of("d")
        ancestors3 = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(ancestors1, {"d", "i", "g"})
        self.assertEqual(ancestors2, {"d"})
        self.assertEqual(ancestors3, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        # "h" is not a node of G2.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, "h")

    def test_get_cardinality(self):
        self.assertDictEqual(self.G1.get_cardinality(), {
            "diff": 2,
            "intel": 3,
            "grade": 3
        })

    def test_get_cardinality_with_node(self):
        self.assertEqual(self.G1.get_cardinality("diff"), 2)
        self.assertEqual(self.G1.get_cardinality("intel"), 3)
        self.assertEqual(self.G1.get_cardinality("grade"), 3)

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies("a"),
                         Independencies(["a", ["b", "c"]]))
        self.assertEqual(
            self.G.local_independencies("c"),
            Independencies(["c", ["a", "d", "e"], "b"]),
        )
        self.assertEqual(self.G.local_independencies("d"),
                         Independencies(["d", "c", ["b", "a"]]))
        self.assertEqual(
            self.G.local_independencies("e"),
            Independencies(["e", ["c", "b", "a"], "d"]),
        )
        self.assertEqual(self.G.local_independencies("b"),
                         Independencies(["b", "a"]))
        self.assertEqual(self.G1.local_independencies("grade"),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        self.assertEqual(chain.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        self.assertEqual(fork.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        self.assertEqual(collider.get_independencies(),
                         Independencies(("X", "Z"), ("Z", "X")))

    def test_is_imap(self):
        val = [
            0.01,
            0.01,
            0.08,
            0.006,
            0.006,
            0.048,
            0.004,
            0.004,
            0.032,
            0.04,
            0.04,
            0.32,
            0.024,
            0.024,
            0.192,
            0.016,
            0.016,
            0.128,
        ]
        JPD = JointProbabilityDistribution(["diff", "intel", "grade"],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(["diff", "intel", "grade"], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is rejected with a TypeError.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanet(self):
        G = DAG([
            ("x", "y"),
            ("z", "y"),
            ("y", "w"),
            ("y", "v"),
            ("u", "w"),
            ("s", "v"),
            ("w", "t"),
            ("w", "m"),
            ("v", "n"),
            ("v", "q"),
        ])
        self.assertEqual(set(G.get_markov_blanket("y")),
                         {"s", "w", "x", "u", "z", "v"})

    def test_get_immoralities(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertEqual(G.get_immoralities(), {("w", "x"), ("w", "z")})
        G1 = BayesianModel([("x", "y"), ("z", "y"), ("z", "x"), ("w", "y")])
        self.assertEqual(G1.get_immoralities(), {("w", "x"), ("w", "z")})
        G2 = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y"),
                            ("w", "x")])
        self.assertEqual(G2.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        G2 = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # The copy must hold its own CPD objects, not references to G1's.
        self.assertIsNot(self.G1.get_cpds("diff"),
                         model_copy.get_cpds("diff"))

        # Changing a CPD of the original must not show up in the copy.
        self.G1.remove_cpds("diff")
        diff_cpd = TabularCPD("diff", 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds("diff"),
                            model_copy.get_cpds("diff"))

        # Neither must a structural change of the original.
        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node("diff")
        self.assertEqual(sorted(self.G1.nodes()), sorted(["grade", "intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(["diff", "grade"])
        self.assertEqual(sorted(self.G1.nodes()), sorted(["intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")
        self.assertRaises(ValueError, self.G1.get_cpds, "grade")

    def tearDown(self):
        del self.G
        del self.G1
        del self.G2
class TestBaseModelCreation(unittest.TestCase):
    """Tests for building a ``BayesianModel``: adding nodes and edges, and
    rejection of self loops and cycles.

    ``nodes()``, ``edges()`` and ``predecessors()`` are wrapped in ``list``
    before a list comparison, so the assertions hold whether the graph
    library returns lists or views/iterators.
    """

    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        # Only checks that non-string node names are accepted.
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(list(self.G.nodes()), ['a'])

    def test_add_node_nonstring(self):
        # Only checks that a non-string node name is accepted.
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        # Only checks that non-string node names are accepted.
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(list(self.G.edges()), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        # Only checks that non-string node names are accepted.
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        # c -> a would close the cycle a -> c -> a.
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'),
                                 ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        # Only checks that non-string node names are accepted.
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.g.predecessors('a')), [])
        self.assertListEqual(list(self.g.predecessors('b')), ['a'])
        self.assertListEqual(list(self.g.predecessors('c')), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(list(self.G.predecessors('a')), [])
        self.assertListEqual(list(self.G.predecessors('b')), ['a'])
        self.assertListEqual(list(self.G.predecessors('c')), ['b'])

    def tearDown(self):
        del self.G
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

# Generating random data for two coin tossing examples:
# 1000 rows of two integer columns, every value drawn from {0, 1}.
raw_data = np.random.randint(low=0, high=2, size=(1000, 2))
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
print(data)

# NOTE(review): the model has no nodes or edges when fit() is called -
# confirm that this is what the example intends to demonstrate.
coin_model = BayesianModel()
coin_model.fit(data, estimator=BayesianEstimator)

# The results of the next three calls are neither stored nor printed, so
# they are only visible in an interactive session.
coin_model.get_cpds()
coin_model.nodes()
coin_model.edges()
class TestBaseModelCreation(unittest.TestCase):
    """Checks how a ``BayesianModel`` is built up: nodes and edges can be
    added with string or non-string names, while self loops and cycles are
    refused with ``ValueError``."""

    def setUp(self):
        self.G = BayesianModel()

    def tearDown(self):
        del self.G

    # ---- construction ----------------------------------------------------

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        node_names = sorted(self.g.nodes())
        self.assertListEqual(node_names, ["a", "b", "c"])
        edge_pairs = hf.recursive_sorted(self.g.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["b", "c"]])

    def test_class_init_with_data_nonstring(self):
        # Passes as long as construction does not raise.
        BayesianModel([(1, 2), (2, 3)])

    # ---- nodes -----------------------------------------------------------

    def test_add_node_string(self):
        graph = self.G
        graph.add_node("a")
        self.assertListEqual(list(graph.nodes()), ["a"])

    def test_add_node_nonstring(self):
        # Passes as long as the call does not raise.
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        wanted = ["a", "b", "c", "d"]
        self.G.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.G.nodes()), wanted)

    def test_add_nodes_from_non_string(self):
        # Passes as long as the call does not raise.
        self.G.add_nodes_from([1, 2, 3, 4])

    # ---- single edges ----------------------------------------------------

    def test_add_edge_string(self):
        graph = self.G
        graph.add_edge("d", "e")
        self.assertListEqual(sorted(graph.nodes()), ["d", "e"])
        self.assertListEqual(list(graph.edges()), [("d", "e")])

        graph.add_nodes_from(["a", "b", "c"])
        graph.add_edge("a", "b")
        edge_pairs = hf.recursive_sorted(graph.edges())
        self.assertListEqual(edge_pairs, [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        # Passes as long as the call does not raise.
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        with self.assertRaises(ValueError):
            self.G.add_edge("a", "a")

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([("a", "b"), ("a", "c")])
        with self.assertRaises(ValueError):
            self.G.add_edge("c", "a")

    # ---- edge batches ----------------------------------------------------

    def test_add_edges_from_string(self):
        graph = self.G
        graph.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(graph.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(graph.edges()),
                             [["a", "b"], ["b", "c"]])

        graph.add_nodes_from(["d", "e", "f"])
        graph.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(graph.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        all_edges = [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
        self.assertListEqual(
            hf.recursive_sorted(graph.edges()),
            hf.recursive_sorted(all_edges),
        )

    def test_add_edges_from_nonstring(self):
        # Passes as long as the call does not raise.
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        with self.assertRaises(ValueError):
            self.G.add_edges_from([("a", "a")])

    def test_add_edges_from_result_cycle(self):
        with self.assertRaises(ValueError):
            self.G.add_edges_from([("a", "b"), ("b", "c"), ("c", "a")])

    # ---- parents ---------------------------------------------------------

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        for child, parents in (("a", []), ("b", ["a"]), ("c", ["b"])):
            self.assertListEqual(list(self.g.predecessors(child)), parents)

    def test_update_node_parents(self):
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edges_from([("a", "b"), ("b", "c")])
        for child, parents in (("a", []), ("b", ["a"]), ("c", ["b"])):
            self.assertListEqual(list(self.G.predecessors(child)), parents)