def setUp(self):
    """Build two small DBNs and store a DBNInference for each.

    Network 1 has the edges Z0 -> X0, Z0 -> Y0 and Z0 -> Z1; network 2 has
    Z0 -> X0, X0 -> Y0 and Z0 -> Z1.  All variables are binary.  The
    resulting engines are kept as ``self.dbn_inference_1`` and
    ``self.dbn_inference_2``.
    """
    z0, x0, y0, z1 = ('Z', 0), ('X', 0), ('Y', 0), ('Z', 1)

    # ---- network 1: Z is the parent of both X and Y ----------------------
    fork_net = DynamicBayesianNetwork()
    fork_net.add_edges_from([(z0, x0), (z0, y0), (z0, z1)])
    fork_prior_z = TabularCPD(z0, 2, [[0.8, 0.2]])
    fork_x = TabularCPD(
        x0, 2, [[0.9, 0.6], [0.1, 0.4]], evidence=[z0], evidence_card=[2])
    fork_y = TabularCPD(
        y0, 2, [[0.7, 0.2], [0.3, 0.8]], evidence=[z0], evidence_card=[2])
    fork_next_z = TabularCPD(
        z1, 2, [[0.9, 0.1], [0.1, 0.9]], evidence=[z0], evidence_card=[2])
    fork_net.add_cpds(fork_prior_z, fork_next_z, fork_x, fork_y)
    fork_net.initialize_initial_state()
    self.dbn_inference_1 = DBNInference(fork_net)

    # ---- network 2: chain Z -> X -> Y ------------------------------------
    chain_net = DynamicBayesianNetwork()
    chain_net.add_edges_from([(z0, x0), (x0, y0), (z0, z1)])
    chain_prior_z = TabularCPD(z0, 2, [[0.5, 0.5]])
    chain_x = TabularCPD(
        x0, 2, [[0.6, 0.9], [0.4, 0.1]], evidence=[z0], evidence_card=[2])
    chain_y = TabularCPD(
        y0, 2, [[0.2, 0.3], [0.8, 0.7]], evidence=[x0], evidence_card=[2])
    chain_next_z = TabularCPD(
        z1, 2, [[0.4, 0.7], [0.6, 0.3]], evidence=[z0], evidence_card=[2])
    chain_net.add_cpds(chain_x, chain_y, chain_next_z, chain_prior_z)
    chain_net.initialize_initial_state()
    self.dbn_inference_2 = DBNInference(chain_net)
def setUp(self):
    """Create an unpopulated DBN and the six CPDs stored on the fixture.

    The CPDs cover D, I and G at time slices 0 and 1.  They are only
    stored as attributes here; nothing is added to ``self.network``.
    """
    d0, i0, g0 = ("D", 0), ("I", 0), ("G", 0)
    d1, i1, g1 = ("D", 1), ("I", 1), ("G", 1)

    def grade_table():
        # A fresh nested list per CPD so the two grade CPDs never share one.
        return [
            [0.3, 0.05, 0.8, 0.5],
            [0.4, 0.25, 0.1, 0.3],
            [0.3, 0.7, 0.1, 0.2],
        ]

    self.network = DynamicBayesianNetwork()

    # Grade given difficulty and intelligence, slice 0.
    self.grade_cpd = TabularCPD(
        g0, 3, values=grade_table(), evidence=[d0, i0], evidence_card=[2, 2]
    )
    # Difficulty at slice 1 given difficulty at slice 0.
    self.d_i_cpd = TabularCPD(
        d1, 2, values=[[0.6, 0.3], [0.4, 0.7]], evidence=[d0], evidence_card=[2]
    )
    # Parent-less CPDs for slice 0.
    self.diff_cpd = TabularCPD(d0, 2, values=[[0.6, 0.4]])
    self.intel_cpd = TabularCPD(i0, 2, values=[[0.7, 0.3]])
    # Intelligence at slice 1 given intelligence at slice 0.
    self.i_i_cpd = TabularCPD(
        i1, 2, values=[[0.5, 0.4], [0.5, 0.6]], evidence=[i0], evidence_card=[2]
    )
    # Grade given difficulty and intelligence, slice 1.
    self.grade_1_cpd = TabularCPD(
        g1, 3, values=grade_table(), evidence=[d1, i1], evidence_card=[2, 2]
    )
def setUp(self): self.G = DynamicBayesianNetwork() self.G.add_edges_from([ (("D", 0), ("G", 0)), (("I", 0), ("G", 0)), (("D", 0), ("D", 1)), (("I", 0), ("I", 1)), ]) """
def query_time_frame_3():
    """Query the third time frame through a second two-slice DBN.

    Reads ``self.data``, ``pr2`` and ``nodes2`` from the enclosing scope,
    stores the new network as ``self.model23``, copies its slice-1 state
    names into ``self.state_names`` under slice index 2 and writes the
    queried distributions into ``Distribution`` under slice index 2.
    """
    # Dynamic Bayesian Network only supports 2-time slice, 2 time frame.
    # Hence, create a new DBN with the data of time 2 and time 3 to query
    # nodes in time 3.
    variables = ['DPQ', 'C', 'TQ', 'OU', 'DI', 'DFT', 'RD', 'DFO']
    columns = {}
    for name in variables:
        columns[name + '2'] = (name, 0)
        columns[name + '3'] = (name, 1)
    data23 = self.data.rename(columns=columns)
    # The un-suffixed (time 1) columns are not part of this model.
    # ``axis`` is passed by keyword: newer pandas rejects a positional axis.
    data23 = data23.drop(variables, axis=1)

    self.model23 = DynamicBayesianNetwork()
    self.model23.add_edges_from([
        (('DPQ', 0), ('DI', 0)),
        (('C', 0), ('DI', 0)),
        (('TQ', 0), ('DFT', 0)),
        (('DI', 0), ('DFT', 0)),
        (('DI', 0), ('RD', 0)),
        (('DFT', 0), ('RD', 0)),
        (('RD', 0), ('DFO', 0)),
        (('OU', 0), ('DFO', 0)),
        (('DPQ', 0), ('DPQ', 1)),
        (('C', 0), ('C', 1)),
        (('TQ', 0), ('TQ', 1)),
        (('OU', 0), ('OU', 1)),
        (('RD', 0), ('DI', 1)),
    ])
    add_cpds_to_model(self.model23, data23)

    # save state names to draw graph
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    for key, names in self.model23.state_names.items():
        if key[1] == 1:
            self.state_names[(key[0], 2)] = names

    pr3 = pr2
    nodes3 = nodes2
    # Same text as the former print statement, valid on Python 2 and 3.
    print('query 3 %s %s' % (pr3, nodes3))
    # pr = {('DPQ', 1): 1,...} | nodes = [('DPQ', 1),...]
    infer3 = DBNInferenceRewritten(self.model23)
    query = infer3.query(nodes3, evidence=pr3)
    for key, value in query.items():
        Distribution[(key[0], 2)] = value.values
def setUp(self):
    """Create an unpopulated DBN and the six CPDs stored on the fixture.

    The CPDs cover D, I and G at time slices 0 and 1.  They are only
    stored as attributes here; nothing is added to ``self.network``.
    """
    self.network = DynamicBayesianNetwork()
    # NOTE(review): the third column of the grade table below
    # (0.9 + 0.08 + 0.2) adds up to 1.18 rather than 1 -- confirm whether
    # that is intentional before relying on these values.
    self.grade_cpd = TabularCPD(
        ('G', 0), 3,
        [[0.3, 0.05, 0.9, 0.5],
         [0.4, 0.25, 0.08, 0.3],
         [0.3, 0.7, 0.2, 0.2]],
        [('D', 0), ('I', 0)], [2, 2])
    # evidence_card is given as a list (one cardinality per evidence
    # variable), matching the two-parent CPDs in this fixture.
    self.d_i_cpd = TabularCPD(
        ('D', 1), 2, [[0.6, 0.3], [0.4, 0.7]], [('D', 0)], [2])
    self.diff_cpd = TabularCPD(('D', 0), 2, [[0.6, 0.4]])
    self.intel_cpd = TabularCPD(('I', 0), 2, [[0.7, 0.3]])
    self.i_i_cpd = TabularCPD(
        ('I', 1), 2, [[0.5, 0.4], [0.5, 0.6]], [('I', 0)], [2])
    self.grade_1_cpd = TabularCPD(
        ('G', 1), 3,
        [[0.3, 0.05, 0.9, 0.5],
         [0.4, 0.25, 0.08, 0.3],
         [0.3, 0.7, 0.2, 0.2]],
        [('D', 1), ('I', 1)], [2, 2])
def __init__(self, model_file='../DBN/network.nx'):
    """Rebuild a DBN from a stored graph and prepare inference on it.

    Parameters
    ----------
    model_file: str
        Path handed to ``nx.read_gpickle``.  The loaded object must
        provide ``edges()`` and a ``cpds`` attribute.
    """
    # NOTE(review): presumably this unpickles the file -- only load files
    # from a trusted source.
    stored_graph = nx.read_gpickle(model_file)

    self.dbn = network = DynamicBayesianNetwork(stored_graph.edges())
    network.add_cpds(*stored_graph.cpds)
    network.initialize_initial_state()

    self.dbn_infer = DBNInference(network)
if values[IX_CORRECT_ACTION] == PROMPT: curr_sample[('Prompt', 1)] = 1 elif values[IX_CORRECT_ACTION] == REWARD: curr_sample[('Reward', 1)] = 1 elif values[IX_CORRECT_ACTION] == ABORT: curr_sample[('Abort', 1)] = 1 final_samples.append(curr_sample) # LEARNS STRUCTURE FROM DATA print 'Learning model' data = pd.DataFrame(final_samples) hc = HillClimbSearchDBN(data, scoring_method=BicScore(data)) # GIVE STRUCTURE LEARNING ALGORITHM A HINT OF THE STRUCTURE nodes = hc.state_names.keys() start = DynamicBayesianNetwork() nodes = set(X[0] for X in nodes) start.add_nodes_from_ts(nodes, [0, 1]) # start.add_edge(('P', 0), ('R', 0)) # start.add_edge(('P', 0), ('R', 1)) # start.add_edge(('P', 0), ('A', 0)) # start.add_edge(('P', 0), ('A', 1)) # start.add_edge(('P', 0), ('P', 1)) model = hc.estimate(start=start, tabu_length=10, max_indegree=2) # LEARNS PARAMETERS FROM DATA print 'Learning parameters' model.fit(data) # model.fit(data, estimator=BayesianEstimator) # FINALIZES MODEL
import numpy as np
import pandas as pd
import sys
from pgmpy.models import DynamicBayesianNetwork
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination
import matplotlib.pyplot as plt

# Assemble the edges for time indices 0, 1 and 2, hand them to a
# DynamicBayesianNetwork in one call and print what the network holds.
model = DynamicBayesianNetwork()

# DPQ and C feed DI only at the first time index.
list_edges = [(('DPQ', 0), ('DI', 0)), (('C', 0), ('DI', 0))]
for i in range(3):
    # Edges inside time index i.
    list_edges.extend([
        (('DI', i), ('DFT', i)),
        (('TQ', i), ('DFT', i)),
        (('DFT', i), ('RD', i)),
        (('RD', i), ('DFO', i)),
        (('OU', i), ('DFO', i)),
    ])
    # Edges from time index i to i + 1; the last index has no successor.
    if i != 2:
        list_edges.extend([
            (('RD', i), ('DI', i + 1)),
            (('TQ', i), ('TQ', i + 1)),
            (('OU', i), ('OU', i + 1)),
        ])

model.add_edges_from(list_edges)
print(model.edges())
print(model.nodes())
def setUp(self):
    """Store a newly constructed DynamicBayesianNetwork as ``self.network``."""
    fresh_network = DynamicBayesianNetwork()
    self.network = fresh_network
from pgmpy.models import DynamicBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.estimators import HillClimbSearchDBN, BicScore
import networkx as nx
import random as rand
import pandas as pd

# CREATES SIMULATED DBN MODEL
dbn = DynamicBayesianNetwork()

# Node  Name              Values
# I     Subject Interest  engaged, neutral, off
# A     Subject Action    response, no response
# R     Robot Action      prompt, fail, reward
# O     Observation       q values
dbn.add_nodes_from(['I', 'A', 'R', 'O'])

# Check diagram for details
#   I -----------> I2
#   |  ------------^
#   v /            |
#   A ---> R -------
#   |
#   v
#   O
#
# Each entry pairs a parent with its children, in the order the edges are
# handed to the network.
_children_by_parent = [
    (('I', 0), [('A', 0), ('R', 0), ('I', 1)]),
    (('A', 0), [('O', 0), ('R', 0), ('I', 1)]),
    (('R', 0), [('I', 1)]),
]
dbn.add_edges_from([
    (_parent, _child)
    for _parent, _children in _children_by_parent
    for _child in _children
])
def process(self):
    """Fit the two-slice DBNs from the CSV data, query three time frames and plot them.

    Steps, in order:

    1. (Re)load ``self.file_path`` with pandas when it differs from
       ``self.history_file``.
    2. Build ``self.model`` for time frames 1 and 2, estimate its CPDs by
       maximum likelihood and query slices 0 and 1 with the evidence
       returned by ``self.processBox()``.
    3. Build ``self.model23`` for time frames 2 and 3 and query its
       slice 1 to obtain time frame 3.
    4. Pad the distributions via ``stretch_distributions`` and draw the
       three pairwise comparisons with ``self.draw_subplots``.

    The evidence dictionaries are kept in the module-level globals
    ``pr``, ``pr2`` and ``pr3``.
    """
    global pr
    global pr2
    global pr3

    variables = ['DPQ', 'C', 'TQ', 'OU', 'DI', 'DFT', 'RD', 'DFO']

    # Structure shared by both two-slice networks.
    two_slice_edges = [
        (('DPQ', 0), ('DI', 0)),
        (('C', 0), ('DI', 0)),
        (('TQ', 0), ('DFT', 0)),
        (('DI', 0), ('DFT', 0)),
        (('DI', 0), ('RD', 0)),
        (('DFT', 0), ('RD', 0)),
        (('RD', 0), ('DFO', 0)),
        (('OU', 0), ('DFO', 0)),
        (('DPQ', 0), ('DPQ', 1)),
        (('C', 0), ('C', 1)),
        (('TQ', 0), ('TQ', 1)),
        (('OU', 0), ('OU', 1)),
        (('RD', 0), ('DI', 1)),
    ]

    def new_network():
        """Return a DynamicBayesianNetwork holding ``two_slice_edges``."""
        network = DynamicBayesianNetwork()
        network.add_edges_from(two_slice_edges)
        return network

    def slice_frame(suffix_0, suffix_1, dropped_suffix):
        """Rename two column groups to (name, slice) tuples and drop a third."""
        columns = {}
        for name in variables:
            columns[name + suffix_0] = (name, 0)
            columns[name + suffix_1] = (name, 1)
        frame = self.data.rename(columns=columns)
        # ``axis`` is passed by keyword: newer pandas rejects a positional axis.
        return frame.drop([name + dropped_suffix for name in variables], axis=1)

    def add_cpds_to_model(model, data):
        """Estimate a CPD for every node of both slices and attach them."""
        mle = MaximumLikelihoodEstimator(model, data)
        slice_nodes = model.get_slice_nodes(0) + model.get_slice_nodes(1)
        model.add_cpds(*[mle.estimate_cpd(node) for node in slice_nodes])
        model.state_names = mle.state_names

    def calculate_distribution_nodes_input():
        """Give each evidence node a one-hot distribution in all three frames."""
        for key in list(pr.keys()):
            # 1 at the index equal to the observed value, 0 elsewhere.
            Distribution[key] = [
                1 - abs(np.sign(pr[key] - i)) for i in range(5)
            ]
            Distribution[(key[0], 1)] = Distribution[key]
            Distribution[(key[0], 2)] = Distribution[key]
            # Evidence nodes are not queried.
            nodes.remove(key)
            nodes2.remove((key[0], 1))

    def query_time_frame_1():
        """Query the slice-0 nodes of ``self.model``."""
        # Same text as the former print statement, valid on Python 2 and 3.
        print('query 1 %s %s' % (pr, nodes))
        query = infer.query(nodes, evidence=pr)
        for key, value in query.items():
            Distribution[key] = value.values

    def query_time_frame_2():
        """Query the slice-1 nodes of ``self.model`` with the evidence moved to slice 1."""
        global pr2
        for key, value in pr.items():
            pr2[(key[0], 1)] = value
        print('query 2 %s %s' % (pr2, nodes2))
        query = infer.query(nodes2, evidence=pr2)
        for key, value in query.items():
            Distribution[key] = value.values

    def query_time_frame_3():
        """Query time frame 3 through a second network built on frames 2 and 3."""
        # Dynamic Bayesian Network only supports 2-time slice, 2 time frame.
        # Hence, create a new DBN with the data of time 2 and time 3 to
        # query nodes in time 3.
        data23 = slice_frame('2', '3', '')
        self.model23 = new_network()
        add_cpds_to_model(self.model23, data23)

        # save state names to draw graph
        for key, names in self.model23.state_names.items():
            if key[1] == 1:
                self.state_names[(key[0], 2)] = names

        pr3 = pr2
        nodes3 = nodes2
        print('query 3 %s %s' % (pr3, nodes3))
        # pr = {('DPQ', 1): 1,...} | nodes = [('DPQ', 1),...]
        infer3 = DBNInferenceRewritten(self.model23)
        query = infer3.query(nodes3, evidence=pr3)
        for key, value in query.items():
            Distribution[(key[0], 2)] = value.values

    # stretch number axis with max values = max values DI + 1
    def stretch_distributions(max_value_di):
        """Pad state names and distributions up to ``max_value_di + 1``."""
        skipped = set(
            (name, time)
            for time in range(3)
            for name in ('DPQ', 'C', 'OU', 'TQ')
        )
        ns = nodes + nodes2 + [(node[0], 2) for node in nodes]
        for key in ns:
            if key in skipped:
                continue
            last_state = self.state_names[key][-1]
            if last_state == max_value_di:
                self.state_names[key].append(max_value_di + 1)
                Distribution[key] = np.append(Distribution[key], [0])
            elif last_state < max_value_di:
                self.state_names[key].extend(
                    [last_state + 1, max_value_di + 1])
                Distribution[key] = np.append(Distribution[key], [0, 0])

    def standarlize_distribution():
        """Insert zero entries for states 0..4 missing from the fitted model.

        Use when the data size is too small and
        length(DPQ or C or TQ or OU) < 5 => error when drawing the graph.
        """
        for node in ['DPQ', 'TQ', 'C', 'OU']:
            if any(key[0] == node for key in pr.keys()):
                continue
            for index in range(5):
                if index not in self.model.state_names[(node, 0)]:
                    # np.insert instead of .insert(): the queried
                    # distributions are numpy arrays, which have no
                    # insert method.
                    for time in range(3):
                        Distribution[(node, time)] = np.insert(
                            Distribution[(node, time)], index, 0)

    if self.history_file != self.file_path:
        self.data = pd.read_csv(self.file_path)  # "fisrm.csv"
        self.data_size = len(self.data)
        self.history_file = self.file_path

    self.state_names = {}
    self.model = new_network()

    pr = self.processBox()
    pr2 = {}
    pr3 = {}
    nodes = self.model.get_slice_nodes(0)
    nodes2 = self.model.get_slice_nodes(1)
    Distribution = {}

    # Rename and drop data columns to use MaximumLikelyHood
    data12 = slice_frame('', '2', '3')
    add_cpds_to_model(self.model, data12)
    self.state_names = self.model.state_names
    infer = DBNInferenceRewritten(self.model)

    calculate_distribution_nodes_input()
    query_time_frame_1()
    query_time_frame_2()
    query_time_frame_3()

    max_value_di = self.state_names[('DI', 0)][-1]  # array has been sorted
    stretch_distributions(max_value_di)
    # standarlize_distribution()

    self.draw_subplots(Distribution, 0, 1, max_value_di)
    self.draw_subplots(Distribution, 1, 2, max_value_di)
    self.draw_subplots(Distribution, 0, 2, max_value_di)
    plt.show()
def __init__(self):
    """Build the triangle-knowledge networks and a VariableElimination engine.

    ``self.dbn`` receives the abbreviated two-slice structure (CA, CT, C,
    H, P).  ``self.model`` is a ``BayesianModel`` over the spelled-out
    variable names with five binary CPDs attached; each CPD is printed,
    followed by a run of blank lines, the model is checked and
    ``self.inference`` is created from it.
    """
    self.dbn = DBN()
    self.dbn.add_edges_from(
        [
            # (('CA', 0), ('C', 0)) used to be listed twice; once is enough.
            (('CA', 0), ('C', 0)),
            (('CT', 0), ('H', 0)),
            (('CA', 0), ('H', 0)),
            (('C', 0), ('P', 0)),
            (('H', 0), ('P', 0)),
            (('CA', 0), ('CA', 1))
        ]
    )

    # A DynamicBayesianNetwork used to be assigned to self.model here and
    # was overwritten by the assignment below before anything read it.
    self.model = BayesianModel(
        [
            ('Classificacao_Angulos', 'Catetos'),
            ('Classificacao_Triangulos', 'Hipotenusa'),
            ('Classificacao_Angulos', 'Hipotenusa'),
            ('Catetos', 'Pitagoras'),
            ('Hipotenusa', 'Pitagoras'),
        ]
    )

    x = 4  # number of newline characters printed after each CPD

    cpd_classificao_angulos = TabularCPD(
        variable='Classificacao_Angulos',
        variable_card=2,
        values=[[0.4], [0.6]]
    )
    print(cpd_classificao_angulos)
    print('\n' * x)

    cpd_classificao_triangulos = TabularCPD(
        variable='Classificacao_Triangulos',
        variable_card=2,
        values=[[0.3], [0.7]]
    )
    print(cpd_classificao_triangulos)
    print('\n' * x)

    cpd_catetos = TabularCPD(
        variable='Catetos',
        variable_card=2,
        values=[[0.9, 0.2], [0.1, 0.8]],
        evidence=['Classificacao_Angulos'],
        evidence_card=[2]
    )
    print(cpd_catetos)
    print('\n' * x)

    cpd_hipotenusa = TabularCPD(
        variable='Hipotenusa',
        variable_card=2,
        values=[[0.9, 0.2, 0.3, 0.1], [0.1, 0.8, 0.7, 0.9]],
        evidence=['Classificacao_Angulos', 'Classificacao_Triangulos'],
        evidence_card=[2, 2]
    )
    print(cpd_hipotenusa)
    print('\n' * x)

    cpd_pitagoras = TabularCPD(
        variable='Pitagoras',
        variable_card=2,
        values=[[0.9, 0.3, 0.3, 0.1], [0.1, 0.7, 0.7, 0.9]],
        evidence=['Catetos', 'Hipotenusa'],
        evidence_card=[2, 2]
    )
    print(cpd_pitagoras)
    print('\n' * x)

    self.model.add_cpds(
        cpd_classificao_angulos,
        cpd_classificao_triangulos,
        cpd_catetos,
        cpd_hipotenusa,
        cpd_pitagoras
    )
    self.model.check_model()
    self.inference = VariableElimination(self.model)
def setUp(self): self.G = DynamicBayesianNetwork() self.G.add_edges_from( [(('D', 0), ('G', 0)), (('I', 0), ('G', 0)), (('D', 0), ('D', 1)), (('I', 0), ('I', 1))]) """
def estimate(self, start=None, tabu_length=0, max_indegree=None):
    """
    Performs local hill climb search to estimate the `DynamicBayesianNetwork`
    structure that has optimal score, according to the scoring method
    supplied in the constructor. Starts at model `start` and proceeds by
    step-by-step network modifications until a local maximum is reached.
    Only estimates network structure, no parametrization.

    Parameters
    ----------
    start: DynamicBayesianNetwork instance or None
        The starting point for the local search. It is modified in place
        and returned. When None, a new network is created and populated
        with `add_dynamic_nodes` from the variables of the data set.
    tabu_length: int
        If provided, the last `tabu_length` graph modifications cannot be
        reversed during the search procedure. This serves to enforce a
        wider exploration of the search space. Default value: 0 (no
        tabu list).
    max_indegree: int or None
        Passed on to `_legal_operations`. If provided and unequal None,
        the procedure only searches among models where all nodes have at
        most `max_indegree` parents. Defaults to None.

    Returns
    -------
    model: `DynamicBayesianNetwork` instance
        A `DynamicBayesianNetwork` at a (local) score maximum.

    Raises
    ------
    ValueError
        If `start` is neither None nor a `DynamicBayesianNetwork`.

    Notes
    -----
    Every 100 iterations the iteration count, the current edges and the
    last score improvement are printed.

    Examples
    --------
    >>> est = HillClimbSearchDBN(data, scoring_method=BicScore(data))
    >>> best_model = est.estimate(tabu_length=10, max_indegree=2)
    """
    epsilon = 1e-8
    nodes = self.state_names.keys()
    if start is None:
        start = DynamicBayesianNetwork()
        start.add_dynamic_nodes(nodes)
    elif not isinstance(start, DynamicBayesianNetwork):
        raise ValueError(
            "'start' should be a DynamicBayesianNetwork "
            "with the same variables as the data set, or 'None'.")

    def remember(tabu, undo_operation, undo_kind, edge):
        """Return `tabu` with the reversal of the applied operation prepended.

        When both end points of `edge` carry the same time index, the
        same reversal on the edge with the flipped index (1 - index) is
        recorded as well, most recent first.
        """
        (tail_name, tail_slice), (head_name, head_slice) = edge
        entries = [undo_operation]
        if tail_slice == head_slice:
            mirrored = ((tail_name, 1 - tail_slice),
                        (head_name, 1 - head_slice))
            entries.insert(0, (undo_kind, mirrored))
        return (entries + tabu)[:tabu_length]

    tabu_list = []
    current_model = start
    iteration_counter = 0

    while True:
        best_score_delta = 0
        best_operation = None

        for operation, score_delta in self._legal_operations(
                current_model, tabu_list, max_indegree):
            if score_delta > best_score_delta:
                best_operation = operation
                best_score_delta = score_delta

        # Stop once no operation improves the score by at least epsilon.
        if best_operation is None or best_score_delta < epsilon:
            break

        kind, edge = best_operation[0], best_operation[1]
        if kind == '+':
            current_model.add_edge(*edge)
            tabu_list = remember(tabu_list, ('-', edge), '-', edge)
        elif kind == '-':
            current_model.remove_edge(*edge)
            tabu_list = remember(tabu_list, ('+', edge), '+', edge)
        elif kind == 'flip':
            X, Y = edge
            current_model.remove_edge(X, Y)
            current_model.add_edge(Y, X)
            # A flip is barred by recording the flip operation itself.
            tabu_list = remember(tabu_list, best_operation, 'flip', edge)

        iteration_counter += 1
        if iteration_counter % 100 == 0:
            # Single-argument print calls behave the same on Python 2 and 3.
            print(iteration_counter)
            print(current_model.edges())
            print(best_score_delta)

    return current_model