def __init__(self, model, actions, py_func):
    """Store the observational model and build one post-action model per action.

    model is a pgmpy.BayesianModel
    actions is a list of (var, value) tuples
    py_func is kept on the instance unchanged
    """
    self.py_func = py_func

    # Parents of the node named 'Y', kept in sorted order.
    self.parents = sorted(model.get_parents('Y'))
    self.N = len(self.parents)

    self.actions = actions
    self.K = len(actions)

    self.observational_model = model
    self.observational_inference = VariableElimination(
        self.observational_model)

    # One model (via GeneralModel.do) and one sampler per action, in the
    # same order as `actions`.
    self.post_action_models = [GeneralModel.do(model, act) for act in actions]
    self.samplers = [
        BayesianModelSampling(post_model)
        for post_model in self.post_action_models
    ]

    # For every post-action model, query the parents; the second element of
    # the query result is stored as the distribution over the parents.
    self.interventional_distributions = []
    for post_model in self.post_action_models:
        engine = VariableElimination(post_model)
        _, parents_distribution = engine.query(self.parents)
        self.interventional_distributions.append(parents_distribution)

    self.pre_compute()
def network_inference(network_model, data):
    """Predict the state probabilities of all variables missing from ``data``.

    Parameters
    ----------
    network_model : model object exposing ``nodes()`` and accepted by
        ``VariableElimination``.
    data : pandas Series
        One observation; the index holds the names of the observed variables.

    Returns
    -------
    The object returned by ``VariableElimination.query`` for the missing
    variables (iterated here with ``.items()``).

    Raises
    ------
    ValueError
        If ``data`` already covers every model variable, or if it contains a
        variable the model does not know.
    """
    model_nodes = set(network_model.nodes())
    observed = set(data.index)

    # Reject malformed input before running any inference.
    if observed == model_nodes:
        raise ValueError("No variable missing in data. Nothing to predict")
    if observed - model_nodes:
        raise ValueError("Data has variables which are not in the model")

    missing_variables = model_nodes - observed

    # Exact inference using variable elimination.
    model_inference = VariableElimination(network_model)
    states_dict = model_inference.query(variables=missing_variables,
                                        evidence=data.to_dict())

    # Report each variable whose last state has probability above 0.5.
    # (The original had an `else:` branch containing only a comment, which is
    # not valid Python; nothing is printed for the remaining variables.)
    for k, v in states_dict.items():
        last_state_prob = v.values[-1]
        if last_state_prob > 0.5:
            print(k, 'probabilily occured: %.3f' % last_state_prob)
    return states_dict
def testCalifornia(true_ny_data, true_ca_data, predicted_ca_data,
                   predicted_ny_data):
    """Query and print the 'True CA' distribution for every sample.

    The four arguments are parallel, index-aligned sequences of labels. For
    each position the function prints the labels and the result of querying
    'True CA' given 'True NY', 'Predicted CA' and 'Predicted NY'.

    Uses the module-level ``cities_model``. Returns nothing.
    """
    ca_infer = VariableElimination(cities_model)
    avg_prob = 0.0

    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the original used Python 2-only print statements.
    for index, true_ny_label in enumerate(true_ny_data):
        print("True NY label " + str(true_ny_label))
        true_ca_label = true_ca_data[index]
        print("True CA label " + str(true_ca_label))
        predicted_ca_label = predicted_ca_data[index]
        print("Predicted CA label " + str(predicted_ca_label))
        predicted_ny_label = predicted_ny_data[index]

        prob_ca_protest = ca_infer.query(variables=['True CA'],
                                         evidence={
                                             'True NY': true_ny_label,
                                             'Predicted CA': predicted_ca_label,
                                             'Predicted NY': predicted_ny_label
                                         })
        factor = prob_ca_protest['True CA']
        print(factor)

    # NOTE(review): the averaging code was commented out in the original, so
    # avg_prob is never accumulated and this always prints 0.0.
    print(avg_prob)
def Test_Data_Inference_n_steps(self, df_test, n_tsteps):
    """Run per-row inference for 'M_t' and the following n_tsteps - 1 steps.

    For every row of ``df_test`` the non-target columns are used as evidence.
    When ``check_data_in_evidence`` accepts the row, each target variable is
    queried and three cells are written to the result frame: ``<var>_0``,
    ``<var>_1`` (the first two entries of the queried values) and ``<var>``
    (``Map_Occ_Values`` of the second entry). Otherwise the three cells are
    set to NaN. The duration of each query is appended to
    ``self.debug_timmer``.

    Returns the result DataFrame, which starts as a copy of the
    COLUMN_SEQUENCE columns of ``df_test``.
    """
    results = df_test.filter(items=COLUMN_SEQUENCE).copy()
    infer = VariableElimination(self.model)

    # Unique values seen in each test column.
    unique_by_column = dict(
        zip(df_test.columns, [df_test[col].unique() for col in df_test.columns]))

    # Target variables: 'M_t', then 'M_t+1' ... 'M_t+(n_tsteps-1)'.
    targets = ['M_t']
    if n_tsteps > 1:
        targets = targets + ["M_t+{}".format(step) for step in range(1, n_tsteps)]

    count = 0
    self.debug_timmer = []

    evidence_columns = [col for col in df_test.columns if col not in targets]
    evidence_by_row = df_test.filter(items=evidence_columns).to_dict('index')

    for row_label, evidence in evidence_by_row.items():
        if not check_data_in_evidence(evidence, unique_by_column):
            # Row rejected by check_data_in_evidence: nothing can be inferred.
            for target in targets:
                results.at[row_label, '{}_0'.format(target)] = np.nan
                results.at[row_label, '{}_1'.format(target)] = np.nan
                results.at[row_label, '{}'.format(target)] = np.nan
        else:
            for target in targets:
                started = time.time()
                result = infer.query(variables=[target], evidence=evidence)
                elapsed = time.time() - started
                self.debug_timmer.append(elapsed)

                values = result[target].values
                results.at[row_label, '{}_0'.format(target)] = values[0]
                results.at[row_label, '{}_1'.format(target)] = values[1]
                results.at[row_label, '{}'.format(target)] = Map_Occ_Values(values[1])
        count += 1

    logging.info("thermostat {} - Iterations of test {}".format(
        self.thermostat.tstat_id, count))
    return results
def Inference(model, variables, evidence, treshold):
    """Query the most probable state of each variable given the evidence.

    Parameters
    ----------
    model : model accepted by ``VariableElimination``.
    variables : list of variable names to query, e.g. ``[var3, var4]``.
    evidence : dict mapping observed variables to labels,
        e.g. ``{var1: label, var2: label}``.
    treshold : minimum probability the best state must exceed.

    Returns
    -------
    (ok, result) : tuple
        ``ok`` is False when any exception occurred (the traceback is
        printed); ``result`` then holds whatever was computed so far.
        ``result[var]`` is ``{'value': index_of_best_state, 'prob': p}`` when
        the maximum is unique and above ``treshold``; otherwise both entries
        are NaN.
    """
    result = {}
    try:
        infer = VariableElimination(model)
        inferresult = infer.query(variables=variables, evidence=evidence)
        for var in variables:
            probs = inferresult[var].values.astype('float32')
            print(probs)
            best = max(probs)
            # Only accept a state when it is the single maximum.
            has_unique_max = len(probs[probs == best]) == 1
            if has_unique_max and best > treshold:
                result[var] = {'value': np.argmax(probs), 'prob': best}
            else:
                # np.nan instead of np.NaN: the latter was removed in NumPy 2.0.
                result[var] = {'value': np.nan, 'prob': np.nan}
    except Exception:
        # Best-effort API: report the failure but do not propagate it.
        # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.
        print(traceback.format_exc())
        return False, result
    return True, result
def inference(model, variables=None, evidence=None, verbose=3):
    '''
    Answer a conditional probability query on a model with Variable Elimination.

    Inference means asking conditional probability questions of the model,
    e.g. "what is the probability that the sprinkler is on given that it is
    raining", which is equivalent to asking $ P(g^1 | i^1) $. Inference
    algorithms deal with answering such queries efficiently and fall into two
    categories:

    1. Exact inference: computes the exact probability values for a query.
    2. Approximate inference: computes approximate values to save computation.

    Two common exact inference algorithms are:

    1. Clique Tree Belief Propagation
    2. Variable Elimination

    Variable elimination is conceptually the same as marginalizing over the
    joint distribution, but it avoids building the joint distribution by
    marginalizing over much smaller factors. To eliminate $ X $, the product of
    all factors involving $ X $ is computed and $ X $ is summed out of that
    product only; the summation is pushed inside so that it touches only the
    factors that involve the variable.

    Parameters
    ----------
    model : mapping whose 'model' entry is passed to VariableElimination.
    variables : variables to query.
    evidence : observed variable states.
    verbose : accepted but not used by this function.

    Returns
    -------
    The result of the query, which is also printed.
    '''
    engine = VariableElimination(model['model'])
    query_result = engine.query(variables=variables, evidence=evidence)
    print(query_result)
    return query_result
def inference(self, variables, evidence, mode="auto", log=True):
    '''
    (6) Computes the inference over some variables of the network (given some evidence)

    mode="auto": ignores ``variables``/``evidence`` and, for every node of
    ``self.best_model``, queries that node with all of its parents set to
    value 1; each result is written through ``self.file_writer`` and logged.
    mode="manual": queries ``variables`` given ``evidence`` and logs each
    result. Any other mode only emits the header lines.

    ``log`` is forwarded to ``self.log``. Returns nothing.
    '''
    # Alternatives left disabled by the original author:
    # BeliefPropagation(self.markov), Mplp(self.best_model)
    engine = VariableElimination(self.best_model)

    header = "------------------- INFERENCE ------------------------"
    self.log(header, log)
    self.file_writer.write_txt(header, newline=True)
    self.file_writer.write_txt("(With parents all set to value 1)")

    if mode == "auto":
        self.log(" (with parents all set to value 1)", log)
        for node in self.best_model.nodes():
            # Use fresh locals so the caller-supplied arguments are not
            # overwritten inside the loop.
            node_evidence = {
                parent: 1 for parent in self.best_model.get_parents(node)
            }
            phi_query = engine.query([node], node_evidence)
            for key in phi_query:
                self.file_writer.write_txt(str(phi_query[key]))
                self.log(phi_query[key], log)
    elif mode == "manual":
        phi_query = engine.query(variables, evidence)
        for key in phi_query:
            self.log(phi_query[key], log)
def inf(self, file1):
    """Build a Bayesian network from a text description and print the
    queried values of every non-evidence node.

    The file is read as consecutive 5-line node records (name, number of
    values, CPD string, parent list, parent cardinalities) terminated by a
    line containing only a newline. Relative to that terminator line, the
    line at offset +1 holds the edges and the line at offset +3 holds the
    evidence. Parsing of each line is delegated to the ``self.get*`` /
    ``self.parseCpd`` helpers.

    Nothing is printed when ``check_model()`` is falsy. Returns nothing.
    """
    # Read everything up front so the handle is closed even if parsing fails.
    with open(file1, encoding="utf8") as f1:
        lines = f1.readlines()

    G = BayesianModel()
    nodeList = {}

    i = 0
    while i < len(lines):
        if lines[i] == '\n':
            break
        nodeName = self.getnode(lines[i])
        valueNum = int(lines[i + 1])
        cpd_str = lines[i + 2]
        sequence = self.getList(lines[i + 3])
        card = self.getCard(lines[i + 4])
        cpd = self.parseCpd(cpd_str, valueNum, card)
        nodeList[nodeName] = {
            'nodeName': nodeName,
            'valueNum': valueNum,
            'cpd': cpd,
            'sequence': sequence,
            'card': card,
        }
        i += 5

    # `i` now indexes the terminator line (or len(lines) if none was found).
    edges = self.getegdes(lines[i + 1])
    evidence2 = self.getValue(lines[i + 3])

    # Edges are stored flat: [src0, dst0, src1, dst1, ...].
    for pair in range(len(edges) // 2):
        G.add_edge(edges[2 * pair], edges[2 * pair + 1])

    for node in nodeList.values():
        if node['sequence'][0] == '':
            # No parents listed: unconditional table.
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             values=node['cpd'])
        else:
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             evidence=node['sequence'],
                             evidence_card=node['card'],
                             values=node['cpd'])
        G.add_cpds(cpt)

    if G.check_model():
        inference = VariableElimination(G)
        pieces = []
        for node in G.nodes():
            if node not in evidence2:
                phi_query = inference.query(variables=[node],
                                            evidence=evidence2,
                                            show_progress=False).values
                pieces.append(node + ' ' + str(phi_query) + '\n')
        print(''.join(pieces))
def print_beliefs(self):
    """Print the queried distribution and the ordered label list for
    suspects ('s'), weapons ('w') and rooms ('r'), in that order."""
    belief_specs = (
        ('s', '_suspects_model', '_suspects_in_order'),
        ('w', '_weapons_model', '_weapons_in_order'),
        ('r', '_rooms_model', '_rooms_in_order'),
    )
    for variable, model_attr, order_attr in belief_specs:
        engine = VariableElimination(getattr(self, model_attr))
        distribution = engine.query(variables=[variable], show_progress=False)
        print(distribution)
        print(getattr(self, order_attr))
def process_segments(size):
    """Fit and query the model on consecutive segments of ``size`` rows, then
    merge the per-segment distributions into one list per node.

    Relies on names from the enclosing scope (``self``, ``nodes``, ``nodes2``,
    ``nodes3``, the ``query_time_frame_*`` helpers) and rebinds entries of the
    global ``Distribution``.
    Presumably each ``query_time_frame_*`` call appends one per-segment entry
    to ``Distribution[node]`` -- verify against those helpers.

    Merging: every segment entry except the last is summed as-is, the last
    one is weighted by ``last_size / size``, and the total is scaled by
    ``size / self.data_size``.
    """
    global Distribution
    loop = int(np.ceil(float(self.data_size) / size))
    last_size = self.data_size - size * (loop - 1)
    print('size: ', size, ' | last_size ', last_size, ' | loop: ', loop)

    for i in range(loop):
        print('process: ', i)
        # Positional, end-exclusive slicing: `.loc[a:b]` is label-based and
        # includes row b, which made neighbouring segments share a row.
        segment = self.data.iloc[i * size:(i + 1) * size]
        self.model.fit(segment,
                       estimator_type=BayesianEstimator,
                       prior_type="BDeu",
                       equivalent_sample_size=1,
                       state_names=self.state_names)
        infer = VariableElimination(self.model)
        query_time_frame_1(infer)
        query_time_frame_2(infer)
        query_time_frame_3(infer)

    # The last segment may be shorter, so it contributes proportionally.
    percent = float(last_size) / size
    for node in list(nodes + nodes2 + nodes3):
        length_state_name = len(self.state_names[node])
        temp = [0] * length_state_name
        for segment_distribution in Distribution[node][:-1]:
            for value_distr_index in range(length_state_name):
                temp[value_distr_index] += segment_distribution[value_distr_index]
        for value_distr_index in range(length_state_name):
            temp[value_distr_index] += (
                Distribution[node][-1][value_distr_index] * percent)
        Distribution[node] = [x * size / self.data_size for x in temp]
def main():
    """Build the three-node network C -> H <- P, attach its CPDs and print
    the queried distribution of H given C=0 and P=0.

    H: host, P: prize, C: contestant.
    """
    # Network structure.
    structure = [('C', 'H'), ('P', 'H')]
    model = BayesianModel(structure)

    # CPDs for the two parent nodes.
    contestant_cpd = TabularCPD('C', 3, [[0.33, 0.33, 0.33]])
    prize_cpd = TabularCPD('P', 3, [[0.33, 0.33, 0.33]])

    # CPD of H conditioned on (C, P).
    host_table = [
        [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
        [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
        [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0],
    ]
    host_cpd = TabularCPD('H', 3, host_table,
                          evidence=['C', 'P'],
                          evidence_card=[3, 3])

    # Associate the CPDs with the network structure.
    model.add_cpds(contestant_cpd, prize_cpd, host_cpd)

    # Infer the posterior probability.
    engine = VariableElimination(model)
    posterior = engine.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior['H'])
def hypothesis_update(self, node, prediction_error, prediction):
    """
    Updates the hypotheses of the generative model to minimize prediction error

    Nodes whose name contains "motor" are forwarded to
    ``self.sensory_input.action``. For any other node, every root of the
    model is queried with the node observed at
    ``argmax(prediction_error + prediction)`` and the root's CPD values are
    replaced by the queried values.

    :param node: name of the node causing the prediction error
    :param prediction_error: the prediction error itself
    :param prediction: prediction causing the prediction error
    :type node : str
    :type prediction_error: np.array
    :type prediction: np.array
    """
    # Theoretically a hypothesis update should achieve both perceptual and
    # motor update; the implementation currently makes the difference explicit.
    # TODO: Need to have custom implementation of bayesian network, so that
    # TODO: prediction errors in proprioceptive (motor) nodes are resolved by
    # TODO: executing the motor action, and not performing hypo update
    infer = VariableElimination(self.model)

    if "motor" in node:
        self.sensory_input.action(node, prediction_error, prediction)
        return

    for hypo in self.model.get_roots():
        observed_state = np.argmax(prediction_error + prediction)
        result = infer.query(variables=[hypo],
                             evidence={node: observed_state})
        before = self.model.get_cpds(hypo).values
        self.model.get_cpds(hypo).values = result.get(hypo).values
        logging.debug("node[%s] hypothesis-update from %s to %s", hypo,
                      before, result.get(hypo).values)
def setUp(self):
    """Create the Markov-model fixture and its VariableElimination engine.

    Per the original author, this is a moralised version of the Bayesian
    network used elsewhere, so all results are the same; only factors matter
    for inference.
    """
    undirected_edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'),
                        ('G', 'L'), ('A', 'R'), ('J', 'G')]
    self.markov_model = MarkovModel(undirected_edges)

    # Each factor is obtained by converting the corresponding CPD.
    cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
    cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
    cpd_j = TabularCPD('J', 2,
                       values=[[0.9, 0.6, 0.7, 0.1],
                               [0.1, 0.4, 0.3, 0.9]],
                       evidence=['A', 'R'],
                       evidence_card=[2, 2])
    cpd_q = TabularCPD('Q', 2,
                       values=[[0.9, 0.2], [0.1, 0.8]],
                       evidence=['J'],
                       evidence_card=[2])
    cpd_l = TabularCPD('L', 2,
                       values=[[0.9, 0.45, 0.8, 0.1],
                               [0.1, 0.55, 0.2, 0.9]],
                       evidence=['J', 'G'],
                       evidence_card=[2, 2])
    cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])

    factors = [
        cpd.to_factor()
        for cpd in (cpd_a, cpd_r, cpd_j, cpd_q, cpd_l, cpd_g)
    ]
    self.markov_model.add_factors(*factors)
    self.markov_inference = VariableElimination(self.markov_model)
def update_cpts_causal_model(self):
    """
    Build the CPTs with the pgmpy library from the agent's dictionary of
    beliefs and attach them to the underlying pgmpy model.

    When the updated model passes ``check_model()``, a fresh
    VariableElimination engine is stored in ``self.model.infer_system``.
    Otherwise the CPDs of the model copy taken before the update are logged
    and ValueError("Error with CPTs") is raised.
    """
    adj_list = self.model.get_nodes_and_predecessors()
    logging.info("Updating cpts from beliefs")

    # Possible values of every variable in the adjacency list.
    var_values = {}
    for name in adj_list:
        var_values[name] = self.model.get_variable_values(name)

    # Copy taken before the update; only used for logging on failure.
    backup_model = self.model.pgmodel.copy()

    for variable in self.beliefs:
        parents = adj_list[variable]
        parent_cards = [len(var_values[parent]) for parent in parents]
        cpd_table = TabularCPD(variable=variable,
                               variable_card=len(var_values[variable]),
                               values=self.beliefs[variable],
                               evidence=parents,
                               evidence_card=parent_cards)
        self.model.pgmodel.add_cpds(cpd_table)

    if not self.model.pgmodel.check_model():
        for cpd in backup_model.get_cpds():
            logging.info(cpd)
        raise ValueError("Error with CPTs")

    self.model.infer_system = VariableElimination(self.model.pgmodel)
def setUp(self):
    """Create the Bayesian-network fixture (A, R -> J; J -> Q; J, G -> L)
    and its VariableElimination engine."""
    edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]
    self.bayesian_model = BayesianModel(edges)

    # Nodes without parents.
    prior_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
    prior_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
    prior_g = TabularCPD('G', 2, values=[[0.6], [0.4]])

    # Nodes conditioned on their parents.
    cond_j = TabularCPD('J', 2,
                        values=[[0.9, 0.6, 0.7, 0.1],
                                [0.1, 0.4, 0.3, 0.9]],
                        evidence=['A', 'R'],
                        evidence_card=[2, 2])
    cond_q = TabularCPD('Q', 2,
                        values=[[0.9, 0.2], [0.1, 0.8]],
                        evidence=['J'],
                        evidence_card=[2])
    cond_l = TabularCPD('L', 2,
                        values=[[0.9, 0.45, 0.8, 0.1],
                                [0.1, 0.55, 0.2, 0.9]],
                        evidence=['J', 'G'],
                        evidence_card=[2, 2])

    self.bayesian_model.add_cpds(prior_a, prior_g, cond_j, cond_l, cond_q,
                                 prior_r)
    self.bayesian_inference = VariableElimination(self.bayesian_model)
def get_most_probable_weapon(self):
    """Return the entry of ``self._weapons_in_order`` at the position of the
    largest value in the queried distribution of 'w' (first one on ties)."""
    engine = VariableElimination(self._weapons_model)
    distribution = engine.query(variables=['w'], show_progress=False)
    probabilities = distribution.values
    best_index = list(probabilities).index(max(probabilities))
    return self._weapons_in_order[best_index]
def get_most_probable_suspect(self):
    """Return the entry of ``self._suspects_in_order`` at the position of the
    largest value in the queried distribution of 's' (first one on ties)."""
    engine = VariableElimination(self._suspects_model)
    distribution = engine.query(variables=['s'], show_progress=False)
    probabilities = distribution.values
    best_index = list(probabilities).index(max(probabilities))
    return self._suspects_in_order[best_index]
def exact_inference(self, filename):
    """Answer the query stored on the first line of ``filename`` exactly.

    The first line is parsed by ``self.__extract_query`` into the query
    variables and the evidence variables (both mapping a variable to a state
    label). Labels are translated through ``self.mapper`` before being used.

    Returns the entry of the queried values selected by the requested state
    of every query variable.
    """
    # Only the first line is needed; the context manager closes the file
    # even when parsing raises.
    with open(filename, 'r') as f:
        query_variables, evidence_variables = self.__extract_query(
            f.readline())

    eliminate = VariableElimination(self.networks)

    evidence_variables_mapped = {
        variable: self.mapper[variable][evidence_variables[variable]]
        for variable in evidence_variables
    }
    query_variables_feature = list(query_variables.keys())

    result = eliminate.query(variables=query_variables_feature,
                             evidence=evidence_variables_mapped)

    # Index one axis at a time, in the order the result lists its variables.
    value = result.values
    for feature in result.variables:
        requested_state = self.mapper[feature][query_variables[feature]]
        value = value[result.get_state_no(feature, requested_state)]
    return value
def probnet_inference(model, h, b, d):
    """Binarise the three readings, print them, and return the MAP query of
    'S' given the binarised evidence.

    Thresholds: H is 1 when h > 10, B is 1 when b > 20, D is 1 when d > 3;
    each is 0 otherwise.
    """
    H = int(h > 10)
    B = int(b > 20)
    D = int(d > 3)
    print(H, B, D)

    evidence = {'H': H, 'B': B, 'D': D}
    engine = VariableElimination(model)
    return engine.map_query(['S'], evidence=evidence)
def mutual_information(self, X, Y, evidence):
    """Return the mutual information (natural log) between X and Y given
    the evidence.

    Parameters
    ----------
    X, Y : names of the two variables.
    evidence : dict of observed variable states, or None / empty for none.

    Returns
    -------
    -1 when X or Y is itself part of the evidence (the quantity is not
    meaningful then); otherwise the mutual information as a float.
    """
    # If one of the variables is already observed there is nothing to compute.
    if evidence and (X in evidence or Y in evidence):
        return -1

    # Set up inference using the variable elimination algorithm.
    from pgmpy.inference import VariableElimination
    model_infer = VariableElimination(self.model_pgmpy)

    # Joint distribution of (X, Y) and its two marginals.
    joint = model_infer.query(variables=[X, Y], evidence=evidence, joint=True)
    Y_mar = joint.marginalize([X], inplace=False).values
    X_mar = joint.marginalize([Y], inplace=False).values

    # The axis order of the joint table can vary; put Y on the rows.
    if joint.variables[0] != Y:
        XY_joint = np.transpose(joint.values)
    else:
        XY_joint = joint.values
    XY_joint = np.asarray(XY_joint, dtype=float)

    # In information theory 0 * log(0) = 0, so only cells with positive joint
    # probability contribute. Restricting to them also avoids 0/0 = NaN when
    # a marginal is zero (NaN does not raise, so it used to leak into the sum).
    independent = np.outer(Y_mar, X_mar)
    support = XY_joint > 0
    terms = XY_joint[support] * np.log(XY_joint[support] / independent[support])
    return float(np.sum(terms))
def buildBN():
    """Build the Burglary/Earthquake/Alarm/JohnCalls/MaryCalls network and
    return a VariableElimination engine for it.

    Example queries the original author left disabled include
    ``is_active_trail('Burglary', 'Earthquake', observed=['Alarm'])`` on the
    model and ``query(variables=['MaryCalls'], joint=False, evidence={...})``
    on the returned engine.
    """
    edges = [('Burglary', 'Alarm'), ('Earthquake', 'Alarm'),
             ("Alarm", "JohnCalls"), ("Alarm", "MaryCalls")]
    burglary_model = BayesianModel(edges)

    # [ P(!B), p(B) ]
    cpd_burg = TabularCPD(variable='Burglary',
                          variable_card=2,
                          values=[[.999], [.001]])

    # [ P(!E), p(E) ]
    cpd_earth = TabularCPD(variable='Earthquake',
                           variable_card=2,
                           values=[[.998], [.002]])

    alarm_table = [
        # P(!A|!E,!B), P(!A|!E,B), P(!A|E,!B), P(!A|E,B)
        [.999, .06, .71, .05],
        # P(A|!E,!B), P(A|!E,B), P(A|E,!B), P(A|E,B)
        [.001, .94, .29, .95],
    ]
    cpd_alarm = TabularCPD(variable='Alarm',
                           variable_card=2,
                           values=alarm_table,
                           evidence=['Earthquake', 'Burglary'],
                           evidence_card=[2, 2])

    john_table = [
        [.95, .10],  # P(!J|!A), P(!J|A)
        [.05, .90],  # P(J|!A), P(J|A)
    ]
    cpd_john = TabularCPD(variable="JohnCalls",
                          variable_card=2,
                          values=john_table,
                          evidence=['Alarm'],
                          evidence_card=[2])

    mary_table = [
        [.99, .30],  # P(!M|!A), P(!M|A)
        [.01, .70],  # P(M|!A), P(M|A)
    ]
    cpd_mary = TabularCPD(variable="MaryCalls",
                          variable_card=2,
                          values=mary_table,
                          evidence=['Alarm'],
                          evidence_card=[2])

    burglary_model.add_cpds(cpd_burg, cpd_earth, cpd_alarm, cpd_john, cpd_mary)

    # Exact inference using Variable Elimination.
    return VariableElimination(burglary_model)
def main_brute(file, keep_atts, edges):
    """Pick the elimination ordering with the smallest maximum width among
    all permutations of ``keep_atts`` and report statistics of the graph it
    induces.

    ``find_widths_brute`` supplies, per permutation, the maximum width plus
    the SAT runtimes; the permutation at the arg-min width is used as the
    elimination order for ``VariableElimination.induced_graph`` on the
    Markov version of the network built by ``create_bayes_net``.

    Returns
    -------
    (graph_size, density, n_edges, worst_runtime, total_sat_runtime)
    """
    perms = list(itertools.permutations(keep_atts))
    # The cliques and dictionaries returned here are not needed below.
    max_widths, _cliques, _dictionaries, sat_runtimes, total_sat_runtime = \
        find_widths_brute(file, keep_atts, edges)

    min_tree_width_idx = np.argmin(max_widths)
    min_tree_width = np.min(max_widths)
    best_ordering = perms[min_tree_width_idx]

    graph = create_bayes_net(file, keep_atts, edges)
    mgraph = graph.to_markov_model()
    inference = VariableElimination(mgraph)

    # Graph induced by eliminating variables in the best ordering.
    igraph = inference.induced_graph(list(best_ordering))

    graph_size = len(keep_atts)
    density = nx.density(igraph)
    n_edges = igraph.number_of_edges()
    worst_runtime = np.max(sat_runtimes)

    print("Graph size ", graph_size)
    print("Graph density ", density)
    print("Graph edges ", n_edges)
    print("The minimum tree width is ", min_tree_width)
    print("The best ordering is ", best_ordering)
    print("The worst case SAT runtime is ", worst_runtime)
    print("The total SAT runtime is ", total_sat_runtime)
    return graph_size, density, n_edges, worst_runtime, total_sat_runtime
def setUp(self):
    """Create the Bayesian-network fixture (A, R -> J; J -> Q; J, G -> L)
    and its VariableElimination engine."""
    self.bayesian_model = BayesianModel(
        [("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"), ("G", "L")]
    )

    # One CPD per node, keyed by node name.
    cpds = {
        "A": TabularCPD("A", 2, values=[[0.2], [0.8]]),
        "R": TabularCPD("R", 2, values=[[0.4], [0.6]]),
        "J": TabularCPD(
            "J",
            2,
            values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
            evidence=["A", "R"],
            evidence_card=[2, 2],
        ),
        "Q": TabularCPD(
            "Q",
            2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=["J"],
            evidence_card=[2],
        ),
        "L": TabularCPD(
            "L",
            2,
            values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
            evidence=["J", "G"],
            evidence_card=[2, 2],
        ),
        "G": TabularCPD("G", 2, values=[[0.6], [0.4]]),
    }

    # Same registration order as before: A, G, J, L, Q, R.
    self.bayesian_model.add_cpds(
        cpds["A"], cpds["G"], cpds["J"], cpds["L"], cpds["Q"], cpds["R"]
    )
    self.bayesian_inference = VariableElimination(self.bayesian_model)
def predict(self, data):
    """
    Predicts states of all the missing variables.

    Every row of ``data`` is used as evidence for a MAP query over the model
    variables that do not appear among the columns.

    Parameters
    ----------
    data : pandas DataFrame object
        A DataFrame object with column names same as the variables in the
        model.

    Returns
    -------
    pandas DataFrame with one column per missing variable and the same index
    as ``data``.

    Raises
    ------
    ValueError
        If no variable is missing from ``data``, or if ``data`` has a column
        that is not a model variable.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> train_data = values[:800]
    >>> predict_data = values[800:]
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> model.fit(values)
    >>> predict_data = predict_data.copy()
    >>> predict_data.drop('E', axis=1, inplace=True)
    >>> y_pred = model.predict(predict_data)
    >>> y_pred
        E
    800 0
    801 1
    802 1
    ... ...
    998 0
    999 0
    """
    from pgmpy.inference import VariableElimination

    model_nodes = set(self.nodes())
    data_columns = set(data.columns)

    if data_columns == model_nodes:
        raise ValueError("No variable missing in data. Nothing to predict")
    if data_columns - model_nodes:
        raise ValueError("Data has variables which are not in the model")

    missing_variables = model_nodes - data_columns

    # Send state_names dict from one of the estimated CPDs to the inference
    # class.
    state_names = self.get_cpds()[0].state_names
    model_inference = VariableElimination(self, state_names=state_names)

    pred_values = defaultdict(list)
    for _, row in data.iterrows():
        predicted_states = model_inference.map_query(
            variables=missing_variables, evidence=row.to_dict())
        for variable, state in predicted_states.items():
            pred_values[variable].append(state)

    return pd.DataFrame(pred_values, index=data.index)
def make_accusation(self):
    """Return an accusation, or None when the beliefs are still too uncertain.

    A truthy ``self._accusation`` is returned as-is. Otherwise the
    distributions of 's', 'w' and 'r' are queried; when the entropy of each
    is at most ``self._accusation_entropy_threshold``, the
    (suspect, weapon, room) labels at the position of each distribution's
    largest value are returned.
    """
    if self._accusation:
        return self._accusation

    # Query the three belief distributions in the order s, w, r.
    distributions = []
    for variable, model_attr in (('s', '_suspects_model'),
                                 ('w', '_weapons_model'),
                                 ('r', '_rooms_model')):
        engine = VariableElimination(getattr(self, model_attr))
        distributions.append(
            engine.query(variables=[variable], show_progress=False))
    s_dis, w_dis, r_dis = distributions

    confident = all(
        entropy(dist.values) <= self._accusation_entropy_threshold
        for dist in distributions)
    if not confident:
        return None

    def most_likely(labels, dist):
        # Label at the first position holding the maximum value.
        probabilities = list(dist.values)
        return labels[probabilities.index(max(probabilities))]

    return (most_likely(self._suspects_in_order, s_dis),
            most_likely(self._weapons_in_order, w_dis),
            most_likely(self._rooms_in_order, r_dis))
def category_probability(self, category: str) -> float:
    '''Return the probability of the given category.

    The distribution of ``Data.CATEGORY_NAME`` is queried without evidence.
    ``category`` is translated through ``self.categories`` (unknown
    categories fall back to 0) and the probability of the resulting state is
    returned.
    '''
    # The original also looked up Data.CATEGORY_VALUES into a local that was
    # never used; that dead lookup has been removed.
    elimination = VariableElimination(self.model)
    probability = elimination.query(variables=[Data.CATEGORY_NAME])
    state = probability.get_state_no(Data.CATEGORY_NAME,
                                     self.categories.get(category, 0))
    return probability.values[state]
def get_alarm_prob(bayes_net):
    """Calculate the marginal probability of the alarm ringing in the
    power plant system.

    Returns the entry at index 1 of the queried "alarm" values.
    """
    marginals = VariableElimination(bayes_net).query(variables=["alarm"],
                                                     joint=False)
    alarm_factor = marginals["alarm"]
    return alarm_factor.values[1]
def get_gauge_prob(bayes_net):
    """Calculate the marginal probability of the gauge showing hot in the
    power plant system.

    Returns the entry at index 1 of the queried "gauge" values.
    """
    marginals = VariableElimination(bayes_net).query(variables=["gauge"],
                                                     joint=False)
    gauge_factor = marginals["gauge"]
    return gauge_factor.values[1]
def MH_sampler(bayes_net, initial_state):
    """Perform one Metropolis-Hastings sweep and return the new state.

    initial_state is a list of length 6 where:
    index 0-2: represent skills of teams A,B,C (values lie in [0,3] inclusive)
    index 3-5: represent results of matches AvB, BvC, CvA (values lie in
    [0,2] inclusive)

    When ``initial_state`` is empty or None, a random state is returned
    instead: random skills, AvB fixed to 0, a random BvC and CvA fixed to 2.

    Otherwise the entries at indices 0-2 and 4 are each offered a new value
    drawn with weights ``norm.pdf(candidate, current, 1)`` and accepted with
    probability ``min(1, p(new) / p(current))``, where ``p`` is the queried
    distribution of that node given AvB=0 and CvA=2. Indices 3 and 5 are
    never changed.

    Returns the state as a tuple.
    """
    if not initial_state:
        sample = list(np.random.randint(0, 4, size=[
            3,
        ])) + [0, random.randint(0, 2), 2]
        return tuple(sample)

    sample = list(initial_state)
    nodes = list(bayes_net.nodes())
    solver = VariableElimination(bayes_net)

    for idx, val in enumerate(sample):
        if idx < 3:
            n_states = 4  # skill levels 0..3
        elif idx == 4:
            n_states = 3  # match results 0..2
        else:
            continue  # indices 3 and 5 are held at their given values

        # Propose a new value, favouring those near the current one.
        weights = [norm.pdf(i, sample[idx], 1) for i in range(n_states)]
        new_val = random.choices(list(range(n_states)), weights=weights)[0]

        node = nodes[idx]
        # Read the result under the same name that was queried. The original
        # hard-coded 'BvC' for idx == 4, which is identical when nodes[4] is
        # 'BvC' and raised KeyError otherwise.
        prob = solver.query(variables=[node],
                            evidence={
                                'AvB': 0,
                                'CvA': 2
                            },
                            joint=False,
                            show_progress=False)[node].values
        prob1 = prob[new_val]
        prob0 = prob[val]

        alpha = min(1.0, prob1 / prob0)
        if random.uniform(0, 1) < alpha:
            sample[idx] = new_val
    return tuple(sample)
def start(self):
    """Attach the five CPDs to ``self.musicModel``, print the model check
    and then print a series of queried distributions."""
    difficulty_cpd = TabularCPD(variable='Difficulty',
                                variable_card=2,
                                values=[[0.6], [0.4]])
    musicianship_cpd = TabularCPD(variable='Musicianship',
                                  variable_card=2,
                                  values=[[0.7], [0.3]])
    rating_cpd = TabularCPD(variable='Rating',
                            variable_card=3,
                            values=[[0.3, 0.05, 0.9, 0.5],
                                    [0.4, 0.25, 0.08, 0.3],
                                    [0.3, 0.7, 0.02, 0.2]],
                            evidence=['Difficulty', 'Musicianship'],
                            evidence_card=[2, 2])
    exam_cpd = TabularCPD(variable='Exam',
                          variable_card=2,
                          values=[[0.95, 0.2], [0.05, 0.8]],
                          evidence=['Musicianship'],
                          evidence_card=[2])
    letter_cpd = TabularCPD(variable='Letter',
                            variable_card=2,
                            values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                            evidence=['Rating'],
                            evidence_card=[3])

    self.musicModel.add_cpds(difficulty_cpd, musicianship_cpd, rating_cpd,
                             exam_cpd, letter_cpd)
    print(self.musicModel.check_model())

    infer = VariableElimination(self.musicModel)

    def marginal(variable, **query_kwargs):
        # Query one variable and pick its entry out of the result.
        return infer.query(variables=[variable], **query_kwargs)[variable]

    # Part 1: run all five queries first, then print them in the same order.
    first_batch = [
        marginal('Exam', evidence={'Musicianship': 1}),
        marginal('Musicianship'),
        marginal('Rating', evidence={'Musicianship': 1, 'Difficulty': 0}),
        marginal('Difficulty'),
        marginal('Letter', evidence={'Rating': 1}),
    ]
    for distribution in first_batch:
        print(distribution)

    # Part 2. Original author's note: THIS IS NOT WORKING, getting weird
    # results.
    second_batch = [
        marginal('Letter'),
        marginal('Letter', evidence={'Musicianship': 0}),
    ]
    for distribution in second_batch:
        print(distribution)
    print('')