def setup_titanic():
    """Build the three-node Titanic network and publish it via module globals.

    ``passenger`` (survive/perish) is the root distribution; ``gender`` and
    ``tclass`` are conditional tables that each have ``passenger`` as their
    only parent.  The baked network is stored in ``titanic_network``.
    """
    global titanic_network, passenger, gender, tclass

    # Root: whether a passenger survived or perished.
    passenger = DiscreteDistribution({'survive': 0.6, 'perish': 0.4})

    # Gender given survival; rows are [passenger value, gender, probability].
    gender_rows = [
        ['survive', 'male', 0.0],
        ['survive', 'female', 1.0],
        ['perish', 'male', 1.0],
        ['perish', 'female', 0.0],
    ]
    gender = ConditionalProbabilityTable(gender_rows, [passenger])

    # Class of travel given survival; same row layout as above.
    class_rows = [
        ['survive', 'first', 0.0],
        ['survive', 'second', 1.0],
        ['survive', 'third', 0.0],
        ['perish', 'first', 1.0],
        ['perish', 'second', 0.0],
        ['perish', 'third', 0.0],
    ]
    tclass = ConditionalProbabilityTable(class_rows, [passenger])

    # A State pairs a distribution with a human-readable node name.
    passenger_state = State(passenger, name="passenger")
    gender_state = State(gender, name="gender")
    class_state = State(tclass, name="class")

    titanic_network = BayesianNetwork("Titanic Disaster")
    titanic_network.add_nodes(passenger_state, gender_state, class_state)

    # Edges run parent -> child: gender and class both depend on passenger.
    titanic_network.add_edge(passenger_state, gender_state)
    titanic_network.add_edge(passenger_state, class_state)
    titanic_network.bake()
def test_cpd_sampling():
    """Compare empirical CPT sample frequencies with the table's probabilities.

    Three scenarios are checked in order: no parent value supplied, parent
    fixed to "A", and parent fixed to "B".  Each uses 1000 draws.
    """
    d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
    d2 = ConditionalProbabilityTable(
        [["A", "A", 0.1],
         ["A", "B", 0.9],
         ["B", "A", 0.7],
         ["B", "B", 0.3]],
        [d1])

    def observed_frequencies(**sample_kwargs):
        # Bin each draw as 0 ("A") or 1 (anything else), then normalise.
        bins = [0 if d2.sample(**sample_kwargs) == "A" else 1
                for _ in range(1000)]
        return numpy.bincount(bins) / 1000.0

    scenarios = [
        # No parent observed:
        # P(A) = 0.1*0.1 + 0.9*0.7 = 0.64, P(B) = 0.1*0.9 + 0.9*0.3 = 0.36
        ({}, [0.64, 0.36]),
        # Parent observed as "A": the table reduces to [0.1, 0.9].
        ({"parent_values": {d1: "A"}}, [0.1, 0.9]),
        # Parent observed as "B": the table reduces to [0.7, 0.3].
        ({"parent_values": {d1: "B"}}, [0.7, 0.3]),
    ]
    for sample_kwargs, expected in scenarios:
        est = observed_frequencies(**sample_kwargs)
        assert_almost_equal(est[0], expected[0], 1)
        assert_almost_equal(est[1], expected[1], 1)
def get_bayesnet(self):
    """Build, bake and return the four-node "User_pref" network.

    ``door_lock`` and ``clock_alarm`` are root distributions; ``light`` is
    conditioned on both of them and ``coffee_maker`` on ``clock_alarm`` only.
    """
    door_lock = DiscreteDistribution({'d1': 0.7, 'd2': 0.3})
    clock_alarm = DiscreteDistribution({'a1': 0.8, 'a2': 0.2})

    # Rows are [door_lock, clock_alarm, light, probability].
    light_rows = [
        ['d1', 'a1', 'l1', 0.96], ['d1', 'a1', 'l2', 0.04],
        ['d1', 'a2', 'l1', 0.89], ['d1', 'a2', 'l2', 0.11],
        ['d2', 'a1', 'l1', 0.96], ['d2', 'a1', 'l2', 0.04],
        ['d2', 'a2', 'l1', 0.89], ['d2', 'a2', 'l2', 0.11],
    ]
    light = ConditionalProbabilityTable(light_rows, [door_lock, clock_alarm])

    # Rows are [clock_alarm, coffee_maker, probability].
    coffee_rows = [
        ['a1', 'c1', 0.92], ['a1', 'c2', 0.08],
        ['a2', 'c1', 0.03], ['a2', 'c2', 0.97],
    ]
    coffee_maker = ConditionalProbabilityTable(coffee_rows, [clock_alarm])

    s_door_lock = State(door_lock, name="door_lock")
    s_clock_alarm = State(clock_alarm, name="clock_alarm")
    s_light = State(light, name="light")
    s_coffee_maker = State(coffee_maker, name="coffee_maker")

    network = BayesianNetwork("User_pref")
    network.add_nodes(s_door_lock, s_clock_alarm, s_light, s_coffee_maker)
    # Edges run parent -> child.
    for parent, child in (
            (s_door_lock, s_light),
            (s_clock_alarm, s_coffee_maker),
            (s_clock_alarm, s_light)):
        network.add_edge(parent, child)
    network.bake()
    return network
def test_cpd_sampling():
    """Check sampled frequencies of a one-parent CPT against its table."""
    d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
    table_rows = [
        ["A", "A", 0.1],
        ["A", "B", 0.9],
        ["B", "A", 0.7],
        ["B", "B", 0.3],
    ]
    d2 = ConditionalProbabilityTable(table_rows, [d1])

    def frequencies(draw):
        # 1000 draws, binned as 0 for "A" and 1 otherwise, then normalised.
        outcomes = [draw() for _ in range(1000)]
        return numpy.bincount(
            [0 if outcome == "A" else 1 for outcome in outcomes]) / 1000.0

    # Without an observed parent:
    # P(A) = 0.1*0.1 + 0.9*0.7 = 0.64
    # P(B) = 0.1*0.9 + 0.9*0.3 = 0.36
    true = [0.64, 0.36]
    est = frequencies(lambda: d2.sample())
    assert_almost_equal(est[0], true[0], 1)
    assert_almost_equal(est[1], true[1], 1)

    # With the parent observed as "A" the table reduces to [0.1, 0.9].
    true1 = [0.1, 0.9]
    given_a = {d1: "A"}
    est = frequencies(lambda: d2.sample(parent_values=given_a))
    assert_almost_equal(est[0], true1[0], 1)
    assert_almost_equal(est[1], true1[1], 1)

    # With the parent observed as "B" the table reduces to [0.7, 0.3].
    true2 = [0.7, 0.3]
    given_b = {d1: "B"}
    est = frequencies(lambda: d2.sample(parent_values=given_b))
    assert_almost_equal(est[0], true2[0], 1)
    assert_almost_equal(est[1], true2[1], 1)
def test_conditional():
    """The marginal of a one-parent CPT must match the hand-computed values.

    P(True) = 0.01*0.95 + 0.99*0.05 = 0.059, hence P(False) = 0.941.
    """
    phditis = DiscreteDistribution({True: 0.01, False: 0.99})
    # Rows are [phditis, test_result, probability].
    rows = [
        [True, True, 0.95],
        [True, False, 0.05],
        [False, True, 0.05],
        [False, False, 0.95],
    ]
    test_result = ConditionalProbabilityTable(rows, [phditis])

    expected = DiscreteDistribution({False: 0.941, True: 0.059})
    assert discrete_equality(test_result.marginal(), expected)
def test_distributions_cpt_random_sample():
    """Seeded CPT sampling must reproduce a fixed sequence of 20 draws."""
    d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
    rows = [
        ["A", "A", 0.1],
        ["A", "B", 0.9],
        ["B", "A", 0.7],
        ["B", "B", 0.3],
    ]
    d = ConditionalProbabilityTable(rows, [d1])

    # Reference draws for n=20 with random_state=5.
    x = numpy.array([
        'B', 'A', 'B', 'B', 'A', 'B', 'A', 'A', 'B', 'A',
        'A', 'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
    ])

    seeded = d.sample(n=20, random_state=5)
    assert_array_equal(seeded, x)

    # A 10-element unseeded sample must not compare equal to the reference.
    unseeded = d.sample(n=10)
    assert_raises(AssertionError, assert_array_equal, unseeded, x)
def __init__(self):
    """Build the five-node "Lung Cancer" network and its column metadata.

    Sets ``self.model`` to the baked network and ``self.meta`` to one
    categorical descriptor (values 'T'/'F') per node.
    """
    pollution = DiscreteDistribution({'F': 0.9, 'T': 0.1})
    smoker = DiscreteDistribution({'T': 0.3, 'F': 0.7})
    print(smoker)

    # Rows are [Pollution, Smoker, Cancer, probability].
    cancer_rows = [
        ['T', 'T', 'T', 0.05],
        ['T', 'T', 'F', 0.95],
        ['T', 'F', 'T', 0.02],
        ['T', 'F', 'F', 0.98],
        ['F', 'T', 'T', 0.03],
        ['F', 'T', 'F', 0.97],
        ['F', 'F', 'T', 0.001],
        ['F', 'F', 'F', 0.999],
    ]
    cancer = ConditionalProbabilityTable(cancer_rows, [pollution, smoker])
    print(cancer)

    # Rows are [Cancer, XRay, probability].
    xray_rows = [
        ['T', 'T', 0.9],
        ['T', 'F', 0.1],
        ['F', 'T', 0.2],
        ['F', 'F', 0.8],
    ]
    xray = ConditionalProbabilityTable(xray_rows, [cancer])

    # Rows are [Cancer, Dyspnoea, probability].
    dyspnoea_rows = [
        ['T', 'T', 0.65],
        ['T', 'F', 0.35],
        ['F', 'T', 0.3],
        ['F', 'F', 0.7],
    ]
    dyspnoea = ConditionalProbabilityTable(dyspnoea_rows, [cancer])

    s1 = Node(pollution, name="Pollution")
    s2 = Node(smoker, name="Smoker")
    s3 = Node(cancer, name="Cancer")
    s4 = Node(xray, name="XRay")
    s5 = Node(dyspnoea, name="Dyspnoea")

    model = BayesianNetwork("Lung Cancer")
    model.add_states(s1, s2, s3, s4, s5)
    # Edges run parent -> child.
    for parent, child in ((s1, s3), (s2, s3), (s3, s4), (s3, s5)):
        model.add_edge(parent, child)
    model.bake()
    self.model = model

    # One descriptor per node, named in the order the states were added.
    name_mapper = ["Pollution", "Smoker", "Cancer", "XRay", "Dyspnoea"]
    self.meta = [
        {
            "name": name_mapper[idx],
            "type": "categorical",
            "size": 2,
            "i2s": ['T', 'F']
        }
        for idx in range(self.model.node_count())
    ]
def __init__(self):
    """Build the three-node "tree" network (A -> B, A -> C) and its metadata.

    Sets ``self.model`` to the baked network and ``self.meta`` to one
    categorical descriptor per node; C uses the labels '4'/'5'/'6' while
    the other nodes use '1'/'2'/'3'.
    """
    root = DiscreteDistribution({'1': 1. / 3, '2': 1. / 3, '3': 1. / 3})

    # Rows are [A, B, probability].
    b_rows = [
        ['1', '1', 0.5], ['1', '2', 0.5], ['1', '3', 0],
        ['2', '1', 0], ['2', '2', 0.5], ['2', '3', 0.5],
        ['3', '1', 0.5], ['3', '2', 0], ['3', '3', 0.5],
    ]
    left = ConditionalProbabilityTable(b_rows, [root])

    # Rows are [A, C, probability].
    c_rows = [
        ['1', '4', 0.5], ['1', '5', 0.5], ['1', '6', 0],
        ['2', '4', 0], ['2', '5', 0.5], ['2', '6', 0.5],
        ['3', '4', 0.5], ['3', '5', 0], ['3', '6', 0.5],
    ]
    right = ConditionalProbabilityTable(c_rows, [root])

    s1 = Node(root, name="A")
    s2 = Node(left, name="B")
    s3 = Node(right, name="C")

    model = BayesianNetwork("tree")
    model.add_states(s1, s2, s3)
    model.add_edge(s1, s2)
    model.add_edge(s1, s3)
    model.bake()
    self.model = model

    # Every node except the last is named 'A', 'B', ... and shares one label set.
    meta = [
        {
            "name": chr(ord('A') + idx),
            "type": "categorical",
            "size": 3,
            "i2s": ['1', '2', '3']
        }
        for idx in range(self.model.node_count() - 1)
    ]
    # The final node, C, has its own label set.
    meta.append({
        "name": "C",
        "type": "categorical",
        "size": 3,
        "i2s": ['4', '5', '6']
    })
    self.meta = meta
def test_io_fit():
    """Fitting from in-memory data and from a data generator must agree.

    Two three-node chains start from different parameters; one is fit on
    ``X`` with ``weights`` and the other on ``data_generator``.  Afterwards
    both must assign the same log probabilities to ``X``.
    """

    def chain_model(root_probs, middle_rows, leaf_rows):
        # Build and bake the chain root -> middle -> leaf.
        root = DiscreteDistribution(root_probs)
        middle = ConditionalProbabilityTable(middle_rows, [root])
        leaf = ConditionalProbabilityTable(leaf_rows, [middle])

        first, second, third = Node(root), Node(middle), Node(leaf)
        net = BayesianNetwork()
        net.add_nodes(first, second, third)
        net.add_edge(first, second)
        net.add_edge(second, third)
        net.bake()
        return net

    model1 = chain_model(
        {True: 0.6, False: 0.4},
        [[True, 'A', 0.2],
         [True, 'B', 0.8],
         [False, 'A', 0.3],
         [False, 'B', 0.7]],
        [['A', 0, 0.3],
         ['A', 1, 0.7],
         ['B', 0, 0.8],
         ['B', 1, 0.2]])
    model1.fit(X, weights=weights)

    model2 = chain_model(
        {True: 0.2, False: 0.8},
        [[True, 'A', 0.7],
         [True, 'B', 0.2],
         [False, 'A', 0.4],
         [False, 'B', 0.6]],
        [['A', 0, 0.9],
         ['A', 1, 0.1],
         ['B', 0, 0.0],
         ['B', 1, 1.0]])
    model2.fit(data_generator)

    logp1 = model1.log_probability(X)
    logp2 = model2.log_probability(X)
    assert_array_almost_equal(logp1, logp2)
def setup_monty():
    """Build the Monty Hall network and publish it through module globals.

    Sets ``monty_network`` to the baked network and ``monty_index``,
    ``prize_index`` and ``guest_index`` to the positions of the respective
    states in ``monty_network.states``.  Also seeds ``random`` with 0.
    """
    global monty_network, monty_index, prize_index, guest_index

    random.seed(0)

    doors = {'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3}
    # The guest's pick and the prize location are both uniform roots.
    guest = DiscreteDistribution(dict(doors))
    prize = DiscreteDistribution(dict(doors))

    # Monty depends on both; rows are [guest, prize, monty, probability].
    monty_rows = [
        ['A', 'A', 'A', 0.0], ['A', 'A', 'B', 0.5], ['A', 'A', 'C', 0.5],
        ['A', 'B', 'A', 0.0], ['A', 'B', 'B', 0.0], ['A', 'B', 'C', 1.0],
        ['A', 'C', 'A', 0.0], ['A', 'C', 'B', 1.0], ['A', 'C', 'C', 0.0],
        ['B', 'A', 'A', 0.0], ['B', 'A', 'B', 0.0], ['B', 'A', 'C', 1.0],
        ['B', 'B', 'A', 0.5], ['B', 'B', 'B', 0.0], ['B', 'B', 'C', 0.5],
        ['B', 'C', 'A', 1.0], ['B', 'C', 'B', 0.0], ['B', 'C', 'C', 0.0],
        ['C', 'A', 'A', 0.0], ['C', 'A', 'B', 1.0], ['C', 'A', 'C', 0.0],
        ['C', 'B', 'A', 1.0], ['C', 'B', 'B', 0.0], ['C', 'B', 'C', 0.0],
        ['C', 'C', 'A', 0.5], ['C', 'C', 'B', 0.5], ['C', 'C', 'C', 0.0],
    ]
    monty = ConditionalProbabilityTable(monty_rows, [guest, prize])

    guest_state = State(guest, name="guest")
    prize_state = State(prize, name="prize")
    monty_state = State(monty, name="monty")

    monty_network = BayesianNetwork("test")
    monty_network.add_nodes(guest_state, prize_state, monty_state)
    # Edges run parent -> child.
    monty_network.add_edge(guest_state, monty_state)
    monty_network.add_edge(prize_state, monty_state)
    monty_network.bake()

    # Record where each state ended up after baking.
    baked_states = monty_network.states
    monty_index = baked_states.index(monty_state)
    prize_index = baked_states.index(prize_state)
    guest_index = baked_states.index(guest_state)
def create_con_prob_table(num_prereqs, num_grades, states):
    """Return a uniform ConditionalProbabilityTable over grade combinations.

    The event rows come from ``create_cartesian_table(num_grades,
    num_prereqs + 1)``; every row gets the probability ``1 / num_grades``
    in a new trailing column.  The parent distributions come from
    ``get_disc_dist_list(states)``.
    """
    events = create_cartesian_table(num_grades, num_prereqs + 1)

    # The new column is labelled with the current column count.
    # NOTE(review): with an int ``num_grades`` this division is only a float
    # division on Python 3 — confirm the target interpreter.
    probability_column = len(events.columns)
    events[probability_column] = 1 / num_grades

    rows = events.values.tolist()
    parents = get_disc_dist_list(states)
    return ConditionalProbabilityTable(rows, parents)
def __init__(self):
    """Build the fully connected Rain/Sprinkler/Wet network and its metadata.

    Sets ``self.model`` to the baked network and ``self.meta`` to one
    categorical descriptor (values 'T'/'F') per node.
    """
    rain = DiscreteDistribution({'T': 0.2, 'F': 0.8})

    # Rows are [Rain, Sprinkler, probability].
    sprinkler_rows = [
        ['F', 'T', 0.4],
        ['F', 'F', 0.6],
        ['T', 'T', 0.1],
        ['T', 'F', 0.9],
    ]
    sprinkler = ConditionalProbabilityTable(sprinkler_rows, [rain])

    # Rows are [Sprinkler, Rain, Wet, probability], matching the parent order.
    wet_rows = [
        ['F', 'F', 'T', 0.01],
        ['F', 'F', 'F', 0.99],
        ['F', 'T', 'T', 0.8],
        ['F', 'T', 'F', 0.2],
        ['T', 'F', 'T', 0.9],
        ['T', 'F', 'F', 0.1],
        ['T', 'T', 'T', 0.99],
        ['T', 'T', 'F', 0.01],
    ]
    wet = ConditionalProbabilityTable(wet_rows, [sprinkler, rain])

    s1 = Node(rain, name="Rain")
    s2 = Node(sprinkler, name="Sprinkler")
    s3 = Node(wet, name="Wet")

    model = BayesianNetwork("Simple fully connected")
    model.add_states(s1, s2, s3)
    # Edges run parent -> child.
    for parent, child in ((s1, s2), (s1, s3), (s2, s3)):
        model.add_edge(parent, child)
    model.bake()
    self.model = model

    # Create one unnamed descriptor per node, then name the first three.
    meta = [
        {
            "name": None,
            "type": "categorical",
            "size": 2,
            "i2s": ['T', 'F']
        }
        for _ in range(self.model.node_count())
    ]
    for position, label in enumerate(('Rain', 'Sprinkler', 'Wet')):
        meta[position]['name'] = label
    self.meta = meta
def build_cpts(dfs):
    """Turn (node, DataFrame) pairs into pomegranate distributions.

    A node whose parent list from ``get_metadata_of`` has no truthy entry
    becomes a ``DiscreteDistribution`` built from ``dict(df.values)``.
    Any other node becomes a ``ConditionalProbabilityTable`` over the
    already-built distributions of its parents, so parents must appear in
    ``dfs`` before their children.

    Returns:
        dict mapping each node to its distribution.
    """
    tables = {}
    for node, frame in dfs:
        _name, parents, _values = get_metadata_of(node)
        if any(parents):
            parent_tables = [tables[parent] for parent in parents]
            tables[node] = ConditionalProbabilityTable(
                frame.values, parent_tables)
        else:
            # Two-column frame: value -> probability.
            tables[node] = DiscreteDistribution(dict(frame.values))
    return tables
def export_probabilities(self, parents=None):
    """Export this object's items as a pomegranate distribution.

    Args:
        parents: passed through as the parent list of the
            ``ConditionalProbabilityTable``; unused for the unconditional
            case.

    Returns:
        A ``ConditionalProbabilityTable`` built from each item's own
        ``export_probabilities()`` when ``get_is_conditional()`` is true,
        otherwise a ``DiscreteDistribution`` mapping each item's outcome
        to its probability.
    """
    if not self.get_is_conditional():
        outcome_probs = {
            item.get_outcome(): item.get_probability()
            for item in self._items
        }
        return DiscreteDistribution(outcome_probs)

    rows = [item.export_probabilities() for item in self._items]
    return ConditionalProbabilityTable(rows, parents)
def setup_cpt():
    """Create the shared Monty Hall CPT and sample data as module globals.

    Sets ``monty`` (the conditional table over guest and prize), ``X``
    (31 complete observations) and ``X_nan`` (31 observations in which
    some entries are the string 'nan').
    """
    global monty, X, X_nan

    guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

    # Rows are [guest, prize, monty, probability].
    monty_rows = [
        ['A', 'A', 'A', 0.0], ['A', 'A', 'B', 0.5], ['A', 'A', 'C', 0.5],
        ['A', 'B', 'A', 0.0], ['A', 'B', 'B', 0.0], ['A', 'B', 'C', 1.0],
        ['A', 'C', 'A', 0.0], ['A', 'C', 'B', 1.0], ['A', 'C', 'C', 0.0],
        ['B', 'A', 'A', 0.0], ['B', 'A', 'B', 0.0], ['B', 'A', 'C', 1.0],
        ['B', 'B', 'A', 0.5], ['B', 'B', 'B', 0.0], ['B', 'B', 'C', 0.5],
        ['B', 'C', 'A', 1.0], ['B', 'C', 'B', 0.0], ['B', 'C', 'C', 0.0],
        ['C', 'A', 'A', 0.0], ['C', 'A', 'B', 1.0], ['C', 'A', 'C', 0.0],
        ['C', 'B', 'A', 1.0], ['C', 'B', 'B', 0.0], ['C', 'B', 'C', 0.0],
        ['C', 'C', 'A', 0.5], ['C', 'C', 'B', 0.5], ['C', 'C', 'C', 0.0],
    ]
    monty = ConditionalProbabilityTable(monty_rows, [guest, prize])

    # Complete observations, one [guest, prize, monty] triple per row.
    X = [
        ['A', 'A', 'C'], ['A', 'A', 'B'], ['A', 'A', 'C'], ['A', 'A', 'B'],
        ['A', 'A', 'A'], ['A', 'B', 'A'], ['A', 'B', 'A'], ['A', 'B', 'B'],
        ['A', 'B', 'C'], ['A', 'C', 'A'], ['A', 'C', 'C'], ['A', 'C', 'C'],
        ['A', 'C', 'C'], ['A', 'C', 'B'], ['B', 'A', 'A'], ['B', 'A', 'B'],
        ['B', 'A', 'B'], ['B', 'A', 'B'], ['B', 'B', 'B'], ['B', 'B', 'C'],
        ['B', 'C', 'A'], ['B', 'C', 'B'], ['B', 'C', 'A'], ['B', 'C', 'B'],
        ['C', 'A', 'B'], ['C', 'B', 'B'], ['C', 'B', 'C'], ['C', 'C', 'A'],
        ['C', 'C', 'C'], ['C', 'C', 'C'], ['C', 'C', 'C'],
    ]

    # Same layout, with some entries replaced by the string 'nan'.
    X_nan = [
        ['nan', 'A', 'C'], ['A', 'A', 'nan'], ['A', 'nan', 'C'],
        ['A', 'A', 'B'], ['A', 'A', 'A'], ['A', 'B', 'nan'],
        ['A', 'B', 'A'], ['A', 'B', 'nan'], ['A', 'B', 'C'],
        ['A', 'C', 'A'], ['A', 'nan', 'C'], ['A', 'C', 'C'],
        ['A', 'C', 'C'], ['A', 'C', 'B'], ['B', 'nan', 'A'],
        ['B', 'A', 'B'], ['nan', 'A', 'B'], ['B', 'A', 'B'],
        ['B', 'B', 'B'], ['B', 'B', 'C'], ['B', 'C', 'A'],
        ['nan', 'C', 'B'], ['B', 'C', 'A'], ['nan', 'C', 'B'],
        ['C', 'A', 'B'], ['C', 'B', 'B'], ['C', 'nan', 'C'],
        ['C', 'nan', 'A'], ['C', 'nan', 'C'], ['C', 'C', 'C'],
        ['C', 'C', 'C'],
    ]
def __get_bayesian_network_model(
    self,
    symptom_distributions: List,
    symptom_states: List,
    file_name: str,
    disease_name: str,
):
    """Build and bake a network with one disease node fed by symptom nodes.

    Args:
        symptom_distributions: parent distributions for the disease table.
        symptom_states: nodes to add to the network; each gets an edge to
            the disease node.
        file_name: passed to ``get_from_csv``; every record must unpack
            into seven fields (five symptom values, the disease value and
            a probability that is converted with ``float``).
        disease_name: name of both the disease node and the network.

    Returns:
        The baked ``BayesianNetwork``.
    """
    table_rows = [
        [s1, s2, s3, s4, s5, d, float(p)]
        for (s1, s2, s3, s4, s5, d, p) in get_from_csv(file_name)
    ]
    disease_table = ConditionalProbabilityTable(
        table_rows,
        symptom_distributions,
    )
    disease = Node(disease_table, name=disease_name)

    model = BayesianNetwork(disease_name)
    model.add_state(disease)
    for symptom_state in symptom_states:
        # Each symptom is a parent of the disease node.
        model.add_state(symptom_state)
        model.add_edge(symptom_state, disease)
    model.bake()
    return model
def worker(node: Type[BaseNode]) -> DiscreteParams:
    """Estimate discrete parameters for ``node`` from the enclosing ``data``.

    Returns a dict with:
        'vals':  the node's observed values as strings, sorted.
        'cprob': for a node without parents, the list of marginal
                 probabilities ordered by sorted value; otherwise a dict
                 mapping ``str([parent values as strings])`` to the list of
                 probabilities taken from consecutive table rows.
    """
    parents = node.disc_parents + node.cont_parents

    # The marginal fit supplies the value list in both cases.
    marginal = DiscreteDistribution.from_samples(data[node.name].values)
    vals = sorted(str(key) for key in marginal.parameters[0].keys())

    if not parents:
        cprob = list(dict(sorted(marginal.items())).values())
        return {"cprob": cprob, 'vals': vals}

    table = ConditionalProbabilityTable.from_samples(
        data[parents + [node.name]].values)
    rows = table.parameters[0]
    block = len(vals)
    n_parents = len(parents)

    # Rows are consumed in blocks of len(vals); the last column of each row
    # is its probability and the leading columns are the parent values.
    cprob = dict()
    for start in range(0, len(rows), block):
        probs = [rows[idx][-1] for idx in range(start, start + block)]
        combination = [str(value) for value in rows[start][0:n_parents]]
        cprob[str(combination)] = probs
    return {"cprob": cprob, 'vals': vals}
from pomegranate import DiscreteDistribution
from pomegranate import ConditionalProbabilityTable
from pomegranate import BayesianNetwork
from pomegranate import Node

# Monty Hall problem: the guest's pick and the prize location are uniform
# roots; Monty's choice depends on both.
guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

# Rows are [guest, prize, monty, probability].
monty = ConditionalProbabilityTable(
    [
        ['A', 'A', 'A', 0.0], ['A', 'A', 'B', 0.5], ['A', 'A', 'C', 0.5],
        ['A', 'B', 'A', 0.0], ['A', 'B', 'B', 0.0], ['A', 'B', 'C', 1.0],
        ['A', 'C', 'A', 0.0], ['A', 'C', 'B', 1.0], ['A', 'C', 'C', 0.0],
        ['B', 'A', 'A', 0.0], ['B', 'A', 'B', 0.0], ['B', 'A', 'C', 1.0],
        ['B', 'B', 'A', 0.5], ['B', 'B', 'B', 0.0], ['B', 'B', 'C', 0.5],
        ['B', 'C', 'A', 1.0], ['B', 'C', 'B', 0.0], ['B', 'C', 'C', 0.0],
        ['C', 'A', 'A', 0.0], ['C', 'A', 'B', 1.0], ['C', 'A', 'C', 0.0],
        ['C', 'B', 'A', 1.0], ['C', 'B', 'B', 0.0], ['C', 'B', 'C', 0.0],
        ['C', 'C', 'A', 0.5], ['C', 'C', 'B', 0.5], ['C', 'C', 'C', 0.0],
    ],
    [guest, prize],
)

s1 = Node(guest, name="guest")
s2 = Node(prize, name="prize")
s3 = Node(monty, name="monty")

model = BayesianNetwork("Monty Hall Problem")
model.add_states(s1, s2, s3)
# Edges run parent -> child.
model.add_edge(s1, s3)
model.add_edge(s2, s3)
model.bake()
def __init__(self, filename):
    """Load a discrete Bayesian network from a BIF file.

    The file is scanned for ``variable ... { ... }`` and
    ``probability ... { ... }`` blocks, which are paired positionally.
    Variables without parents become ``DiscreteDistribution`` objects;
    the others become ``ConditionalProbabilityTable`` objects once all of
    their parents have been built.

    Sets:
        self.meta:  one ``{"name", "type", "size", "i2s"}`` dict per variable.
        self.model: the baked ``BayesianNetwork``.

    Args:
        filename: path of the BIF file to read.

    Raises:
        AssertionError: if a block is inconsistent (option count, variable
            name, row length, or probabilities not summing to 1).
        ValueError: if the conditional variables depend on each other
            cyclically, so no construction order exists.
    """
    with open(filename) as f:
        bif = f.read()

    # Named to avoid shadowing the builtin ``vars``.
    var_blocks = re.findall(r"variable[^\{]+{[^\}]+}", bif)
    prob_blocks = re.findall(r"probability[^\{]+{[^\}]+}", bif)

    var_nodes = {}
    var_index_to_name = []
    edges = []
    self.meta = []
    # Conditional variables whose table has not been constructed yet.
    todo = set()

    for v, p in zip(var_blocks, prob_blocks):
        m = re.search(r"variable\s+([^\{\s]+)\s+", v)
        v_name = m.group(1)

        m = re.search(r"type\s+discrete\s+\[\s*(\d+)\s*\]\s*\{([^\}]+)\}", v)
        v_opts_n = int(m.group(1))
        v_opts = m.group(2).replace(',', ' ').split()
        assert v_opts_n == len(v_opts)

        # cond[0] is the variable itself, cond[1:] are its parents.
        m = re.search(r"probability\s*\(([^)]+)\)", p)
        cond = m.group(1).replace('|', ' ').replace(',', ' ').split()
        assert cond[0] == v_name

        self.meta.append({
            "name": v_name,
            "type": "categorical",
            "size": v_opts_n,
            "i2s": v_opts
        })

        if len(cond) == 1:
            # Root variable: a single ``table p1, p2, ...;`` entry.
            m = re.search(r"table([e\-\d\.\s,]*);", p)
            margin_p = m.group(1).replace(',', ' ').split()
            margin_p = [float(x) for x in margin_p]
            assert abs(sum(margin_p) - 1) < 1e-6
            assert len(margin_p) == v_opts_n
            margin_p = dict(zip(v_opts, margin_p))

            var_index_to_name.append(v_name)
            var_nodes[v_name] = DiscreteDistribution(margin_p)
        else:
            # Conditional variable: one ``(parent values) p1, p2, ...;``
            # entry per parent combination.
            m_iter = re.finditer(r"\(([^)]*)\)([\s\d\.,\-e]+);", p)
            cond_p_table = []
            for m in m_iter:
                cond_values = m.group(1).replace(',', ' ').split()
                cond_p = m.group(2).replace(',', ' ').split()
                cond_p = [float(x) for x in cond_p]
                assert len(cond_values) == len(cond) - 1
                assert len(cond_p) == v_opts_n
                assert abs(sum(cond_p) - 1) < 1e-6

                for opt, opt_p in zip(v_opts, cond_p):
                    cond_p_table.append(cond_values + [opt, opt_p])

            var_index_to_name.append(v_name)
            # Stored as raw (rows, cond) until the parents are available.
            var_nodes[v_name] = (cond_p_table, cond)
            for x in cond[1:]:
                edges.append((x, v_name))
            todo.add(v_name)

    # Build one conditional table per pass, picking a variable whose parents
    # are all constructed.  The ``break`` keeps us from iterating the set
    # after mutating it.
    while len(todo) > 0:
        for v_name in todo:
            cond_p_table, cond = var_nodes[v_name]
            if any(y in todo for y in cond[1:]):
                continue

            cond_t = [var_nodes[x] for x in cond[1:]]
            var_nodes[v_name] = ConditionalProbabilityTable(
                cond_p_table, cond_t)
            todo.remove(v_name)
            break
        else:
            # No variable was buildable: every remaining one waits on
            # another remaining one.  Previously this looped forever.
            raise ValueError(
                "cyclic dependency among variables: %s"
                % ", ".join(sorted(todo)))

    for x in var_index_to_name:
        var_nodes[x] = Node(var_nodes[x], name=x)

    var_nodes_list = [var_nodes[x] for x in var_index_to_name]

    model = BayesianNetwork("tmp")
    model.add_states(*var_nodes_list)
    for edge in edges:
        model.add_edge(var_nodes[edge[0]], var_nodes[edge[1]])
    model.bake()
    self.model = model
def get_BN(self, DAG, child_parent):
    """Build ``self.network`` from a DAG and a child -> parents mapping.

    Root nodes (those in ``self.nodes`` that are not keys of
    ``child_parent``) become ``DiscreteDistribution`` objects.  Every other
    node becomes a ``ConditionalProbabilityTable`` linked to its parents'
    distributions, created only once all of its parents exist.

    Side effects: sets ``self.npd`` and ``self.network`` and appends every
    added node name to ``self.BN_node_orders``.  ``bake()`` is not called
    here.

    Args:
        DAG: object whose ``edge`` attribute maps each source node to a
            mapping keyed by its target nodes.
            # presumably a networkx 1.x DiGraph — confirm against caller
        child_parent: dict mapping each non-root node to its parent list.

    Returns:
        The constructed ``BayesianNetwork`` (same object as
        ``self.network``).

    Raises:
        ValueError: if some node's parents can never all be created (for
            example a cyclic dependency, or a parent not in ``self.nodes``).
    """
    # 1. Root nodes are the ones that never appear as a child.
    node_without_parents = [
        e for e in self.nodes if e not in child_parent.keys()
    ]

    # 2. Probability tables for every node.
    node_prob_dict = self.get_nodes_prob_dist(node_without_parents,
                                              child_parent)
    self.npd = node_prob_dict

    # States are added to the network later; distributions are kept so a
    # child's table can reference its parents.
    nodes_state = {}
    nodes_dist = {}
    # Copy so that removing entries does not touch self.nodes.
    remaining_nodes_list = list(self.nodes)

    for node in node_without_parents:
        node_dist = DiscreteDistribution(node_prob_dict[node])
        nodes_dist[node] = node_dist
        nodes_state[node] = State(node_dist, name=node)
        remaining_nodes_list.remove(node)

    # Every remaining node has parents; create it once they all exist.
    while len(remaining_nodes_list) > 0:
        progressed = False
        for node, parent_lst in child_parent.items():
            if set(parent_lst).issubset(nodes_state.keys()) and \
                    node in remaining_nodes_list:
                node_dist = ConditionalProbabilityTable(
                    node_prob_dict[node],
                    [nodes_dist[i] for i in parent_lst])
                nodes_dist[node] = node_dist
                nodes_state[node] = State(node_dist, name=node)
                remaining_nodes_list.remove(node)
                progressed = True
        if not progressed:
            # A full pass created nothing, so another pass cannot either.
            # Previously this looped forever.
            raise ValueError(
                "cannot resolve parents for nodes: %r"
                % (remaining_nodes_list,))

    # 3. Create the network and add the states in creation order.
    self.network = BayesianNetwork("User_pref")
    for node, state in nodes_state.items():
        self.network.add_node(state)
        self.BN_node_orders.append(node)

    # 4. Link the states using the DAG's edges.
    for a, bs in DAG.edge.items():
        for b in bs.keys():
            self.network.add_edge(nodes_state[a], nodes_state[b])

    return self.network
def test_monty():
    """Check Monty Hall CPT log probabilities before and after a weighted fit."""
    guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
    # The prize location is independent of the guest's pick.
    prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})

    # Monty depends on both; rows are [guest, prize, monty, probability].
    monty_rows = [
        ['A', 'A', 'A', 0.0], ['A', 'A', 'B', 0.5], ['A', 'A', 'C', 0.5],
        ['A', 'B', 'A', 0.0], ['A', 'B', 'B', 0.0], ['A', 'B', 'C', 1.0],
        ['A', 'C', 'A', 0.0], ['A', 'C', 'B', 1.0], ['A', 'C', 'C', 0.0],
        ['B', 'A', 'A', 0.0], ['B', 'A', 'B', 0.0], ['B', 'A', 'C', 1.0],
        ['B', 'B', 'A', 0.5], ['B', 'B', 'B', 0.0], ['B', 'B', 'C', 0.5],
        ['B', 'C', 'A', 1.0], ['B', 'C', 'B', 0.0], ['B', 'C', 'C', 0.0],
        ['C', 'A', 'A', 0.0], ['C', 'A', 'B', 1.0], ['C', 'A', 'C', 0.0],
        ['C', 'B', 'A', 1.0], ['C', 'B', 'B', 0.0], ['C', 'B', 'C', 0.0],
        ['C', 'C', 'A', 0.5], ['C', 'C', 'B', 0.5], ['C', 'C', 'C', 0.0],
    ]
    monty = ConditionalProbabilityTable(monty_rows, [guest, prize])

    minus_inf = float("-inf")

    # Before fitting: probability-1 rows give 0, probability-0 rows give -inf.
    initial_expectations = [
        (('A', 'B', 'C'), 0.),
        (('C', 'B', 'A'), 0.),
        (('C', 'C', 'C'), minus_inf),
        (('A', 'A', 'A'), minus_inf),
        (('B', 'A', 'C'), 0.),
        (('C', 'A', 'B'), 0.),
    ]
    for observation, expected in initial_expectations:
        assert_equal(monty.log_probability(observation), expected)

    data = [
        ['A', 'A', 'C'], ['A', 'A', 'C'], ['A', 'A', 'B'], ['A', 'A', 'A'],
        ['A', 'A', 'C'], ['B', 'B', 'B'], ['B', 'B', 'C'], ['C', 'C', 'A'],
        ['C', 'C', 'C'], ['C', 'C', 'C'], ['C', 'C', 'C'], ['C', 'B', 'A'],
    ]
    monty.fit(data, weights=[1, 1, 3, 3, 1, 1, 3, 7, 1, 1, 1, 1])

    # After fitting: these pairs of outcomes must be equally likely.
    equal_pairs = [
        (('A', 'A', 'A'), ('A', 'A', 'C')),
        (('A', 'A', 'A'), ('A', 'A', 'B')),
        (('B', 'A', 'A'), ('B', 'A', 'C')),
    ]
    for left, right in equal_pairs:
        assert_equal(monty.log_probability(left),
                     monty.log_probability(right))

    # After fitting: these outcomes must have zero probability.
    for impossible in (('B', 'B', 'A'), ('C', 'C', 'B')):
        assert_equal(monty.log_probability(impossible), minus_inf)
# ["T", "F", 0.45863], # ["F", "T", 1 - 0.845], # ["F", "F", 1 - 0.45863], # ] coughing_allergy_lung_cancer = [ ["T", "F", "F", 0.13], ["T", "T", "F", 0.64], ["T", "F", "T", 0.76], ["T", "T", "T", 0.99], ["F", "F", "F", 1 - 0.13], ["F", "T", "F", 1 - 0.64], ["F", "F", "T", 1 - 0.76], ["F", "T", "T", 1 - 0.99], ] Attention_Disorder = ConditionalProbabilityTable(table=attention_genetics, parents=[Genetics]) Smoking = ConditionalProbabilityTable(table=smoking_peer_pressure_anxiety, parents=[Peer_Pressure, Anxiety]) Lung_cancer = ConditionalProbabilityTable(table=lung_cancer_genetics_smoking, parents=[Genetics, Smoking]) Coughing = ConditionalProbabilityTable(table=coughing_allergy_lung_cancer, parents=[Allergy, Lung_cancer]) Yellow_Fingers = ConditionalProbabilityTable(table=yellow_fingers_smoking, parents=[Smoking]) Fatigue = ConditionalProbabilityTable(table=fatigue_lung_cancer_coughing, parents=[Lung_cancer,Coughing]) Car_Accident = ConditionalProbabilityTable(table=car_accident_attention_fatigue, parents=[Attention_Disorder, Fatigue]) states = {} states['Anxiety'] = State(Anxiety, name="Anxiety") states['Peer_Pressure'] = State(Peer_Pressure, name="Peer_Pressure")
from pomegranate import DiscreteDistribution from pomegranate import ConditionalProbabilityTable from pomegranate import BayesianNetwork from pomegranate import Node # Rain node has no parents rain = Node(DiscreteDistribution({ "none": 0.7, "light": 0.2, "heavy": 0.1 }), name="rain") # Track maintenance node is coditional on rain maintenance = Node(ConditionalProbabilityTable( [["none", "yes", 0.4], ["none", "no", 0.6], ["light", "yes", 0.2], ["light", "no", 0.8], ["heavy", "yes", 0.1], ["heavy", "no", 0.9]], [rain.distribution]), name="maintenance") # Train Node is conditional on rain, and maintenance train = Node(ConditionalProbabilityTable([ ["none", "yes", "on time", 0.8], ["none", "yes", "delayed", 0.2], ["none", "no", "on time", 0.9], ["none", "no", "delayed", 0.1], ["light", "yes", "on time", 0.6], ["light", "yes", "delayed", 0.4], ["light", "no", "on time", 0.7], ["light", "no", "delayed", 0.3], ["heavy", "yes", "on time", 0.4], ["heavy", "yes", "delayed", 0.6],
def setup_huge_monty():
    """Build the six-node "huge monty" network and publish it via globals.

    This is a made-up extension of the Monty Hall problem whose purpose is
    to mix tables keyed by booleans, integers and strings in one network.
    The distributions and the baked ``huge_monty_network`` are stored in
    module globals.
    """
    global huge_monty_network, huge_monty_friend, huge_monty_guest, huge_monty
    global huge_monty_remaining, huge_monty_randomize, huge_monty_prize

    # Root: boolean "friend" variable.
    huge_monty_friend = DiscreteDistribution({True: 0.5, False: 0.5})

    # Guest's pick given friend; rows are [friend, guest, probability].
    guest_rows = [
        [True, 'A', 0.50], [True, 'B', 0.25], [True, 'C', 0.25],
        [False, 'A', 0.0], [False, 'B', 0.7], [False, 'C', 0.3],
    ]
    huge_monty_guest = ConditionalProbabilityTable(
        guest_rows, [huge_monty_friend])

    # Root: number of remaining cars.
    huge_monty_remaining = DiscreteDistribution({0: 0.1, 1: 0.7, 2: 0.2, })

    # Whether they randomize, given the number of remaining cars;
    # rows are [remaining, randomize, probability].
    randomize_rows = [
        [0, True, 0.05], [0, False, 0.95],
        [1, True, 0.8], [1, False, 0.2],
        [2, True, 0.5], [2, False, 0.5],
    ]
    huge_monty_randomize = ConditionalProbabilityTable(
        randomize_rows, [huge_monty_remaining])

    # Prize location given randomize and friend;
    # rows are [randomize, friend, prize, probability].
    prize_rows = [
        [True, True, 'A', 0.3], [True, True, 'B', 0.4],
        [True, True, 'C', 0.3],
        [True, False, 'A', 0.2], [True, False, 'B', 0.4],
        [True, False, 'C', 0.4],
        [False, True, 'A', 0.1], [False, True, 'B', 0.9],
        [False, True, 'C', 0.0],
        [False, False, 'A', 0.0], [False, False, 'B', 0.4],
        [False, False, 'C', 0.6],
    ]
    huge_monty_prize = ConditionalProbabilityTable(
        prize_rows, [huge_monty_randomize, huge_monty_friend])

    # Monty given guest and prize; rows are [guest, prize, monty, probability].
    monty_rows = [
        ['A', 'A', 'A', 0.0], ['A', 'A', 'B', 0.5], ['A', 'A', 'C', 0.5],
        ['A', 'B', 'A', 0.0], ['A', 'B', 'B', 0.0], ['A', 'B', 'C', 1.0],
        ['A', 'C', 'A', 0.0], ['A', 'C', 'B', 1.0], ['A', 'C', 'C', 0.0],
        ['B', 'A', 'A', 0.0], ['B', 'A', 'B', 0.0], ['B', 'A', 'C', 1.0],
        ['B', 'B', 'A', 0.5], ['B', 'B', 'B', 0.0], ['B', 'B', 'C', 0.5],
        ['B', 'C', 'A', 1.0], ['B', 'C', 'B', 0.0], ['B', 'C', 'C', 0.0],
        ['C', 'A', 'A', 0.0], ['C', 'A', 'B', 1.0], ['C', 'A', 'C', 0.0],
        ['C', 'B', 'A', 1.0], ['C', 'B', 'B', 0.0], ['C', 'B', 'C', 0.0],
        ['C', 'C', 'A', 0.5], ['C', 'C', 'B', 0.5], ['C', 'C', 'C', 0.0],
    ]
    huge_monty = ConditionalProbabilityTable(
        monty_rows, [huge_monty_guest, huge_monty_prize])

    # Wrap each distribution in a named state.
    s0 = State(huge_monty_friend, name="huge_monty_friend")
    s1 = State(huge_monty_guest, name="huge_monty_guest")
    s2 = State(huge_monty_prize, name="huge_monty_prize")
    s3 = State(huge_monty, name="huge_monty")
    s4 = State(huge_monty_remaining, name="huge_monty_remaining")
    s5 = State(huge_monty_randomize, name="huge_monty_randomize")

    huge_monty_network = BayesianNetwork("test")
    huge_monty_network.add_nodes(s0, s1, s2, s3, s4, s5)

    # Dependencies as (parent, child) pairs, added in this order.
    dependencies = (
        (s0, s1),
        (s1, s3),
        (s2, s3),
        (s4, s5),
        (s5, s2),
        (s0, s2),
    )
    for parent, child in dependencies:
        huge_monty_network.add_transition(parent, child)
    huge_monty_network.bake()
blanket = set() for n in node.parents: blanket.add(n) for n in node.childeren: blanket.add(n) for parN in n.parents: if n != node: blanket.add(parN) return blanket first = DiscreteDistribution({1: 1. / 2, 0: 1. / 2}) second = DiscreteDistribution({1: 1. / 2, 0: 1. / 2}) mainNode = ConditionalProbabilityTable( [[1, 1, 1, 0.4], [1, 1, 0, 0.6], [1, 0, 1, 0.9], [1, 0, 0, 0.1], [0, 1, 1, 0.9], [0, 1, 0, 0.1], [0, 0, 1, 0.4], [0, 0, 0, 0.6]], [first, second]) four = ConditionalProbabilityTable( [[1, 1, 0.4], [1, 0, 0.6], [0, 0, 0.6], [0, 1, 0.4]], [mainNode]) five = ConditionalProbabilityTable( [[1, 1, 0.4], [1, 0, 0.6], [0, 0, 0.6], [0, 1, 0.4]], [mainNode]) s1 = BNode(first, False, name="first") s2 = BNode(second, False, name="second") s3 = BNode(mainNode, True, name="mainNode") s4 = BNode(four, True, name="dd") s5 = BNode(five, True, name='ee') rng = RandomNumberGenerator() rng.add_edge(s1, s3)
print(t) return t.values.tolist() def singleVariable(target): oneValue = df[target].value_counts()[1] / len(df) or 0 return {0: 1 - oneValue, 1: oneValue} anxiety = DiscreteDistribution(singleVariable("Anxiety")) peer_pressure = DiscreteDistribution(singleVariable("Peer_Pressure")) genetics = DiscreteDistribution(singleVariable("Genetics")) allergy = DiscreteDistribution(singleVariable("Allergy")) smoking = ConditionalProbabilityTable( buildCpt("Smoking", ["Anxiety", "Peer_Pressure"]), [anxiety, peer_pressure]) lung_cancer = ConditionalProbabilityTable( buildCpt("Lung_cancer", ["Smoking", "Genetics"]), [smoking, genetics]) yellow_fingers = ConditionalProbabilityTable( buildCpt("Yellow_Fingers", ["Smoking"]), [smoking]) attention_disorder = ConditionalProbabilityTable( buildCpt("Attention_Disorder", ["Genetics"]), [genetics]) coughing = ConditionalProbabilityTable( buildCpt("Coughing", ["Allergy", "Lung_cancer"]), [allergy, lung_cancer]) fatigue = ConditionalProbabilityTable( buildCpt("Fatigue", ["Lung_cancer", "Coughing"]), [lung_cancer, coughing]) car_accident = ConditionalProbabilityTable( buildCpt("Car_Accident", ["Fatigue", "Attention_Disorder"]), [fatigue, attention_disorder], )
condProbDict.append([j, k, val]) return condProbDict arr = returnConditionalProbability(df, 'Location', 'Ashwin') arr arr = returnConditionalProbability(df, 'Toss', 'Bat') arr arr = returnConditionalProbability(df, 'Bat', 'Result') arr location = DiscreteDistribution(returnPriorProbability(df, 'Location')) toss = DiscreteDistribution(returnPriorProbability(df, 'Toss')) ashwin = ConditionalProbabilityTable( returnConditionalProbability(df, 'Location', 'Ashwin'), [location]) batting = ConditionalProbabilityTable( returnConditionalProbability(df, 'Toss', 'Bat'), [toss]) result = ConditionalProbabilityTable( returnConditionalProbability(df, 'Bat', 'Result'), [batting]) sLocation = State(location, name="Location") sToss = State(toss, name="Toss") sBatting = State(batting, name="Batting") sAshwin = State(ashwin, name="Ashwin") sResult = State(result, name="Result") # Create the Bayesian network object with a useful name model = BayesianNetwork("Ashwin Playing Problem") # Add the three states to the network