Example #1
def vetor_Rede(solucao, nodes):
    # Decode a pairwise solution vector into a DAG over `nodes`; return False
    # if the vector implies a directed cycle or leaves a node unconnected.
    G_aux = BayesianModel()
    #G_aux.add_nodes_from(nodes)
    k = 0
    aux = 1
    for i in range(1, len(nodes)):
        for j in range(aux):
            if solucao[k] == 1:
                if nodes[i] in G_aux.nodes() and nodes[j] in G_aux.nodes(
                ) and nx.has_path(G_aux, nodes[j], nodes[i]):
                    return False
                else:
                    G_aux.add_edge(nodes[i], nodes[j])
            elif solucao[k] == 2:
                if nodes[i] in G_aux.nodes() and nodes[j] in G_aux.nodes(
                ) and nx.has_path(G_aux, nodes[i], nodes[j]):
                    return False
                else:
                    G_aux.add_edge(nodes[j], nodes[i])
            k = k + 1
        aux = aux + 1
    for i in nodes:
        if i not in G_aux.nodes():
            return False
    return G_aux
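A minimal usage sketch of the decoder above, assuming pgmpy's BayesianModel and networkx (as nx) are imported as in the snippet; the solution vector [2, 0, 2] is a hypothetical input that decodes to the chain A -> B -> C:

# Pairs are visited in the order (B,A), (C,A), (C,B):
# 0 = no edge, 1 = later node -> earlier node, 2 = the reverse.
nodes = ['A', 'B', 'C']
G = vetor_Rede([2, 0, 2], nodes)
if G:
    print(G.edges())  # [('A', 'B'), ('B', 'C')]
else:
    print('invalid solution')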
Example #2
    def inf(self, file1):
        with open(file1, encoding="utf8") as f1:
            lines = f1.readlines()
        i = 0
        G = BayesianModel()
        nodeList = {}
        while i < len(lines):
            if lines[i] == '\n':
                break
            nodeName = self.getnode(lines[i])
            valueNum = int(lines[i + 1])
            cpd_str = lines[i + 2]
            sequence = self.getList(lines[i + 3])
            card = self.getCard(lines[i + 4])
            cpd = self.parseCpd(cpd_str, valueNum, card)
            entry = {}
            entry['nodeName'] = nodeName
            entry['valueNum'] = valueNum
            entry['cpd'] = cpd
            entry['sequence'] = sequence
            entry['card'] = card
            # entry = [nodeName, valueNum, cpd, sequence, card]

            nodeList[nodeName] = entry
            i += 5
        edges = self.getegdes(lines[i + 1])
        evidence2 = self.getValue(lines[i + 3])

        # print(nodeList)
        for i in range(int(len(edges) / 2)):
            G.add_edge(edges[2 * i], edges[2 * i + 1])

        for node in nodeList.values():
            if node['sequence'][0] == '':
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 values=node['cpd'])
            else:
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 evidence=node['sequence'],
                                 evidence_card=node['card'],
                                 values=node['cpd'])
            G.add_cpds(cpt)

        if G.check_model():
            # print('1')
            # belief_propagation = BeliefPropagation(G)
            inference = VariableElimination(G)
            result = ''

            for node in G.nodes():
                if node not in evidence2:
                    namelist = [node]
                    result += node + ' '
                    phi_query = inference.query(variables=namelist,
                                                evidence=evidence2,
                                                show_progress=False).values
                    result += str(phi_query) + '\n'
            print(result)
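For reference, a self-contained sketch of the two TabularCPD branches the method chooses between (root node vs. node with parents); the variable names are illustrative only, and the import path shown is the one used by recent pgmpy releases:

from pgmpy.factors.discrete import TabularCPD

# Root node: no evidence, one probability column over its states.
root = TabularCPD(variable='A', variable_card=2, values=[[0.7], [0.3]])
# Child node: one column per configuration of the evidence variables.
child = TabularCPD(variable='B', variable_card=2,
                   values=[[0.9, 0.4], [0.1, 0.6]],
                   evidence=['A'], evidence_card=[2])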
Example #3
    def test_build_skeleton(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        skel1, sep_sets1 = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
        self.assertTrue(
            self._edge_list_equal(skel1.edges(), [('A', 'D'), ('B', 'D'),
                                                  ('C', 'D')]))

        sep_sets_ref1 = {
            frozenset({'A', 'C'}): (),
            frozenset({'A', 'B'}): (),
            frozenset({'C', 'B'}): ()
        }
        self.assertEqual(sep_sets1, sep_sets_ref1)

        model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        skel2, sep_sets2 = ConstraintBasedEstimator.build_skeleton(
            model.nodes(), model.get_independencies())
        self.assertTrue(
            self._edge_list_equal(skel2, [('D', 'B'), ('A', 'C'), ('B', 'C'),
                                          ('C', 'E')]))

        sep_sets_ref2 = {
            frozenset({'D', 'C'}): ('B', ),
            frozenset({'E', 'B'}): ('C', ),
            frozenset({'A', 'D'}): (),
            frozenset({'E', 'D'}): ('C', ),
            frozenset({'E', 'A'}): ('C', ),
            frozenset({'A', 'B'}): ()
        }
        # witnesses/separators might change on each run, so we cannot compare directly
        self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
        self.assertEqual([len(v) for v in sorted(sep_sets2.values())],
                         [len(v) for v in sorted(sep_sets_ref2.values())])
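The Independencies constructor used in these tests encodes assertions positionally: ['B', 'C'] means (B _|_ C), and ['A', ['B', 'C'], 'D'] means (A _|_ B, C | D). A quick standalone check:

from pgmpy.independencies import Independencies

ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
print(ind)  # (B _|_ C) and (A _|_ B, C | D)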
Example #4
def backdoorsTo(model: BayesianModel,
                node: RandomVariable,
                notation: str = ARROW) -> Dict[Name, List[Set[Name]]]:

    inference: CausalInference = CausalInference(model)

    # Getting all the predecessors to get a more complete list of possible adjustment sets
    # TODO: does this give the entire possible list of adjustment sets with each predecessor node, from the bottom
    #  queryVar?
    #predecessorVars = model.predecessors(queryVar) #model.get_parents(queryVar)
    allVars = model.nodes()

    # Getting the variables that will be used as evidence / observed to influence active trails (in other words,
    # the variables that must be set as observed in the query of variable elimination)
    varAndObservedPairs = set([
        (startVar,
         inference.get_all_backdoor_adjustment_sets(X=startVar, Y=node.var))
        for startVar in allVars
    ])
    # remove null frozen sets
    #pairsOfPredObservedVars = list(filter(lambda pair: pair[1] != frozenset(), pairsOfPredObservedVars))

    # Attaching ev var to the frozen set adjustment sets (changing the frozenset datatype to be set on the inside and
    # list on the outside)
    backdoorChoices: List[Tuple[Name, Set[Name]]] = list(
        itertools.chain(
            *[[(startVar, set(innerFroz))
               for innerFroz in outerFroz] if outerFroz != frozenset() else [(
                   startVar, None)]
              for startVar, outerFroz in varAndObservedPairs]))

    # Creating a dict to accumulate adjustment sets of the same keys (concatenating)
    backdoorDict: Dict[Name, List[Set[Name]]] = {}

    for startVar, adjustSets in backdoorChoices:

        if startVar in backdoorDict.keys():
            backdoorDict[startVar] = backdoorDict[startVar] + [adjustSets]
        else:
            backdoorDict[startVar] = [adjustSets]

    if notation == ARROW:  #use arrows
        # Now creating the arrow between startvar and endvar (to make the path clear)
        backdoorTrailDict: Dict[Trail, List[Set[Name]]] = {}

        for startVar, adjustLists in backdoorDict.items():
            backdoorTrailDict[f"{startVar} --> {node.var}"] = adjustLists

        return backdoorTrailDict
    elif notation == PAIR:
        Pair = collections.namedtuple("Pair", ["From", "To", "AdjustSets"])

        lists = []
        for startVar, adjustLists in backdoorDict.items():
            lists.append(
                Pair(From=startVar, To=node.var, AdjustSets=adjustLists))

        return lists
    else:  # default case (notation is None): return the raw dict
        return backdoorDict
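A reduced sketch of the pgmpy call this helper wraps, on a hypothetical three-node confounder model (Z confounds X and Y), leaving out the RandomVariable/notation plumbing:

from pgmpy.models import BayesianModel
from pgmpy.inference import CausalInference

model = BayesianModel([('Z', 'X'), ('Z', 'Y'), ('X', 'Y')])
inference = CausalInference(model)
# The only backdoor adjustment set for X -> Y here is {Z}.
print(inference.get_all_backdoor_adjustment_sets(X='X', Y='Y'))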
Example #5
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Returns
        -------
        model: an instance of BayesianModel.

        Examples
        --------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get("type") == "BayesianNetwork":
            model = BayesianModel()
            model.add_nodes_from(self.probnet["Variables"].keys())
            model.add_edges_from(self.probnet["edges"].keys())

            tabular_cpds = []
            cpds = self.probnet["Potentials"]
            for cpd in cpds:
                var = list(cpd["Variables"].keys())[0]
                states = self.probnet["Variables"][var]["States"]
                evidence = cpd["Variables"][var]
                evidence_card = [
                    len(self.probnet["Variables"][evidence_var]["States"])
                    for evidence_var in evidence
                ]
                arr = list(map(float, cpd["Values"].split()))
                values = np.array(arr)
                values = values.reshape((len(states), values.size // len(states)))
                tabular_cpds.append(
                    TabularCPD(var, len(states), values, evidence, evidence_card)
                )

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                for prop_name, prop_value in self.probnet["Variables"][var].items():
                    model.nodes[var][prop_name] = prop_value
            edges = model.edges()

            if nx.__version__.startswith("1"):
                for edge in edges:
                    for prop_name, prop_value in self.probnet["edges"][edge].items():
                        model.edge[edge[0]][edge[1]][prop_name] = prop_value
            else:
                for edge in edges:
                    for prop_name, prop_value in self.probnet["edges"][edge].items():
                        model.adj[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only Bayesian Network.")
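The reshape above turns the flat Values string into a (state x parent-configuration) matrix. A small worked instance with hypothetical numbers, for a binary variable with one binary parent:

import numpy as np

arr = list(map(float, "0.9 0.4 0.1 0.6".split()))
values = np.array(arr)
values = values.reshape((2, values.size // 2))  # rows: states, columns: parent configs
print(values)  # [[0.9 0.4], [0.1 0.6]]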
Example #6
    def cal(self, file1, file2):
        with open(file1) as f1:
            lines = f1.readlines()
        nodes = self.getegdes(lines[0])
        edges = self.getegdes(lines[1])
        data = pd.read_csv(file2)

        G = BayesianModel()
        G.add_nodes_from(nodes)
        for i in range(int(len(edges) / 2)):
            G.add_edge(edges[2 * i], edges[2 * i + 1])

        output1 = []

        for i in range(int(len(edges) / 2)):
            mut = mr.mutual_info_score(data[edges[2 * i]],
                                       data[edges[2 * i + 1]])
            output1.append(mut)

        output2 = {}
        for node1 in G.nodes():
            d = {}
            for node2 in G.nodes():
                if node1 == node2:
                    continue
                mut = mr.mutual_info_score(data[node1], data[node2])

                d[node2] = mut
            output2[node1] = d

        print(output1)
        print(output2)

        with open('mutual_output.txt', 'w') as f:
            f.write(str(output1))
            f.write('\n')
            f.write(str(output2))
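The snippet presumably aliases sklearn.metrics as mr (inferred from mutual_info_score). A standalone check on toy columns:

import pandas as pd
from sklearn import metrics as mr

data = pd.DataFrame({'A': [0, 0, 1, 1], 'B': [0, 1, 0, 1], 'C': [0, 0, 1, 1]})
print(mr.mutual_info_score(data['A'], data['B']))  # 0.0: independent columns
print(mr.mutual_info_score(data['A'], data['C']))  # ~0.693 nats: identical columns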
Example #7
def create_model_and_inference():
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                if not (target, source) in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    edges = [(t[1], t[0]) for t in edges]

    nodes = set(itertools.chain.from_iterable(edges))
    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]

    # Attach eleven synthetic binary indicator columns (random 0/1 draws).
    for i in range(11):
        nodes_df[str(i)] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")
    # for cpd in model.get_cpds():
    #     print(cpd)

    draw_network(model.nodes(), model.edges(), {}, [])

    return model, VariableElimination(model)
Example #8
    def test_estimate_from_independencies(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        model = ConstraintBasedEstimator.estimate_from_independencies("ABCD", ind)

        self.assertSetEqual(set(model.edges()),
                            set([('B', 'D'), ('A', 'D'), ('C', 'D')]))

        model1 = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        model2 = ConstraintBasedEstimator.estimate_from_independencies(
                            model1.nodes(),
                            model1.get_independencies())

        self.assertTrue(set(model2.edges()) == set(model1.edges()) or
                        set(model2.edges()) == set([('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')]))
Example #9
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'
    if os.path.isfile(graph_file) and load:
        print('Loading saved graph from file...')
        with open(graph_file, 'rb') as f:
            G = pickle.load(f)
        G.check_model()
    else:
        print('loading data...')
        training_labels, go_dict = load_label_data()
        if subtree:
            labels_list = _subtree_labels()
            print(labels_list)
        else:
            labels_list = go_dict.keys()

        print('adding nodes and edges...')
        G = BayesianModel()
        G.add_edges_from([(label, label + '_hat') for label in labels_list])
        obo_graph = obonet.read_obo(OBODB_FILE)
        for label in labels_list:
            children = [
                c for c in networkx.ancestors(obo_graph, label)
                if c in labels_list
            ]
            for child in children:
                G.add_edge(child, label)

        predicted_cpds = get_model_cpds(labels_list=labels_list,
                                        modelsdir=MODEL_CPDS_DIR)
        for cpd in predicted_cpds:
            G.add_cpds(cpd)
        true_label_cpds = get_true_label_cpds(training_labels,
                                              go_dict,
                                              labels_list=labels_list)
        for cpd in true_label_cpds:
            G.add_cpds(cpd)
        remove_list = []
        for node in G.nodes():
            if G.get_cpds(node) is None:
                remove_list.append(node)
                # remove_list.append(node+'_hat')
        for node in remove_list:
            if node in G:
                G.remove_node(node)
        G.check_model()
        with open(graph_file, 'wb') as f:
            pickle.dump(G, f)
    return G
Example #10
    def test_estimate_from_independencies(self):
        ind = Independencies(["B", "C"], ["A", ["B", "C"], "D"])
        ind = ind.closure()
        model = ConstraintBasedEstimator.estimate_from_independencies(
            "ABCD", ind)

        self.assertSetEqual(set(model.edges()),
                            set([("B", "D"), ("A", "D"), ("C", "D")]))

        model1 = BayesianModel([("A", "C"), ("B", "C"), ("B", "D"),
                                ("C", "E")])
        model2 = ConstraintBasedEstimator.estimate_from_independencies(
            model1.nodes(), model1.get_independencies())

        self.assertTrue(
            set(model2.edges()) == set(model1.edges())
            or set(model2.edges()) == set([("B", "C"), ("A", "C"), ("C", "E"),
                                           ("D", "B")]))
Example #11
    def test_estimate_from_independencies(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        model = ConstraintBasedEstimator.estimate_from_independencies(
            "ABCD", ind)

        self.assertSetEqual(set(model.edges()),
                            set([('B', 'D'), ('A', 'D'), ('C', 'D')]))

        model1 = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'),
                                ('C', 'E')])
        model2 = ConstraintBasedEstimator.estimate_from_independencies(
            model1.nodes(), model1.get_independencies())

        self.assertTrue(
            set(model2.edges()) == set(model1.edges())
            or set(model2.edges()) == set([('B', 'C'), ('A', 'C'), ('C', 'E'),
                                           ('D', 'B')]))
Example #12
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Returns
        -------
        model: an instance of BayesianModel.

        Examples
        --------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get('type') == "BayesianNetwork":
            model = BayesianModel(self.probnet['edges'].keys())

            tabular_cpds = []
            cpds = self.probnet['Potentials']
            for cpd in cpds:
                var = list(cpd['Variables'].keys())[0]
                states = self.probnet['Variables'][var]['States']
                evidence = cpd['Variables'][var]
                evidence_card = [len(self.probnet['Variables'][evidence_var]['States'])
                                 for evidence_var in evidence]
                arr = list(map(float, cpd['Values'].split()))
                values = np.array(arr)
                values = values.reshape((len(states), values.size//len(states)))
                tabular_cpds.append(TabularCPD(var, len(states), values, evidence, evidence_card))

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                for prop_name, prop_value in self.probnet['Variables'][var].items():
                    model.node[var][prop_name] = prop_value

            edges = model.edges()
            for edge in edges:
                for prop_name, prop_value in self.probnet['edges'][edge].items():
                    model.edge[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only Bayesian Network.")
Example #13
    def test_build_skeleton(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        skel1, sep_sets1 = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
        self.assertTrue(self._edge_list_equal(skel1.edges(), [('A', 'D'), ('B', 'D'), ('C', 'D')]))

        sep_sets_ref1 = {frozenset({'A', 'C'}): (), frozenset({'A', 'B'}): (), frozenset({'C', 'B'}): ()}
        self.assertEqual(sep_sets1, sep_sets_ref1)

        model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        skel2, sep_sets2 = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies())
        self.assertTrue(self._edge_list_equal(skel2, [('D', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'E')]))

        sep_sets_ref2 = {frozenset({'D', 'C'}): ('B',),
                         frozenset({'E', 'B'}): ('C',),
                         frozenset({'A', 'D'}): (),
                         frozenset({'E', 'D'}): ('C',),
                         frozenset({'E', 'A'}): ('C',),
                         frozenset({'A', 'B'}): ()}
        # witnesses/separators might change on each run, so we cannot compare directly
        self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
        self.assertEqual([len(v) for v in sorted(sep_sets2.values())],
                         [len(v) for v in sorted(sep_sets_ref2.values())])
Example #14
    def test_build_skeleton(self):
        ind = Independencies(["B", "C"], ["A", ["B", "C"], "D"])
        ind = ind.closure()
        skel1, sep_sets1 = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
        self.assertTrue(
            self._edge_list_equal(skel1.edges(), [("A", "D"), ("B", "D"),
                                                  ("C", "D")]))

        sep_sets_ref1 = {
            frozenset({"A", "C"}): (),
            frozenset({"A", "B"}): (),
            frozenset({"C", "B"}): (),
        }
        self.assertEqual(sep_sets1, sep_sets_ref1)

        model = BayesianModel([("A", "C"), ("B", "C"), ("B", "D"), ("C", "E")])
        skel2, sep_sets2 = ConstraintBasedEstimator.build_skeleton(
            model.nodes(), model.get_independencies())
        self.assertTrue(
            self._edge_list_equal(skel2, [("D", "B"), ("A", "C"), ("B", "C"),
                                          ("C", "E")]))

        sep_sets_ref2 = {
            frozenset({"D", "C"}): ("B", ),
            frozenset({"E", "B"}): ("C", ),
            frozenset({"A", "D"}): (),
            frozenset({"E", "D"}): ("C", ),
            frozenset({"E", "A"}): ("C", ),
            frozenset({"A", "B"}): (),
        }
        # witnesses/separators might change on each run, so we cannot compare directly
        self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
        self.assertEqual(
            [len(v) for v in sorted(sep_sets2.values())],
            [len(v) for v in sorted(sep_sets_ref2.values())],
        )
Example #15
# Associating the parameters with the model structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model.
print(cancer_model.check_model())

# Check d-separations. This is only meant for those interested. You do not need to understand this to do the project.
print(cancer_model.is_active_trail('Pollution', 'Smoker'))
print(cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer']))
print(cancer_model.local_independencies('Xray'))
print(cancer_model.get_independencies())

# Print model information
print(cancer_model.edges())
print(cancer_model.nodes())
print(cancer_model.get_cpds())

# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination

cancer_infer = VariableElimination(cancer_model)

# Query
print(cancer_infer.query(variables=['Dyspnoea'], evidence={'Cancer': 0}))
print(
    cancer_infer.query(variables=['Cancer'],
                       evidence={
                           'Smoker': 0,
                           'Pollution': 0
                       }))
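For context, the cancer_model queried above is presumably the classic Cancer network; a structure consistent with the CPD names cpd_poll, cpd_smoke, cpd_cancer, cpd_xray and cpd_dysp would be:

from pgmpy.models import BayesianModel

cancer_model = BayesianModel([('Pollution', 'Cancer'), ('Smoker', 'Cancer'),
                              ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea')])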
Example #16
                                        3, range(3), True)

hh['IncomeQ'] = hh.apply(lambda row: incomeToNHTSBands(row['HhIncome']),
                         axis=1)
hh.loc[hh['HhSize'] > 5, 'HhSize'] = 6

#need to start the ordinal variables at zero
hh['Bedrooms'] = hh.apply(lambda row: row['Bedrooms'] - 1, axis=1)
hh['HhSize'] = hh.apply(lambda row: row['HhSize'] - 1, axis=1)
hh['IncomeQ'] = hh.apply(lambda row: row['IncomeQ'] - 1, axis=1)

model = BayesianModel([('IncomeQ', 'Bedrooms'), ('HhSize', 'Bedrooms'),
                       ('IncomeQ', 'RentQ'), ('Bedrooms', 'RentQ')])
#nx.draw_networkx(model, with_labels=True)

modelData = hh[model.nodes()].copy()
testData = modelData.iloc[int(0.85 * modelData.shape[0]):].copy()
trainData = modelData.iloc[0:int(0.85 * modelData.shape[0])].copy()

model.fit(trainData, estimator=MaximumLikelihoodEstimator)
#for cpd in model.get_cpds():
#    print("CPD of {variable}:".format(variable=cpd.variable))
#    print(cpd)

model_sample = BayesianModelSampling(model)
pickle.dump(model_sample, open('results/sampler.p', 'wb'))

# open the nhts sample and add the inferred resType requirements
nhtsSample = pd.read_csv('results/nhtsSample.csv')
resType = []
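The pickled BayesianModelSampling object would presumably be reloaded to draw synthetic households, e.g.:

# Draw five synthetic rows from the fitted network (pgmpy forward sampling).
samples = model_sample.forward_sample(size=5)
print(samples.head())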
Example #17
from pgmpy.readwrite.BIF import BIFWriter
import pandas as pd
import numpy as np
from time import time
import graphviz as gv
import os

train = pd.read_csv('../msnbcWithHeader.csv', sep=',')
train = train[train.sum(axis=1) < 200]
train[train > 1] = 1

train_start = time()
bic = BicScore(train)
hc = HillClimbSearch(train, scoring_method=bic)
best_model = hc.estimate(prog_bar=True)
edges = best_model.edges()
model = BayesianModel(edges)
model.fit(train, estimator=BayesianEstimator, prior_type="BDeu")
variables = model.nodes()

print(model.edges())
train_end = time() - train_start
print("train time " + str(train_end))

my_graph = gv.Digraph(format='png')
for node in variables:
    my_graph.node(node)
for edge in edges:
    my_graph.edge(edge[0], edge[1])
filename = my_graph.render('../graph', view=True)
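BIFWriter is imported above but unused in the visible snippet; serializing the learned model would presumably look like this (the output path is hypothetical):

writer = BIFWriter(model)
writer.write_bif('../msnbc.bif')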
Example #18
class MyClass(object):
    def __init__(self, case):
        self.case = case
        self.results = []
        self.networx_test = nx.DiGraph()
        self.networx_fixed = nx.DiGraph()
        self.pgmpy_test = BayesianModel()
        self.networx = nx.DiGraph()
        self.pgmpy = BayesianModel()
        self.best_error = math.inf
        self.best_topology = [0, 0, nx.DiGraph(),
                              0]  # [error, entropy, networkx DiGraph, loop]
        self.dictionary = []
        self.header = {}
        self.nodes_0 = []
        self.edges_0 = {}
        self.nodes = []
        self.edges = {}
        self.cpds = {}
        self.colors_dictionary = {}
        self.colors_table = []
        self.colors_cpd = []
        self.learning_data = {}
        self.number_of_colors = 0
        self._util = Utilities(case)
        self._lat = Lattices(self._util)
        self.expected_result = [0, 0, 0]
        self.loop = 0

    def get_my_colors(self):
        evidence = []
        cardinality = []
        for i, node in enumerate(self.nodes):
            if 'BEN' in node[0] or 'MEM' in node[0]:
                evidence.append(node[0])
                cardinality.append(node[1]['cardinality'])
        self.colors_dictionary, self.colors_table, self.colors_cpd = self.color_cpd(
            'WORLD', 3, evidence, cardinality)
        self.number_of_colors = self.colors_table.shape[1]
        # for i in range(0, len(self.colors_table[1])):
        #     rows = len(self.colors_table)
        #     hi = 1000
        #     lo = 1
        #     sum = hi+(rows-1)
        #     hi /= sum
        #     lo /= sum
        #     for j in range(0, rows):
        #         if self.colors_table[j][i] == 1:
        #             self.colors_table[j][i] = hi
        #         else:
        #             self.colors_table[j][i] = lo

        # print('Number of colors : ', self.number_of_colors)
        # print(self.colors_cpd)
        #print(self.colors_cpd.values)

    def color_cpd(self, var, card_var, evidence, cardinality):
        table = CPD.get_index_matrix(cardinality)
        colors = {}
        hi = 1  #0.999
        lo = 1 - hi
        C = np.prod(cardinality)
        matrix = np.full((3, C), 1. / 3.)
        if 'BENS_1' in evidence and not 'BENS_2' in evidence and 'BENS_3' in evidence and 'BENS_0' in evidence:
            matrix[0] = [1. / 3, lo, hi, 1. / 3, 1. / 3, lo, hi, 1. / 3]
            matrix[1] = [1. / 3, lo, lo, 1. / 3, 1. / 3, lo, lo, 1. / 3]
            matrix[2] = [1. / 3, hi, lo, 1. / 3, 1. / 3, hi, lo, 1. / 3]
        if 'BENS_1' in evidence and not 'BENS_2' in evidence and 'BENS_3' in evidence and not 'BENS_0' in evidence:
            matrix[0] = [1. / 3, lo, hi, 1. / 3]
            matrix[1] = [1. / 3, lo, lo, 1. / 3]
            matrix[2] = [1. / 3, hi, lo, 1. / 3]
        if 'BENS_1' in evidence and 'BENS_2' in evidence and 'BENS_3' in evidence and not 'BENS_0' in evidence:
            matrix[0] = [lo, lo, lo, lo, hi, lo, hi, lo]
            matrix[1] = [hi, lo, hi, lo, lo, hi, lo, hi]
            matrix[2] = [lo, hi, lo, hi, lo, lo, lo, lo]
        if 'BENS_0' in evidence and 'BENS_1' in evidence and 'BENS_2' in evidence and 'BENS_3' in evidence:
            matrix[0] = [
                lo, lo, lo, lo, hi, lo, hi, lo, lo, lo, lo, lo, hi, lo, hi, lo
            ]
            matrix[1] = [
                hi, lo, hi, lo, lo, hi, lo, hi, hi, lo, hi, lo, lo, hi, lo, hi
            ]
            matrix[2] = [
                lo, hi, lo, hi, lo, lo, lo, lo, lo, hi, lo, hi, lo, lo, lo, lo
            ]

        cpd = TabularCPD(variable=var,
                         variable_card=card_var,
                         values=matrix,
                         evidence=evidence,
                         evidence_card=cardinality)
        for i, node in enumerate(evidence):
            colors.update({node: table[i]})
        return colors, table, cpd

    # def set_color(self, color):
    #     col = self.colors_table[:, color]
    #     for i in range(0,len(col)):
    #         node = 'BENS_'+ str(i)
    #         self.pgmpy.get_cpds(node).values = CPD.RON_cpd(node, self.pgmpy.get_cardinality(node), mu = int(col[i])).values

    def test_topology(self, entropy):
        self.networx_test = copy.deepcopy(self.networx)
        self.pgmpy_test = BayesianModel()
        self.pgmpy_test = self._util.translate_digraph_to_pgmpy(
            self.networx.copy())
        #model = {'main': GenerativeModel(SensoryInputVirtualPeepo(self), self.pgmpy_test)}
        self.expected_result = [0, 0, 0]
        ''' ------ going through all possible "colors" '''
        error = 0
        for color in range(0, self.number_of_colors):
            states = self.colors_table[:, color]
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            self.expected_result = reshaped_cpd[:, int(color)]
            for i, pixel in enumerate(states):
                if 'BENS_' + str(i) not in self.networx_test.nodes():
                    continue
                cardinality = self.pgmpy_test.get_cardinality('BENS_' + str(i))
                self.pgmpy_test.get_cpds(
                    'BENS_' + str(i)).values = CPD.create_fixed_parent(
                        cardinality, state=int(pixel))
            #error += self.do_inference(model)

            error += self.do_simple_inference()
        error /= self.number_of_colors
        self.results.append([entropy, error])
        if error <= self.best_error:
            self.best_error = error
            self.best_topology[0] = error
            self.best_topology[1] = entropy
            self.best_topology[2] = self.networx_test
            self.best_topology[3] = self.loop
        self.loop += 1

    def do_inference(self, models):
        error = 0
        for key in models:
            error += models[key].process()
        return error

    '''.................. vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv ..................................'''

    def do_simple_inference(self):
        total_prediction_error_size = 0
        for node in self.pgmpy_test.get_leaves():
            prediction = self.predict(node)
            observation = self.sensory_input(node)
            prediction_error_size = self.error_size(prediction, observation)
            prediction_error = self.error(prediction, observation)
            precision = entropy(prediction, base=2)
            total_prediction_error_size += prediction_error_size
        return total_prediction_error_size

    def predict(self, node):
        """
        Predicts the given leaf node (i.e. the observational node) based on the root nodes (i.e. the belief nodes)
        :return: prediction for given observation variable, a prediction is a probability distribution
        :rtype: np.array
        """
        infer = VariableElimination(self.pgmpy_test)
        evidence = self.get_root_nodes()
        evidence = {k: v for k, v in evidence.items() if k not in [node]}
        return infer.query(variables=[node], evidence=evidence)[node].values

    def sensory_input(self, name):
        expected_result = self.expected_result
        cpds = []
        for i in range(0, len(expected_result)):
            cpds.append([
                'WORLD_' + str(i),
                CPD.create_fixed_parent(2, state=int(expected_result[i]))
            ])
        for i, node in enumerate(self.nodes):
            for j in range(0, len(cpds)):
                if name == cpds[j][0]:
                    return cpds[j][1]

    def error(self, pred, obs):
        """
        Calculates the prediction error as the residual of subtracting the predicted inputs from the observed inputs
        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error
        :type pred : np.array
        :type obs : np.array
        :rtype : np.array
        """
        return obs - pred

    def error_size(self, pred, obs):
        """
        Calculates the size of the prediction error as the Kullback-Leibler divergence. This responds the magnitude
        of the prediction error, how wrong the prediction was.
        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error size
        :type pred : np.array
        :type obs : np.array
        :rtype : float
        """
        return entropy(obs, pred)

    def get_root_nodes(self):
        """
        Returns status of all root nodes.
        :param network: Bayesian Network representing the generative model
        :return: Dictionary containing all root nodes as keys and status as values
        :type network: BayesianModel
        :rtype dict
        """
        roots = {}
        for root in self.pgmpy_test.get_roots():
            roots.update(
                {root: np.argmax(self.pgmpy_test.get_cpds(root).values)})
        return roots

    def get_observations(self):
        obs = {}
        for leaf in self.pgmpy_test.get_leaves():
            obs.update(
                {leaf: np.argmax(self.pgmpy_test.get_cpds(leaf).values)})
        return obs

    '''**********************   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^                           '''

    def estimate_parameters(self):
        data = pd.DataFrame(data=self.learning_data)
        sample_size = len(self.learning_data)
        # print(sample_size)
        estimator = BayesianEstimator(self.pgmpy, data)
        # print('data')
        # print('pgmpy node : ', self.pgmpy.nodes())
        # print(self.learning_data)
        # print(data)
        pseudocount = {
            'BENS_0': [1, 2],
            'BENS_1': [1, 2],
            'BENS_2': [1, 2],
            'BENS_3': [1, 2],
            'WORLD_0': [1, 2],
            'WORLD_1': [1, 2],
            'WORLD_2': [1, 2]
        }

        pseudocount = [0.9, 0.9]
        if ('BENS_1' not in self.pgmpy.nodes()
                or 'BENS_2' not in self.pgmpy.nodes()
                or 'BENS_3' not in self.pgmpy.nodes()):
            pseudocount = [0.9, 0.9, 0.9]
        # print('pseudocount :', pseudocount)
        for i, node in enumerate(self.nodes):

            if 'LAN' in node[0] or 'MOTOR' in node[0] or 'WORLD' in node[0]:
                # print('cardinality node ', node[0], ' : ', self.pgmpy.get_cardinality(node[0]))
                # print(self.pgmpy.get_cpds(node[0]).values)
                #self.pgmpy.get_cpds(node[0]).values = estimator.estimate_cpd(node[0], prior_type='dirichlet', pseudo_counts=pseudocount).values
                self.pgmpy.get_cpds(node[0]).values = estimator.estimate_cpd(
                    node[0],
                    prior_type='BDeu',
                    equivalent_sample_size=sample_size).values

    def add_edges(self, topology):
        self.networx.remove_edges_from(self.edges)
        self.edges = []
        self.nodes = []
        shape = np.asarray(topology).shape
        ''' let's first remove all void nodes  ----> not necessary -----> delete the code ??'''
        nodes_to_remove = []
        # rows = np.sum(topology, axis = 1)
        # for row in range(0, len(rows)):
        #     if rows[row] == 0:
        #         nodes_to_remove.append('WORLD_' + str(row))
        columns = np.sum(topology, axis=0)
        for column in range(0, len(columns)):
            if columns[column] == 0:
                nodes_to_remove.append('BENS_' + str(column))
        self.networx.remove_nodes_from(nodes_to_remove)
        self.nodes = self.networx.nodes(data=True)
        for column in range(0, shape[1]):
            for row in range(0, shape[0]):
                if topology[row][column] == 1:
                    parent = 'BENS_' + str(column)
                    child = 'WORLD_' + str(row)
                    self.networx.add_edge(parent, child)
        self.edges = self.networx.edges()
        # print('edges  --------------------------- >', self.edges)
        # print(self.nodes)

    def add_dummy_cpds(self):
        for i, node in enumerate(self.nodes):
            cardinality = node[1]['cardinality']
            if ('BEN' in node[0]) or ('MEM' in node[0]):
                self.nodes[i][1]['cpd'] = CPD.create_fixed_parent(
                    cardinality, modus='uniform')
            else:
                incoming_nodes = self.networx.in_edges(node[0])
                if len(incoming_nodes) == 0:
                    self.nodes[i][1]['cpd'] = CPD.create_random_child(
                        cardinality, modus='orphan')
                    continue
                card_parent = []
                for m, n in enumerate(incoming_nodes):
                    par = self.networx.node[n[0]]['cardinality']
                    card_parent.append(par)
                self.nodes[i][1]['cpd'] = CPD.create_random_child(
                    cardinality, card_parent)

        # for i, node in enumerate(self.nodes):
        #     print(node[0])
        #     print(node[1]['cpd'])
        self.nodes = self.networx.nodes(data=True)
        # print('   IN NETWORX  ')
        # for i, node in enumerate(self.nodes):
        #     print(node[0])
        #     print(node[1]['cpd'])

    def create_learning_data(self):
        self.get_my_colors()
        self.learning_data = {}
        ben_nodes = [x for x in self.nodes if "BEN" in x[0]]
        world_nodes = [x for x in self.nodes if "WORLD" in x[0]]

        for i, node in enumerate(ben_nodes):
            self.learning_data.update({node[0]: self.colors_table[i].tolist()})

        for i, node in enumerate(world_nodes):
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            for hue in range(0, 3):
                if str(hue) in node[0]:
                    self.learning_data.update(
                        {node[0]: reshaped_cpd[hue, :].tolist()})
        # for i, node in enumerate(self.nodes):
        #     if "BEN" in node[0]:
        #         self.learning_data.update({node[0]:self.colors_table[i].tolist()})
        #     if "WORLD" in node[0]:
        #         shape = self.colors_cpd.values.shape
        #         reshaped_cpd = self.colors_cpd.values.reshape(shape[0], int(np.prod(shape)/shape[0]))
        #         for hue in range(0,3):
        #             if str(hue) in node[0]:
        #                 self.learning_data.update({node[0]:reshaped_cpd[hue,:].tolist()})
        # print('Learning data')
        # print(self.learning_data)

    def do_it(self):
        '''EXPLANATIONS'''
        self.networx_fixed, self.dictionary, self.header = self._util.get_network()
        self.networx = self.networx_fixed.copy()
        self.networx_test = self.networx_fixed.copy()
        print('Dictionary : ', self.dictionary)
        ''' -------------- Constructing all possible topologies,
                              --> option : restrain the number with the threshold :
                                        0 -> all possible topologies, 100 -> only the fully connected topology'''
        possible_topologies = self._lat.get_possible_topologies(
            treshold=50
        )  # threshold at 50% -> only topologies with an entropy >= 0.5 will be considered
        print("Possible topologies : ", len(possible_topologies))
        entropy = 0
        count = 0  #TEMPORARY
        ''' -------------- walking through all topologies'''
        for topology in possible_topologies:
            if self.loop < 200 or self.loop > 350:
                self.loop += 1
                count += 1
                continue
            entropy = topology[1]
            if entropy == 0:
                continue  #safeguard
            print('Loop *-> ', self.loop + 1, ' of ', len(possible_topologies))
            topo = topology[0]
            self.networx = nx.DiGraph()
            self.networx = self.networx_fixed.copy()
            ''' ----------- for each topology we construct the edges and update dummy cpd (necessary as the shape of the LENs cpd's can change
                            depending on the number of incoming nodes'''
            self.add_edges(topo)
            self.add_dummy_cpds()
            self.nodes = self.networx.nodes(data=True)
            self.create_learning_data()
            # print('edges = ' , self.edges)
            #print(self.learning_data)
            ''' ----------- convert DiGraph to pgmpy and check'''
            self.pgmpy = BayesianModel()
            self.pgmpy = self._util.translate_digraph_to_pgmpy(
                self.networx.copy())
            '''------------ ask pgmpy to guess the best cpd's of the LANs and LENs 
                             -> provide pgmpy with the learning data'''

            self.pgmpy.check_model()
            self.estimate_parameters()
            '''-------------- Testing the constructed topology'''
            self.test_topology(entropy)
            '''the following lines are temporary: just used to check whether the edge-building algorithm is correct'''
            count += 1
            #print('edges : ', self.edges)
            #
            # if count > 350:
            #     break
        print('Check -> number of processed topologies in loop : ', count)
        # print('My colors : ')
        # print(self.colors_table)
        # print(self.colors_cpd)
        '''  the methods have to be completed to cope with the general case i.e. BENS, MEMS, LANS, MOTORs, WORLDs,
        but for the time being we just assume there are only BEN's and WORLD's'''

        # self.networx.add_edge('BENS_1','WORLD_1')
        # self.networx.node['BENS_1']['cpd'] = [0.8,0.2]
        # self.networx.node['WORLD_2']['cpd'] = [[0.8, 0.2, 0.5,0.3],[0.2,0.8,0.5,0.7]]
        ''' if a best model has been found, save it -> first update the Utility class object and save it'''
        # self._util.update_networkx(self.networx, self.dictionary, self.header)
        # self._util.save_network()
        # self._util.update_pgmpy(self.pgmpy, self.dictionary, self.header)
        # self._util.save_pgmpy_network()
        self.draw()
        self.draw_xy()
        return self.results

    def draw_xy(self):
        x = []
        y = []
        s = []
        color = []
        best_x = 0
        best_y = 0
        for i in range(0, len(self.results)):
            x.append(self.results[i][0])
            y.append(self.results[i][1])
            if i == self.best_topology[3]:
                best_x = self.results[i][0]
                best_y = self.results[i][1]
                s.append(60)
                color.append("r")
            else:
                s.append(20)
                color.append("b")
        plt.scatter(x, y, s=s, c=color, alpha=0.5)
        plt.xlabel("Complexity of topology")
        plt.ylabel("Average error over all colors")
        plt.show()

    def draw(self):
        '''TO REMOVE LATER'''
        plt.figure(figsize=(10, 5))
        pos = nx.circular_layout(self.best_topology[2], scale=2)
        #node_labels = nx.get_node_attributes(self.networx, 'cpd')
        nx.draw(self.best_topology[2],
                pos,
                node_size=1200,
                node_color='lightblue',
                linewidths=0.25,
                font_size=10,
                font_weight='bold',
                with_labels=True)
        plt.text(1, 1, 'Topology nr. : ' + str(self.best_topology[3]))
        plt.show()
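The error_size method above relies on scipy.stats.entropy, which returns the Kullback-Leibler divergence when given two distributions. A standalone check:

import numpy as np
from scipy.stats import entropy

obs = np.array([0.5, 0.5])
pred = np.array([0.9, 0.1])
print(entropy(obs, pred))  # KL(obs || pred) in nats, ~0.51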
Example #19
# cancer_model.check_model()

# data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
# model = BayesianModel([('A', 'C'), ('B', 'C')])
# estimator = BayesianEstimator(model, data)
# cpd_C = estimator.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=[2, 4])
# model.add_cpds(cpd_C)

model.fit(train, estimator=BayesianEstimator,
          prior_type="BDeu")  # default equivalent_sample_size=5

# pseudo_counts = {'D': [300, 700], 'I': [500, 500], 'G': [800, 200], 'L': [500, 500], 'S': [400, 600]}
# model.fit(data, estimator=BayesianEstimator, prior_type='dirichlet', pseudo_counts=pseudo_counts)

# print the node information
print(model.nodes())
# print the dependency relations (edges)
print(model.edges())
# inspect the probability distribution of a single node
print(model.get_cpds('Pclass').values)

from pgmpy.inference import VariableElimination

model_infer = VariableElimination(model)
q = model_infer.query(variables=['Survived'], evidence={'Fare': 0})
print(q)
'''
+------------+-----------------+
| Survived   |   phi(Survived) |
+============+=================+
| Survived_0 |          0.6341 |
+------------+-----------------+
| Survived_1 |          0.3659 |
+------------+-----------------+
'''
Example #20
                ('TQ', 'DFT' + str(i)),
                ('DI' + str(i), 'RD' + str(i)),
                ('DFT' + str(i), 'RD' + str(i)),
                ('RD' + str(i), 'DFO' + str(i)),
                ('OU', 'DFO' + str(i))]

list_edges += [('RD0', 'DI1'), ('RD1', 'DI2'), ('DPQ', 'DI0'), ('C', 'DI0')]

model.add_edges_from(list_edges)
model.fit(data, estimator=BayesianEstimator, prior_type="BDeu", equivalent_sample_size=10)
for edge in model.edges():
    print(edge)
    print("\n")
infer = VariableElimination(model)

nodes = list(model.nodes())  # materialize as a list so nodes.remove() works below
Distribution = {}

for key in pr.keys():
    Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
    nodes.remove(key)
    print('pr done')

for key in nodes:
    Distribution[key] = infer.query([key], evidence=pr)[key].values
    print('done' + key)

print(Distribution['DPQ'])
plt.subplot(4, 4, 1)
plt.bar([1, 2, 3, 4, 5], Distribution['DPQ'])
plt.xticks([1.5, 2.5, 3.5, 4.5, 5.5], ['very low', 'low', 'medium', 'high', 'very high'])
Example #21
class TestBayesianModelMethods(unittest.TestCase):

    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3, values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'], evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of('g')
        ancestors2 = self.G2._get_ancestors_of('d')
        ancestors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancestors1, {'d', 'i', 'g'})
        self.assertEqual(ancestors2, {'d'})
        self.assertEqual(ancestors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'), Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
Example #22
def bayesnet():
    """
    References:
        https://class.coursera.org/pgm-003/lecture/17
        http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
        http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
        http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
        https://github.com/pgmpy/pgmpy.git
        http://pgmpy.readthedocs.org/en/latest/
        http://nipy.bic.berkeley.edu:5000/download/11
    """
    # import operator as op
    # # Enumerate all possible events
    # varcard_list = list(map(op.attrgetter('variable_card'), cpd_list))
    # _esdat = list(ut.iprod(*map(range, varcard_list)))
    # _escol = list(map(op.attrgetter('variable'), cpd_list))
    # event_space = pd.DataFrame(_esdat, columns=_escol)

    # # Custom compression of event space to inspect a specific graph
    # def compress_space_flags(event_space, var1, var2, var3, cmp12_):
    #     """
    #     var1, var2, cmp_ = 'Lj', 'Lk', op.eq
    #     """
    #     import vtool as vt
    #     data = event_space
    #     other_cols = ut.setdiff_ordered(data.columns.tolist(), [var1, var2, var3])
    #     case_flags12 = cmp12_(data[var1], data[var2]).values
    #     # case_flags23 = cmp23_(data[var2], data[var3]).values
    #     # case_flags = np.logical_and(case_flags12, case_flags23)
    #     case_flags = case_flags12
    #     case_flags = case_flags.astype(np.int64)
    #     subspace = np.hstack((case_flags[:, None], data[other_cols].values))
    #     sel_ = vt.unique_row_indexes(subspace)
    #     flags = np.logical_and(mask, case_flags)
    #     return flags

    # # Build special cases
    # case_same   = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.eq)]
    # case_diff = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.ne)]
    # special_cases = [
    #     case_same,
    #     case_diff,
    # ]

    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    from pgmpy.inference import BeliefPropagation  # NOQA
    from pgmpy.inference import VariableElimination  # NOQA

    name_nice = ['n1', 'n2', 'n3']
    score_nice = ['low', 'high']
    match_nice = ['diff', 'same']
    num_names = len(name_nice)
    num_scores = len(score_nice)
    nid_basis = list(range(num_names))
    score_basis = list(range(num_scores))

    semtype2_nice = {
        'score': score_nice,
        'name': name_nice,
        'match': match_nice,
    }
    var2_cpd = {
    }
    globals()['semtype2_nice'] = semtype2_nice
    globals()['var2_cpd'] = var2_cpd

    name_combo = np.array(list(ut.iprod(nid_basis, nid_basis)))
    combo_is_same = name_combo.T[0] == name_combo.T[1]
    def get_expected_scores_prob(level1, level2):
        part1 = combo_is_same * level1
        part2 = (1 - combo_is_same) * (1 - (level2))
        expected_scores_level = part1 + part2
        return expected_scores_level

    # def make_cpd():

    def name_cpd(aid):
        from pgmpy.factors import TabularCPD
        cpd = TabularCPD(
            variable='N' + aid,
            variable_card=num_names,
            values=[[1.0 / num_names] * num_names])
        cpd.semtype = 'name'
        return cpd

    name_cpds = [name_cpd('i'), name_cpd('j'), name_cpd('k')]
    var2_cpd.update(dict(zip([cpd.variable for cpd in name_cpds], name_cpds)))
    if True:
        num_same_diff = 2
        samediff_measure = np.array([
            # get_expected_scores_prob(.12, .2),
            # get_expected_scores_prob(.88, .8),
            get_expected_scores_prob(0, 0),
            get_expected_scores_prob(1, 1),
        ])
        samediff_vals = (samediff_measure / samediff_measure.sum(axis=0)).tolist()
        def samediff_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='A' + aid1 + aid2,
                variable_card=num_same_diff,
                values=samediff_vals,
                evidence=['N' + aid1, 'N' + aid2],  # [::-1],
                evidence_card=[num_names, num_names])  # [::-1])
            cpd.semtype = 'match'
            return cpd
        samediff_cpds = [samediff_cpd('i', 'j'), samediff_cpd('j', 'k'), samediff_cpd('k', 'i')]
        var2_cpd.update(dict(zip([cpd.variable for cpd in samediff_cpds], samediff_cpds)))
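
        # Quick check (a sketch): samediff_measure was normalized along axis 0
        # above, so each conditional column over {diff, same} sums to 1.
        assert np.allclose(np.array(samediff_vals).sum(axis=0), 1.0)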

        if True:
            def score_cpd(aid1, aid2):
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]
                evidence_cpds = [var2_cpd[key] for key in evidence]
                evidence_nice = [semtype2_nice[cpd.semtype] for cpd in evidence_cpds]
                evidence_card = list(map(len, evidence_nice))
                evidence_states = list(ut.iprod(*evidence_nice))
                variable_basis = semtype2_nice[semtype]

                variable_values = []
                for mystate in variable_basis:
                    row = []
                    for state in evidence_states:
                        # state is (match, name1, name2), following the
                        # evidence order ['A..', 'N..', 'N..'] defined above
                        names_same = state[1] == state[2]
                        match_val = state[0]
                        if names_same:
                            if match_val == 'same':
                                val = .2 if mystate == 'low' else .8
                            else:
                                val = 1
                                # val = .5 if mystate == 'low' else .5
                        else:
                            if match_val == 'same':
                                val = .5 if mystate == 'low' else .5
                            else:
                                val = 1
                                # val = .9 if mystate == 'low' else .1
                        row.append(val)
                    variable_values.append(row)

                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,  # [::-1],
                    evidence_card=evidence_card)  # [::-1])
                cpd.semtype = semtype
                return cpd
        else:
            score_values = [
                [.8, .1],
                [.2, .9],
            ]
            def score_cpd(aid1, aid2):
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=num_scores,
                    values=score_values,
                    evidence=['A' + aid1 + aid2],  # [::-1],
                    evidence_card=[num_same_diff])  # [::-1])
                cpd.semtype = 'score'
                return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds + samediff_cpds
    else:
        score_measure = np.array([get_expected_scores_prob(level1, level2)
                                  for level1, level2 in
                                  zip(np.linspace(.1, .9, num_scores),
                                      np.linspace(.2, .8, num_scores))])

        score_values = (score_measure / score_measure.sum(axis=0)).tolist()

        def score_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='S' + aid1 + aid2,
                variable_card=num_scores,
                values=score_values,
                evidence=['N' + aid1, 'N' + aid2],
                evidence_card=[num_names, num_names])
            cpd.semtype = 'score'
            return cpd
        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds
        pass

    input_graph = []
    for cpd in cpd_list:
        if cpd.evidence is not None:
            for evar in cpd.evidence:
                input_graph.append((evar, cpd.variable))
    name_model = BayesianModel(input_graph)
    name_model.add_cpds(*cpd_list)
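
    # Note: several score columns above are left unnormalized (both states get
    # 1.0), so name_model.check_model() may fail on pgmpy versions that
    # require CPD columns to sum to 1.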

    var2_cpd.update(dict(zip([cpd.variable for cpd in cpd_list], cpd_list)))
    globals()['var2_cpd'] = var2_cpd

    varnames = [cpd.variable for cpd in cpd_list]

    # --- PRINT CPDS ---

    cpd = score_cpds[0]
    def print_cpd(cpd):
        print('CPT: %r' % (cpd,))
        index = semtype2_nice[cpd.semtype]
        if cpd.evidence is None:
            columns = ['None']
        else:
            basis_lists = [semtype2_nice[var2_cpd[ename].semtype] for ename in cpd.evidence]
            columns = [','.join(x) for x in ut.iprod(*basis_lists)]
        data = cpd.get_cpd()
        print(pd.DataFrame(data, index=index, columns=columns))

    for cpd in name_model.get_cpds():
        print('----')
        print(cpd._str('phi'))
        print_cpd(cpd)

    # --- INFERENCE ---

    Ni = name_cpds[0]

    event_space_combos = {}
    event_space_combos[Ni.variable] = 0  # Set ni to always be Fred
    for cpd in cpd_list:
        if cpd.semtype == 'score':
            event_space_combos[cpd.variable] = list(range(cpd.variable_card))
    evidence_dict = ut.all_dict_combinations(event_space_combos)
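    # evidence_dict is a list of evidence assignments; with binary scores it
    # looks roughly like (a sketch):
    #   [{'Ni': 0, 'Sij': 0, 'Sjk': 0}, {'Ni': 0, 'Sij': 0, 'Sjk': 1}, ...]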

    # Query about name of annotation k given different event space params

    def pretty_evidence(evidence):
        return [key + '=' + str(semtype2_nice[var2_cpd[key].semtype][val])
                for key, val in evidence.items()]

    def print_factor(factor):
        row_cards = factor.cardinality
        row_vars = factor.variables
        values = factor.values.reshape(np.prod(row_cards), 1).flatten()
        # col_cards = 1
        # col_vars = ['']
        basis_lists = list(zip(*list(ut.iprod(*[range(c) for c in row_cards]))))
        nice_basis_lists = []
        for varname, basis in zip(row_vars, basis_lists):
            cpd = var2_cpd[varname]
            _nice_basis = ut.take(semtype2_nice[cpd.semtype], basis)
            nice_basis = ['%s=%s' % (varname, val) for val in _nice_basis]
            nice_basis_lists.append(nice_basis)
        row_lbls = [', '.join(sorted(x)) for x in zip(*nice_basis_lists)]
        print(ut.repr3(dict(zip(row_lbls, values)), precision=3, align=True, key_order_metric='-val'))

    # name_belief = BeliefPropagation(name_model)
    name_belief = VariableElimination(name_model)
    import pgmpy
    import six  # NOQA

    def try_query(evidence):
        print('--------')
        query_vars = ut.setdiff_ordered(varnames, list(evidence.keys()))
        evidence_str = ', '.join(pretty_evidence(evidence))
        probs = name_belief.query(query_vars, evidence)
        factor_list = probs.values()
        joint_factor = pgmpy.factors.factor_product(*factor_list)
        print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ')')
        # print(six.text_type(joint_factor))
        factor = joint_factor  # NOQA
        # print_factor(factor)
        # import utool as ut
        print(ut.hz_str([(f._str(phi_or_p='phi')) for f in factor_list]))

    for evidence in evidence_dict:
        try_query(evidence)

    evidence = {'Aij': 1, 'Ajk': 1, 'Aki': 1, 'Ni': 0}
    try_query(evidence)

    evidence = {'Aij': 0, 'Ajk': 0, 'Aki': 0, 'Ni': 0}
    try_query(evidence)

    globals()['score_nice'] = score_nice
    globals()['name_nice'] = name_nice
    globals()['score_basis'] = score_basis
    globals()['nid_basis'] = nid_basis

    print('Independencies')
    print(name_model.get_independencies())
    print(name_model.local_independencies([Ni.variable]))

    # name_belief = BeliefPropagation(name_model)
    # # name_belief = VariableElimination(name_model)
    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         probs = name_belief.query(['Lk'], evidence)
    #         factor = probs['Lk']
    #         probs = factor.values
    #         evidence_ = evidence.copy()
    #         evidence_['Li'] = name_nice[evidence['Li']]
    #         evidence_['Lj'] = name_nice[evidence['Lj']]
    #         evidence_['Sij'] = score_nice[evidence['Sij']]
    #         evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #         nice2_prob = ut.odict(zip(name_nice, probs.tolist()))
    #         ut.print_python_code('P(Lk | {evidence}) = {cpt}'.format(
    #             evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #             cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #         ))

    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.drop('Lj', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         query_vars = ['Lk', 'Lj']
    #         probs = name_belief.query(query_vars, evidence)
    #         for queryvar in query_vars:
    #             factor = probs[queryvar]
    #             print(factor._str('phi'))
    #             probs = factor.values
    #             evidence_ = evidence.copy()
    #             evidence_['Li'] = name_nice[evidence['Li']]
    #             evidence_['Sij'] = score_nice[evidence['Sij']]
    #             evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #             nice2_prob = ut.odict(zip([queryvar + '=' + x for x in name_nice], probs.tolist()))
    #             ut.print_python_code('P({queryvar} | {evidence}) = {cpt}'.format(
    #                 queryvar=queryvar,
    #                 evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #                 cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #             ))

    # _ draw model

    import plottool as pt
    import networkx as netx
    fig = pt.figure()  # NOQA
    fig.clf()
    ax = pt.gca()

    netx_nodes = [(node, {}) for node in name_model.nodes()]
    netx_edges = [(etup[0], etup[1], {}) for etup in name_model.edges()]
    netx_graph = netx.DiGraph()
    netx_graph.add_nodes_from(netx_nodes)
    netx_graph.add_edges_from(netx_edges)

    # pos = netx.graphviz_layout(netx_graph)
    pos = netx.pydot_layout(netx_graph, prog='dot')
    netx.draw(netx_graph, pos=pos, ax=ax, with_labels=True)

    pt.plt.savefig('foo.png')
    ut.startfile('foo.png')
Exemple #23
0
# Read the attributes
lines = list(csv.reader(open('data7_names.csv', 'r')))
attributes = lines[0]

# Read Cleveland Heart disease data
heartDisease = pd.read_csv('data7_heart.csv', names = attributes)
heartDisease = heartDisease.replace('?', np.nan)

# Model the Bayesian network
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
('exang', 'trestbps'),('trestbps','heartdisease'),('fbs','heartdisease'),
('heartdisease','restecg'),('heartdisease','thalach'),('heartdisease','chol')])

print('\nBayesian Network Nodes are: ')
print('\t',model.nodes())
print('\nBayesian Network Edges are:')
print('\t',model.edges())

# Learning CPDs using Maximum Likelihood Estimators
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inferencing with Bayesian Network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Computing the probability of heartdisease given age=28.
print('\n1. Probability of HeartDisease given Age=28')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])
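
# A hedged variation (not in the original lab code): the same inference object
# can condition on several evidence columns at once, assuming the given values
# occur in the training data.
q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28, 'sex': 0})
print(q2['heartdisease'])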
Exemple #24
0
    UGM = DGM.to_markov_model()
    jtree = UGM.to_junction_tree()

    evidence = {'A': 1}

    marginal = jta(UGM, jtree, evidence.items())
    print("Results of the implemented JTA")
    for m in marginal:
        print(m)

    print("\n=======================================\n")

    print("Results of the Variable Elimination from pgmpy")

    inference = VariableElimination(DGM)
    for v in get_different(DGM.nodes(), evidence):
        print(inference.query(variables=[v], evidence=evidence)[v])

    # visualization part
    # nx.draw_circular(DGM, with_labels=True, node_color="white", node_size=1000)
    # plt.draw()
    # plt.show()

    # nx.draw_circular(UGM, with_labels=True, node_color="white", node_size=1000)
    # plt.draw()
    # plt.show()

    # nx.draw_circular(jtree, with_labels=True, node_color="white", node_shape='s', node_size=8000)
    # plt.draw()
    # plt.show()
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

cancer_model = BayesianModel([('Pollution', 'Cancer'), ('Smoker', 'Cancer'),
                              ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea')])
print('Bayesian network nodes are:')
print('\t', cancer_model.nodes())
print('Bayesian network edges are:')
print('\t', cancer_model.edges())

cpd_poll = TabularCPD(variable='Pollution',
                      variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker',
                       variable_card=2,
                       values=[[0.3], [0.7]])
cpd_cancer = TabularCPD(variable='Cancer',
                        variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
cpd_xray = TabularCPD(variable='Xray',
                      variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'],
                      evidence_card=[2])
cpd_dysp = TabularCPD(variable='Dyspnoea',
                      variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'],
                      evidence_card=[2])
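
# Hedged completion (the snippet above is truncated in the source): attach the
# CPDs, validate the model, and run a sample query in the style of the other
# examples.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)
print('Model check:', cancer_model.check_model())
cancer_infer = VariableElimination(cancer_model)
print(cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1}))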
class TestBayesianModelMethods(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([("a", "d"), ("b", "d"), ("d", "e"),
                                ("b", "c")])
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            values=[
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([("d", "g"), ("g", "l"), ("i", "g"),
                                 ("i", "l")])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [("a", "b"), ("a", "d"), ("b", "c"),
                                     ("d", "b"), ("e", "d")]
                            or (edge[1], edge[0]) in [("a", "b"), ("a", "d"),
                                                      ("b", "c"), ("d", "b"),
                                                      ("e", "d")])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([("a", "d"), ("d", "e"), ("b", "d"), ("b", "c"),
                           ("a", "b")])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [("a", "b"), ("c", "b"), ("d", "a"),
                                     ("d", "b"), ("d", "e")]
                            or (edge[1], edge[0]) in [("a", "b"), ("c", "b"),
                                                      ("d", "a"), ("d", "b"),
                                                      ("d", "e")])

    def test_get_ancestors_of_success(self):
        ancenstors1 = self.G2._get_ancestors_of("g")
        ancenstors2 = self.G2._get_ancestors_of("d")
        ancenstors3 = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(ancenstors1, {"d", "i", "g"})
        self.assertEqual(ancenstors2, {"d"})
        self.assertEqual(ancenstors3, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, "h")

    def test_get_cardinality(self):
        self.assertDictEqual(self.G1.get_cardinality(), {
            "diff": 2,
            "intel": 3,
            "grade": 3
        })

    def test_get_cardinality_with_node(self):
        self.assertEqual(self.G1.get_cardinality("diff"), 2)
        self.assertEqual(self.G1.get_cardinality("intel"), 3)
        self.assertEqual(self.G1.get_cardinality("grade"), 3)

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies("a"),
                         Independencies(["a", ["b", "c"]]))
        self.assertEqual(
            self.G.local_independencies("c"),
            Independencies(["c", ["a", "d", "e"], "b"]),
        )
        self.assertEqual(self.G.local_independencies("d"),
                         Independencies(["d", "c", ["b", "a"]]))
        self.assertEqual(
            self.G.local_independencies("e"),
            Independencies(["e", ["c", "b", "a"], "d"]),
        )
        self.assertEqual(self.G.local_independencies("b"),
                         Independencies(["b", "a"]))
        self.assertEqual(self.G1.local_independencies("grade"),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        self.assertEqual(chain.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        self.assertEqual(fork.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        self.assertEqual(collider.get_independencies(),
                         Independencies(("X", "Z"), ("Z", "X")))

    def test_is_imap(self):
        val = [
            0.01,
            0.01,
            0.08,
            0.006,
            0.006,
            0.048,
            0.004,
            0.004,
            0.032,
            0.04,
            0.04,
            0.32,
            0.024,
            0.024,
            0.192,
            0.016,
            0.016,
            0.128,
        ]
        JPD = JointProbabilityDistribution(["diff", "intel", "grade"],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(["diff", "intel", "grade"], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanket(self):
        G = DAG([
            ("x", "y"),
            ("z", "y"),
            ("y", "w"),
            ("y", "v"),
            ("u", "w"),
            ("s", "v"),
            ("w", "t"),
            ("w", "m"),
            ("v", "n"),
            ("v", "q"),
        ])
        self.assertEqual(set(G.get_markov_blanket("y")),
                         set(["s", "w", "x", "u", "z", "v"]))

    def test_get_immoralities(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertEqual(G.get_immoralities(), {("w", "x"), ("w", "z")})
        G1 = BayesianModel([("x", "y"), ("z", "y"), ("z", "x"), ("w", "y")])
        self.assertEqual(G1.get_immoralities(), {("w", "x"), ("w", "z")})
        G2 = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y"),
                            ("w", "x")])
        self.assertEqual(G2.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        G2 = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds("diff")),
                            id(model_copy.get_cpds("diff")))

        self.G1.remove_cpds("diff")
        diff_cpd = TabularCPD("diff", 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds("diff"),
                            model_copy.get_cpds("diff"))

        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node("diff")
        self.assertEqual(sorted(self.G1.nodes()), sorted(["grade", "intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(["diff", "grade"])
        self.assertEqual(sorted(self.G1.nodes()), sorted(["intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")
        self.assertRaises(ValueError, self.G1.get_cpds, "grade")

    def tearDown(self):
        del self.G
        del self.G1
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
Exemple #28
0
class TestGibbsSampling(unittest.TestCase):
    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade',
                               3,
                               [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = DiscreteFactor(['C', 'B'], [4, 3],
                                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_bayesian_model',
           autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_markov_model',
           autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables),
                             self.bayesian_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {
            'diff': 2,
            'intel': 2,
            'grade': 3
        })

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables), self.markov_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {
            'A': 2,
            'B': 3,
            'C': 4,
            'D': 2
        })

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        self.gibbs.state = None
        random_state.return_value = [
            State('diff', 0),
            State('intel', 0),
            State('grade', 0)
        ]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual(
            {samples[0][0].var, samples[0][1].var, samples[0][2].var},
            {'diff', 'intel', 'grade'})
        self.assertEqual(
            {samples[1][0].var, samples[1][1].var, samples[1][2].var},
            {'diff', 'intel', 'grade'})

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
Exemple #29
0
class Utilities(object):
    def __init__(self, file):
        '''Helper class for translating peepo networks between json, networkx and pgmpy formats'''
        self.keywords = ['BENS', 'MEMS', 'LANS', 'MOTOR', 'WORLD']
        self.standard_nodes = {
            'RONS': {
                'BENS': [],
                'MEMS': []
            },
            'LANS': {
                'LANS': []
            },
            'LENS': {
                'MOTOR': [],
                'WORLD': []
            }
        }
        self.file = file
        self.get_json_path(file)
        self.pgmpy_object = BayesianModel()
        self.networkx_object = nx.DiGraph()
        self.header = ''
        self.dictionary = []

    def get_nodes_in_family(self, family, attributes=False):
        nw_nodes = self.networkx_object.nodes()
        nw_dim = np.asarray(nw_nodes).ndim
        nodes = []
        for i, node in enumerate(nw_nodes):
            if nw_dim > 1:
                node = node[0]
            if family in node:
                nodes.append(node)
        return nodes

    @staticmethod
    def check_json_path(directory):
        """
        Checks whether the necessary project_repository directory exists.
        If not, creates it

        :param directory: the mother directory to search from downwards

        :type directory: string
        :rtype : none
        """
        if not os.path.exists(directory + '\\project_repository\\'):
            os.makedirs(directory + '\\project_repository\\')

    def get_json_path(self, file):
        """
        Creates a string containing the full path for the filename passed
        so it will be saved in the project_repository directory

        :param file: filename without path or extension
        :return: a full path for the file

        :type file : string
        :rtype : string
        """
        levels = 5
        common = os.path.dirname(os.path.realpath(__file__))
        for i in range(levels + 1):
            common = os.path.dirname(common)
            if 'peepo\\peepo' not in common:
                break
        Utilities.check_json_path(common)
        self.file = str(common + '\\project_repository\\' + file + '.json')
        print('in get_json_path :', self.file)

    def save_json(self, astring):
        """
        This helper function is only needed to have the json file formatted in
        a user-friendly way, as the "dump" method does not provide many
        options to make it "pretty"

        :param astring: the string containing the whole information
        :return: void

        :type astring : string
        :rtype : void
        """
        text_file = open(str(self.file), "w")
        '''remove all LF written by the dump method'''
        astring = re.sub('\n', '', astring)
        '''For keywords -> insert LF and tabs'''
        astring = re.sub('\"Identification', '\n\"Identification', astring)
        astring = re.sub('\"Date', '\n\"Date', astring)
        astring = re.sub('\"Description', '\n\"Description', astring)
        astring = re.sub('\"Train_from', '\n\"Train_from', astring)
        astring = re.sub('\"Frozen', '\n\"Frozen', astring)
        astring = re.sub('\"Nodes', '\n\n\"Nodes', astring)
        astring = re.sub('\"RONS', '\n\t\t\"RONS', astring)
        astring = re.sub('\"BENS', '\n\t\t\t\"BENS', astring)
        astring = re.sub('\"MEMS', '\n\t\t\t\"MEMS', astring)
        astring = re.sub('\"LANS', '\n\t\t\"LANS', astring)
        astring = re.sub('\"LENS', '\n\t\t\"LENS', astring)
        astring = re.sub('\"MOTOR', '\n\t\t\t\"MOTOR', astring)
        astring = re.sub('\"WORLD', '\n\t\t\t\"WORLD', astring)
        astring = re.sub('\"Edges', '\n\n\"Edges', astring)
        astring = re.sub('\"CPDs', '\n\n\"CPDs', astring)
        astring = re.sub('{', '\n\t\t{', astring)
        text_file.write(astring)
        text_file.write('\n')
        text_file.close()

    def translation(self, astring, from_man_to_machine):
        """
        Given the array of tuples (a, b) in self.dictionary, returns the other element of the tuple in which astring was found.
        Used so as not to lose the user's node names, as peepo generates standardized names for the corresponding nodes

        :param dictionary: an array of tuples -> is created in the method : get_network(file)
        :param astring: the name of the node passed by the user
        :param from_man_to_machine: an integer -> 0 to translate the user-given name to the standardized name, 1 the other way around
        :return: the corresponding node name
        :type dictionary: np.array
        :type astring : string
        :rtype : string
        """
        source = 0
        target = 1
        if from_man_to_machine == 1:
            source = 1
            target = 0

        for index, item in enumerate(self.dictionary):
            if item[source] == astring:
                break
        return item[target]
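        # Usage sketch (assuming the demo network from create_json_file below):
        #   translation('pooping', 0)  -> 'BENS_0'
        #   translation('BENS_0', 1)   -> 'pooping'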

    def clean_edge_list(self, edge_array, parent):
        '''the get functions for the edges, both in networkx and pgmpy, contain
            the parent name; this function removes it from the list'''
        cleaned_list = []
        for a in edge_array:
            if a != parent:
                cleaned_list.append(a)
        return cleaned_list

    def clean_parent_list(self, parent_array, child):
        '''the get functions for the parents, both in networkx and pgmpy, return
            (parent, child) tuples; this function extracts the parent names'''
        cleaned_list = []
        for i, a in enumerate(parent_array):
            if a[0] != child:
                cleaned_list.append(a[0])
        return cleaned_list

    def get_edges(self):
        """
        Creates a dictionary with a node as key and an array of its children as value
        (the get_child methods generally return a list of tuples (parent, child))

        :param  pgmpy_object: the pgmpy network
        :return: a dictionary with the edges of all the nodes

        :type pgmpy_object: address
        :rtype : dictionary
        """
        edg = self.pgmpy_object.edges()
        edges = dict()
        for parent, child in edg:
            edges.setdefault(str(parent), []).append(str(child))
        return edges

    def get_nodes_and_attributes(self):
        """
        Creates an array of tuples with a node as element 0 and a dictionary
        with 'cardinality' and 'cpd' as keys, where
         the key cardinality returns an int
         the key cpd a 2 dimensional matrix

        :param  pgmpy_object: the pgmpy network
        :return: array of tuples with a node as element 0 and a dictionary with cardinality and cpd as keys

        :type pgmpy_object: address
        :rtype : array of tuples
        """
        nodes = self.pgmpy_object.nodes()
        nod_and_attributes = []
        for node in nodes:
            nod_and_attributes.append((str(node), {
                'cardinality':
                int(self.pgmpy_object.get_cardinality(node)),
                'cpd':
                self.pgmpy_object.get_cpds(node).values.astype(float)
            }))
        # need to reshape the cpds when there is more than 1 parent
        for i, node in enumerate(nod_and_attributes):
            shape = nod_and_attributes[i][1]['cpd'].shape
            dimension = nod_and_attributes[i][1]['cpd'].ndim
            if dimension > 2:
                col = int(np.prod(shape) / shape[0])
                nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][
                    'cpd'].reshape(shape[0], col)
            nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][
                'cpd'].tolist()
        return nod_and_attributes

    def translate_pgmpy_to_digraph(self):
        """
        Converts a pgmpy network into a networkx network

        :param  pgmpy_object: the pgmpy network
        :return: networkx network

        :type pgmpy_object: address
        :rtype : networkx DiGraph
        """
        self.networkx_object = nx.DiGraph()
        edges = self.pgmpy_object.edges()
        nodes_and_attributes = self.get_nodes_and_attributes()
        self.networkx_object.add_nodes_from(nodes_and_attributes)
        self.networkx_object.add_edges_from(edges)
        return

    def update_networkx(self, networkx, dic, header):
        self.header = header
        self.dictionary = dic
        self.networkx_object = networkx

    def update_pgmpy(self, pgmpy, dic, header):
        self.header = header
        self.dictionary = dic
        self.pgmpy_object = pgmpy

    def save_pgmpy_network(self):
        """
                Saves the passed pgmpy_object class object in a json file
        """
        self.translate_pgmpy_to_digraph()
        self.save_network()
        return

    def translate_digraph_to_pgmpy(self, digraf):
        """
        Converts a networkx digraph into a pgmpy network

        :param digraf: the networkx digraph
        :return: the pgmpy network

        :type digraf: networkx DiGraph
        :rtype : pgmpy BayesianModel
        """
        self.pgmpy_object, x, y = self.get_pgmpy_network(from_object=True,
                                                         digraph=digraf)
        return self.pgmpy_object

    def save_network(self):
        """
        Saves the passed networkx class object in a json file

        """
        data = self.get_empty_canvas()
        data["header"] = self.header
        nw_nodes = self.networkx_object.nodes(data=True)
        nw_edges = self.networkx_object.edges()
        keywords = self.keywords
        nodes = copy.deepcopy(
            self.standard_nodes
        )  #{'RONS': {'BENS': [], 'MEMS': []}, 'LANS': {'LANS': []}, 'LENS': {'MOTOR': [], 'WORLD': []}}
        edges = []
        cpds = []
        '''adding edges'''
        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            childs = []
            for k, edge in enumerate(nw_edges):
                if edge[0] == node_name:
                    childs.append(self.translation(edge[1], 1))
            if len(childs) != 0:
                edges.append({self.translation(node_name, 1): childs})

        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            cardinality = node[1]['cardinality']
            cpd = node[1]['cpd']
            for pseudonym in keywords:
                if pseudonym in node_name:
                    node_name_ = self.translation(node_name, 1)
                    if pseudonym == 'BENS' or pseudonym == 'MEMS':
                        nodes['RONS'][pseudonym].append(
                            [node_name_, cardinality])
                    if pseudonym == 'LANS':
                        nodes['LANS'][pseudonym].append(
                            [node_name_, cardinality])
                    if pseudonym == 'MOTOR' or pseudonym == 'WORLD':
                        nodes['LENS'][pseudonym].append(
                            [node_name_, cardinality])
            cpds.append({self.translation(node_name, 1): cpd})
        data['Nodes'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        data['header']['Date'] = datetime.datetime.now().strftime("%c")
        self.save_json(json.dumps(data))
        return

    def get_pgmpy_network(self, from_object=False, digraph=None):
        """
        Reads the passed json file and translates its content to the passed pgmpy class object
        - uses get_network(file) to read the json file in a networkx format and translates this to pgmpy
        - Creates a dictionary for the nodes in the form of an array of tuples : [(name defined by the user, standard name)]

        :param file: filename without path or extension
        :param pgmpy_object: the pgmpy object which will be completed
        :return: a dictionary as an array of tuples and the header of the json file

        :type file : string
        :type pgmpy_object : pgmpy class object
        :rtype : array of tuples, dictionary

        CAUTION : the method does not perform a check() on the constructed DAG ! -> has to be done in the calling module
        """
        self.pgmpy_object = BayesianModel()
        if not (from_object):
            network, dictionary, header = self.get_network()
        else:
            network = digraph
        nw_nodes = network.nodes(data=True)
        nw_edges = network.edges()
        '''adding nodes and edges'''
        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            self.pgmpy_object.add_node(node_name)
            for k, edge in enumerate(nw_edges):
                if edge[0] == node_name:
                    self.pgmpy_object.add_edge(node_name, edge[1])
        '''add  cpd's'''
        for i, node in enumerate(nw_nodes):
            parent_nodes = network.in_edges(node[0])
            parent_nodes = self.clean_parent_list(parent_nodes, node[0])
            cpd = node[1]['cpd']
            ''' find the cardinality of the node '''
            cardinality_node = node[1]['cardinality']
            '''the cardinality of the parents has to be determined'''
            cardinality_parents = []
            for i, nod in enumerate(parent_nodes):
                cardinality_parents.append(network.node[nod]['cardinality'])
            ''' Depending on the place in the BN and/or the number of parents  the PGMPY CPD methods have another call'''
            if len(cardinality_parents) == 0:
                self.pgmpy_object.add_cpds(
                    TabularCPD(variable=node[0],
                               variable_card=cardinality_node,
                               values=[cpd]))
                continue
            table = TabularCPD(variable=node[0], variable_card= cardinality_node, values=cpd, \
                              evidence=parent_nodes,\
                              evidence_card=np.asarray(cardinality_parents))
            self.pgmpy_object.add_cpds(table)
        '''------TO DELETE-------------'''
        # pgmpy_object.check_model()
        # draw_network(pgmpy_object)
        '''-----------------------------'''
        return self.pgmpy_object, self.dictionary, self.header

    def get_network(self):
        """
        Reads the passed json file and translates its content into a networkx class object
        - The nodes in the object are renamed so they have a standardized signature
        - Creates a dictionary for the nodes in the form of an array of tuples : [(name defined by the user, standard name)]

        :param file: filename without path or extension
        :return: a networkx class object, a dictionary as an array of tuples and the header of the json file

        :type file : string
        :rtype : networkx class object, array of tuples, dictionary
        """
        self.dictionary = []
        self.networkx_object = nx.DiGraph()
        with open(self.file) as f:
            data = f.read()
        '''Remove possible non informative characters'''
        data = re.sub('\n', '', data)
        data = re.sub('\t', '', data)
        data = json.loads(data)
        self.header = data['header']
        '''Feeding G with the nodes'''
        cardinality = {}
        for key in data['Nodes'].keys():
            for secondkey in data['Nodes'][key].keys():
                for c, n in enumerate(data['Nodes'][key][secondkey]):
                    node = secondkey + "_" + str(c)
                    self.networkx_object.add_node(node, {
                        'cardinality': n[1],
                        'cpd': []
                    })
                    self.dictionary.append((n[0], node))
                    cardinality.update(
                        {node: n[1]}
                    )  #this contains the cardinality of each node with the node name as dictionary entry
        '''Feeding G with the edges'''
        edges = []
        for j, pair in enumerate(data['Edges']):
            for parent in pair.keys():
                for child in data['Edges'][j][parent]:
                    parent_ = self.translation(parent, 0)
                    child_ = self.translation(child, 0)
                    edges.append((parent_, child_))
        self.networkx_object.add_edges_from(edges)
        '''Feeding G with the  CPD's as nodes attributes'''
        for j, node in enumerate(data['CPDs']):
            for parent, cpd in node.items():
                node_ = self.translation(parent, 0)
                self.networkx_object.node[node_]['cpd'] = cpd
        '''TO REMOVE LATER'''
        # plt.figure(figsize=(10, 5))
        # pos = nx.circular_layout(G, scale=2)
        # node_labels = nx.get_node_attributes(G, 'cpd')
        # nx.draw(G, pos, node_size=1200, node_color='lightblue',
        #         linewidths=0.25,  font_size=10, font_weight='bold', with_labels=True)
        # plt.show()
        return self.networkx_object, self.dictionary, self.header

    def create_json_file(self, **kwargs):
        """
        EXAMPLE:

        A helping method if the user prefers to create the BN within the code

        :param case_name: the file name without path or extension where the json file will be saved
        :param : **kwargs takes the following variables:
                                                            description = kwargs.get('description', '')
                                                            train_from = kwargs.get('train_from', '')
                                                            cpds = kwargs.get('CPDs', [])
                                                            bens = kwargs.get('BENS',[])
                                                            mems = kwargs.get('MEMS', [])
                                                            lans = kwargs.get('LANS', [])
                                                            motors = kwargs.get('MOTORS', [])
                                                            world = kwargs.get('WORLD', [])
                                                            edges = kwargs.get('Edges', [])
                                                            frozen = kwargs.get('frozen',False)
        .
        .
        .
        :return: void

        :type case_name : string
        :type  :
        .
        .
        .
        :rtype : void
        """
        description = kwargs.get('description', '')
        train_from = kwargs.get('train_from', '')
        cpds = kwargs.get('CPDs', [])
        bens = kwargs.get('BENS', [])
        mems = kwargs.get('MEMS', [])
        lans = kwargs.get('LANS', [])
        motors = kwargs.get('MOTORS', [])
        world = kwargs.get('WORLD', [])
        edges = kwargs.get('Edges', [])
        frozen = kwargs.get('frozen', False)

        #json_tab_file_write = JSONTabIndentFileWriter( Case_name,5a)
        data = self.get_empty_canvas()
        '''       - the 3 next items are for tracking purposes only, not fundamentally necessary'''
        data["header"]['Identification'] = self.file
        data["header"]['Date'] = datetime.datetime.now().strftime("%c")
        data["header"]['Description'] = description
        '''       - the next item gives a file containing possible training data (OPTIONAL)'''
        data["header"]['Train_from'] = train_from
        '''      Frozen tells whether or not the model can be considered as final i.e. is there still "training" needed'''
        data["header"]['Frozen'] = frozen
        '''       - the 5 next lines tell how many nodes, their names + cardinality the model will start with
                    the names can be any valid python string'''
        bens = [['pooping', 2], ['peeing', 2], ['constipated', 2]]
        mems = [['havenotoiletpaper', 2]]
        lans = [['diarhea', 2], ['happypoop', 2]]
        motors = [['asshole1', 2], ['asshole2', 2]]
        world = [['toilet1', 2], ['toilet2', 2], ['garden1', 2],
                 ['garden2', 2], ['doctor', 2]]
        '''     - the next items describe the edges as a dictionary
                 -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs'''
        edges = []
        '''       !! in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty'''
        edges.append({'pooping': ['toilet1', 'diarhea', 'happypoop']})
        edges.append({'peeing': ['toilet2', 'garden1', 'garden2']})
        edges.append({'constipated': ['doctor']})
        edges.append({'havenotoiletpaper': ['garden1', 'garden2']})
        edges.append(
            {'diarhea': ['toilet1', 'doctor', 'asshole1', 'asshole2']})
        edges.append(
            {'happypoop': ['garden1', 'garden2', 'asshole1', 'asshole2']})
        '''       - the next items describe the CPD's  as a dictionary
                  -> the dictionary entry is the corresponding node'''
        cpds = []
        cpds.append({'pooping': [0.5, 0.5]})
        cpds.append({'peeing': [0.2, 0.8]})
        cpds.append({'constipated': [0.9, 0.1]})
        cpds.append({'havenotoiletpaper': [0.6, 0.4]})
        cpds.append({'happypoop': [[0.3, 0.8], [0.7, 0.2]]})
        cpds.append({'diarhea': [[0.8, 0.3], [0.2, 0.7]]})
        cpds.append({'toilet1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'asshole1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'asshole2': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'toilet2': [[0.5, 0.5], [0.5, 0.5]]})
        cpds.append({'doctor': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({
            'garden1': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                        [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
        })
        cpds.append({
            'garden2': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                        [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
        })
        '''       - feeding the data'''
        data["Nodes"]['RONS']['BENS'] = bens
        data["Nodes"]['RONS']['MEMS'] = mems
        data["Nodes"]['LANS']['LANS'] = lans
        data["Nodes"]['LENS']['MOTOR'] = motors
        data["Nodes"]['LENS']['WORLD'] = world
        data["Edges"] = edges
        data["CPDs"] = cpds
        ''' dumping to CASENAME file in json format'''
        self.save_json(json.dumps(data))

        print("Json file for  - ", self.file, "  - created")

    def create_json_template(self):
        """
        A helping method if the json template in the project_repository directory has been deleted or corrupted

        :param : void
        :return: void

        :type : void
        :rtype : void
        """
        self.get_json_path(
            "Template"
        )  # creates the right path in which case_name will be saved
        data = self.get_empty_canvas()
        data['header']['Identification'] = self.file
        '''Filling some dummies to facilitate the user'''
        a_node = ['*', 0]
        an_edge = {'*': ['&', '&', '&']}
        a_cpd = {'*': [[0, 0, 0], [0, 0, 0]]}
        nodes = []
        edges = []
        cpds = []
        for i in range(0, 3):
            nodes.append(a_node)
            edges.append(an_edge)
            cpds.append(a_cpd)

        data['Nodes']['RONS']['BENS'] = nodes
        data['Nodes']['RONS']['MEMS'] = nodes
        data['Nodes']['LANS']['LANS'] = nodes
        data['Nodes']['LENS']['MOTOR'] = nodes
        data['Nodes']['LENS']['WORLD'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        ''' dumping to CASENAME file in json format'''
        # with open(case_name, 'w') as f:
        #     json.dump(data, f, separators = (",",":"))
        self.save_json(json.dumps(data))
        print("Empty template created")

    def get_empty_canvas(self):
        """
         This method creates a json canvas which will be used by the several json-creating methods

         :param : void
         :return: a dictionary with the structure of the json file
         :type : none
         :rtype : dictionary
         """

        data = {
            'header': {
                'Identification': '',
                'Date': '',
                'Description': '',
                'Frozen': '',
                'Train_from': ''
            },
            'Nodes': {},
            'Edges': [],
            'CPDs': []
        }
        '''       - the 5 next lines tell how many nodes and their names the model will start with
                    the names can be any valid python string'''
        bens = []
        mems = []
        lans = []
        motors = []
        world = []
        '''     - the next items describe the edges as a dictionary
                 -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs

                 !! in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty'''
        edges = []
        '''       - the next items describe the CPD's  as a dictionary
                  -> the dictionary entry is the corresponding node'''
        cpds = []
        '''       - feeding the data'''
        data['Nodes'] = {
            'RONS': {
                'BENS': bens,
                'MEMS': mems
            },
            'LANS': {
                'LANS': lans
            },
            'LENS': {
                'MOTOR': motors,
                'WORLD': world
            }
        }
        data['Edges'] = edges
        data['CPDs'] = cpds
        return data
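
# Usage sketch (hypothetical case name; assumes a writable project_repository
# directory next to the package, as handled by get_json_path above):
# util = Utilities('demo_case')
# util.create_json_file(description='toy peepo network')
# model, dictionary, header = util.get_pgmpy_network()
# model.check_model()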
        ]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'),
                            ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
Exemple #31
0
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class are numbered and must be called in order.
    '''
    def __init__(self, pnh, gh):
        '''
        Constructor
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        ''' (4)
        Builds the structure of the network from the data
        -----------------
        Parameters:
        method         : the technique used to search for the structure
            -> scoring_approx     - approximate search with a scoring method
            -> scoring_exhaustive - exhaustive search with a scoring method
            -> constraint         - constraint-based technique
        scoring_method : K2, bic, bdeu
        log            : "True" if you want to print debug information in the console
        '''

        #Select the scoring method for the local search of the structure
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)

        #Select the actual method
        if method == "scoring_approx":
            est = HillClimbSearch(self.data, scores)
        elif method == "scoring_exhaustive":
            est = ExhaustiveSearch(self.data, scores)
        elif method == "constraint":
            est = ConstraintBasedEstimator(self.data)

        self.best_model = est.estimate()
        self.eliminate_isolated_nodes()  # remove all nodes not connected to anything else

        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        #self.log("Training instances skipped: " + str(self.extractor.get_skipped_lines()), log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        ''' (5)
        Estimates the parameters of the found network
        '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " +
                                   str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " +
                                   str(self.variables_names))

        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(cpd.__str__())

    def inference(self, variables, evidence, mode="auto", log=True):
        ''' (6)
        Computes the inference over some variables of the network (given some evidence)
        '''

        inference = VariableElimination(self.best_model)
        #inference = BeliefPropagation(self.markov)
        #inference = Mplp(self.best_model)
        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)

        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
            '''
            map_query = inference.map_query(variables, evidence)
            print(map_query)
            '''

    def draw_network(self, label_choice, location_choice, location, log):
        ''' (7)
        Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" to show a single label on arcs, "double" for a double label.
        location = 0, 1, or 2 depending on the location (H0, H1, H2)
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:

            devices = self.variables_names
            device_location = dict()
            device_locationH1 = dict()

            #For H0
            for d in devices:
                allDevicesLocations = self.general_handler.get_device_locations()
                device_location[d] = allDevicesLocations[d][0]
                device_locationH1[d] = allDevicesLocations[d][1]  #temp for H1
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)
            '''
            # Logging and saving info
            self.log(device_location, log)
            self.log(location_color, log)
            self.file_writer.write_txt(device_location, newline = True)
            self.file_writer.write_txt(location_color, newline = True)
            '''

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                name = "cluster_" + loc
                loc_subgraphs[loc] = gv.Digraph(name)
                loc_subgraphs[loc].graph_attr['label'] = loc  # label with the name to be visualized in the image

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                loc_subgraphs[locationH0].node(
                    node, style='filled',
                    fillcolor=location_colorH1[locationH1])  # add the node to the right subgraph
                #loc_subgraphs[locationH0].node(node) #USE THIS TO ADD ONLY H0
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges
        for edge in self.best_model.edges_iter():
            inference = VariableElimination(self.best_model)
            label = ""

            # Inference for first label and color of edges
            variables = [edge[1]]
            evidence = dict()
            evidence[edge[0]] = 1
            phi_query = inference.query(variables, evidence)
            value = phi_query[edge[1]].values[1]
            value = round(value, 2)

            if label_choice == "single":
                label = str(value)

            if label_choice == "double":
                # Inference for second label
                variables = [edge[0]]
                evidence = dict()
                evidence[edge[1]] = 1
                phi_query = inference.query(variables, evidence)
                value_inv = phi_query[edge[0]].values[1]
                value_inv = round(value_inv, 2)
                label = str(value) + "|" + str(value_inv)

            if value >= 0.75:
                bn_graph.edge(edge[0], edge[1], color="red", label=label)
            else:
                bn_graph.edge(edge[0], edge[1], color="black", label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            if location_choice:
                locat = "_H0H1"
            else:
                locat = ""
            imgPath = '../../output/' + self.device_considered + '_' + self.priority_considered + locat
        bn_graph.render(imgPath)
        os.remove(imgPath)  #remove the source code generated by graphviz

    def data_info(self, selection, log):
        ''' (9) Prints or logs some extra information about the data or the network
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)", newline=True)
            i = 0
            for dr in self.rankedDevices:
                self.file_writer.write_txt(dr[0] + "             \t" +
                                           str(dr[1]) + "\t" + str(dr[2]))
                i = i + 1
                if i == 20:  # stop once 20 devices have been written
                    break

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges_iter():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            self.markov = self.best_model.to_markov_model()  # create the markov model from the BN
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                node_pydot = pydot.Node(node)
                nice_graph.add_node(node_pydot)
            for edge in self.markov.edges():
                edge_pydot = pydot.Edge(edge[0], edge[1], color="black")
                nice_graph.add_edge(edge_pydot)
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:", newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(factor.__str__())

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = self.best_model.nodes()
            inference = VariableElimination(self.best_model)
            for node1 in nodes:
                pos = nodes.index(node1) + 1
                for i in range(pos, len(nodes)):
                    node2 = nodes[i]
                    variables = [node2]
                    evidence = dict()
                    evidence[node1] = 1
                    phi_query = inference.query(variables, evidence)
                    prob1 = phi_query[node2].values[1]  # probability of direct activation (inference from node1=1 to node2)
                    variables = [node1]
                    evidence = dict()
                    evidence[node2] = 1
                    phi_query = inference.query(variables, evidence)
                    prob2 = phi_query[node1].values[1]  # probability of inverse activation (inference from node2=1 to node1)
                    prob1 = round(prob1, 2)
                    prob2 = round(prob2, 2)
                    if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:  # add direct arc from node1 to node2
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob1) + "|" + str(prob2)
                        nice_graph.add_edge(
                            pydot.Edge(node1,
                                       node2,
                                       color="red",
                                       label=double_label))
                    elif prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob2) + "|" + str(prob1)
                        nice_graph.add_edge(
                            pydot.Edge(node2,
                                       node1,
                                       color="red",
                                       label=double_label))
                    elif prob1 >= 0.75 and prob2 >= 0.75:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="orange",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="orange",
                                           label=double_label))
                    elif prob1 >= 0.55 and prob2 >= 0.55:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="black",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="black",
                                           label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' + self.device_considered +
                                     '_' + self.priority_considered +
                                     '-inference_network.png')

    def fix_node_presence(self, nodes, pydot_graph):
        ''' Adds the list of nodes to the graph, if they are not already present '''
        for node in nodes:
            if node not in pydot_graph.get_nodes():
                pydot_graph.add_node(pydot.Node(node))

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph
        '''
        for nodeX in self.best_model.nodes():
            tup = [item for item in self.best_model.edges() if nodeX in item]
            if not tup:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)
        if self.best_model.nodes() == []:
            raise DataError("No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value (WORKS WITH MAX 10 DIFFERENT LOCATIONS)
        '''
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Orange', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]  # 'Orange' replaces a duplicated 'Red' so that all ten entries are distinct
        location_color = dict()  # key = location; value = color
        for dev, loc in device_location.items():
            if loc not in location_color:
                color = system_color[0]
                system_color.remove(color)
                location_color[loc] = color
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
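
To illustrate the scoring-method selection that learn_structure() performs, here is a self-contained sketch using the same pgmpy estimators the class imports; the data is random, so the learned edges are not meaningful.

import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch, K2Score, BicScore, BdeuScore

# toy binary data, standing in for the CERN dataframe used above
data = pd.DataFrame(np.random.randint(0, 2, size=(500, 4)),
                    columns=['A', 'B', 'C', 'D'])
scores = BicScore(data)  # or K2Score(data) / BdeuScore(data), as in learn_structure()
est = HillClimbSearch(data, scores)
best_model = est.estimate()
print(best_model.edges())
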
# NOTE: the opening lines of this snippet were truncated in the source; the
# reconstruction below assumes the classic Burglary-Earthquake alarm network,
# whose structure is implied by the CPDs that follow.
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD  # pgmpy.factors in older versions

alarm_model = BayesianModel([('Burglary', 'Alarm'),
                             ('Earthquake', 'Alarm'),
                             ('Alarm', 'JohnCalls'),
                             ('Alarm', 'MaryCalls')])
cpd_burglary = TabularCPD(variable='Burglary', variable_card=2,
                          values=[[0.999], [0.001]])
cpd_earthquake = TabularCPD(variable='Earthquake', variable_card=2,
                            values=[[0.998], [0.002]])
cpd_alarm = TabularCPD(variable='Alarm', variable_card=2,
                        values=[[0.999, 0.71, 0.06, 0.05],
                                [0.001, 0.29, 0.94, 0.95]],
                        evidence=['Burglary', 'Earthquake'],
                        evidence_card=[2, 2])
cpd_johncalls = TabularCPD(variable='JohnCalls', variable_card=2,
                      values=[[0.95, 0.1], [0.05, 0.9]],
                      evidence=['Alarm'], evidence_card=[2])
cpd_marycalls = TabularCPD(variable='MaryCalls', variable_card=2,
                      values=[[0.1, 0.7], [0.9, 0.3]],
                      evidence=['Alarm'], evidence_card=[2])

# Associating the parameters with the model structure
alarm_model.add_cpds(cpd_burglary, cpd_earthquake, cpd_alarm, cpd_johncalls, cpd_marycalls)


#new cell
alarm_model.check_model() 

#new cell
alarm_model.nodes()

#new cell
alarm_model.edges()

#new cell
alarm_model.local_independencies('Burglary')

#new cell
alarm_model.local_independencies('JohnCalls')
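
A natural follow-up cell, not in the original snippet, is to query the model; a hedged sketch with VariableElimination (the query return type varies across pgmpy versions).

#new cell (hedged addition)
from pgmpy.inference import VariableElimination

infer = VariableElimination(alarm_model)
posterior = infer.query(variables=['Burglary'],
                        evidence={'JohnCalls': 1, 'MaryCalls': 1})
print(posterior)  # P(Burglary | JohnCalls=1, MaryCalls=1)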
Exemple #33
0
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'),
                                 ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'b'),
                                                              ('b', 'c'),
                                                              ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
Exemple #34
0
class TestGibbsSampling(unittest.TestCase):
    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = Factor(['C', 'B'], [4, 3], [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_bayesian_model', autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_markov_model', autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables), self.bayesian_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables), self.markov_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEqual(len(sample), 2)
        self.assertEqual(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))


    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        self.gibbs.state = None
        random_state.return_value = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)


    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual({samples[0][0].var, samples[0][1].var, samples[0][2].var}, {'diff', 'intel', 'grade'})
        self.assertEqual({samples[1][0].var, samples[1][1].var, samples[1][2].var}, {'diff', 'intel', 'grade'})


    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
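
Outside of the test harness, the same sampler can be used directly. A minimal sketch, assuming the import paths of the pgmpy version under test (GibbsSampling moved to pgmpy.sampling, and TabularCPD to pgmpy.factors.discrete, in later releases):

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD  # pgmpy.factors in older versions
from pgmpy.inference import GibbsSampling      # pgmpy.sampling in newer versions

# the same student model built in setUp() above
student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
student.add_cpds(
    TabularCPD('diff', 2, [[0.6], [0.4]]),
    TabularCPD('intel', 2, [[0.7], [0.3]]),
    TabularCPD('grade', 3,
               [[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
               evidence=['diff', 'intel'], evidence_card=[2, 2]))
gibbs = GibbsSampling(student)
print(gibbs.sample(size=5))  # DataFrame with columns diff, intel, grade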
Exemple #35
0
# Bayesian network for students
from pgmpy.models import BayesianModel
model = BayesianModel()
# Add nodes
model.add_nodes_from(['difficulty', 'intelligence', 'grade', 'sat', 'letter'])
print(model.nodes())
# Add edges
model.add_edges_from([('difficulty', 'grade'), ('intelligence', 'grade'), ('intelligence', 'sat'), ('grade', 'letter')])
print(model.edges())
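
The snippet above only defines the structure. A hedged continuation (not in the original) attaches CPDs with the values of the standard student-network example and validates the model:

from pgmpy.factors.discrete import TabularCPD  # pgmpy.factors in older versions

cpd_difficulty = TabularCPD('difficulty', 2, [[0.6], [0.4]])
cpd_intelligence = TabularCPD('intelligence', 2, [[0.7], [0.3]])
cpd_sat = TabularCPD('sat', 2, [[0.95, 0.2], [0.05, 0.8]],
                     evidence=['intelligence'], evidence_card=[2])
cpd_grade = TabularCPD('grade', 3,
                       [[0.3, 0.05, 0.9, 0.5],
                        [0.4, 0.25, 0.08, 0.3],
                        [0.3, 0.7, 0.02, 0.2]],
                       evidence=['difficulty', 'intelligence'],
                       evidence_card=[2, 2])
cpd_letter = TabularCPD('letter', 2, [[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                        evidence=['grade'], evidence_card=[3])
model.add_cpds(cpd_difficulty, cpd_intelligence, cpd_grade, cpd_sat, cpd_letter)
print(model.check_model())  # True if structure and CPDs are consistent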
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.g.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [["a", "b"], ["b", "c"]])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node("a")
        self.assertListEqual(list(self.G.nodes()), ["a"])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge("d", "e")
        self.assertListEqual(sorted(self.G.nodes()), ["d", "e"])
        self.assertListEqual(list(self.G.edges()), [("d", "e")])
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edge("a", "b")
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, "a", "a")

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([("a", "b"), ("a", "c")])
        self.assertRaises(ValueError, self.G.add_edge, "c", "a")

    def test_add_edges_from_string(self):
        self.G.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["b", "c"]])
        self.G.add_nodes_from(["d", "e", "f"])
        self.G.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(self.G.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"),
                                 ("e", "f")]),
        )

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [("a", "a")])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [("a", "b"),
                                                              ("b", "c"),
                                                              ("c", "a")])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        self.assertListEqual(list(self.g.predecessors("a")), [])
        self.assertListEqual(list(self.g.predecessors("b")), ["a"])
        self.assertListEqual(list(self.g.predecessors("c")), ["b"])

    def test_update_node_parents(self):
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(list(self.G.predecessors("a")), [])
        self.assertListEqual(list(self.G.predecessors("b")), ["a"])
        self.assertListEqual(list(self.G.predecessors("c")), ["b"])

    def tearDown(self):
        del self.G
Exemple #37
0
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
# Generating random data for two coin tossing examples
raw_data = np.random.randint(low=0, high=2, size=(1000, 2))
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
print(data)
coin_model = BayesianModel()
coin_model.fit(data, estimator=BayesianEstimator)
coin_model.get_cpds()
coin_model.nodes()
coin_model.edges()
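
A hedged variation on the same fit() call, with the nodes added explicitly so the estimator has variables to parameterize; fit() is assumed to forward the prior_type and equivalent_sample_size keyword arguments to BayesianEstimator.get_parameters, which accepts them.

coin_model_bdeu = BayesianModel()
coin_model_bdeu.add_nodes_from(['X', 'Y'])
coin_model_bdeu.fit(data, estimator=BayesianEstimator,
                    prior_type='BDeu', equivalent_sample_size=10)
for cpd in coin_model_bdeu.get_cpds():
    print(cpd)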
Exemple #38
0
    def estimate(self, start=None, tabu_length=0, max_indegree=None):
        """
        Performs a local hill climb search to estimate the `BayesianModel` structure
        that has the optimal score, according to the scoring method supplied in the constructor.
        Starts at model `start` and proceeds by step-by-step network modifications
        until a local maximum is reached. Only estimates the network structure, no parametrization.

        Parameters
        ----------
        start: BayesianModel instance
            The starting point for the local search. By default a completely disconnected network is used.
        tabu_length: int
            If provided, the last `tabu_length` graph modifications cannot be reversed
            during the search procedure. This serves to enforce a wider exploration
            of the search space. Default value: 0.
        max_indegree: int or None
            If provided and not None, the procedure only searches among models
            where all nodes have at most `max_indegree` parents. Defaults to None.

        Returns
        -------
        model: `BayesianModel` instance
            A `BayesianModel` at a (local) score maximum.

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.estimators import HillClimbSearch, BicScore
        >>> # create data sample with 9 random variables:
        ... data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 9)), columns=list('ABCDEFGHI'))
        >>> # add 10th dependent variable
        ... data['J'] = data['A'] * data['B']
        >>> est = HillClimbSearch(data, scoring_method=BicScore(data))
        >>> best_model = est.estimate()
        >>> sorted(best_model.nodes())
        ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
        >>> best_model.edges()
        [('B', 'J'), ('A', 'J')]
        >>> # search a model with restriction on the number of parents:
        >>> est.estimate(max_indegree=1).edges()
        [('J', 'A'), ('B', 'J')]
        """
        epsilon = 1e-8
        nodes = self.state_names.keys()
        if start is None:
            start = BayesianModel()
            start.add_nodes_from(nodes)
        elif not isinstance(start, BayesianModel) or not set(
                start.nodes()) == set(nodes):
            raise ValueError(
                "'start' should be a BayesianModel with the same variables as the data set, or 'None'."
            )

        tabu_list = []
        current_model = start

        while True:
            best_score_delta = 0
            best_operation = None

            for operation, score_delta in self._legal_operations(
                    current_model, tabu_list, max_indegree):
                if score_delta > best_score_delta:
                    best_operation = operation
                    best_score_delta = score_delta

            if best_operation is None or best_score_delta < epsilon:
                break
            elif best_operation[0] == '+':
                current_model.add_edge(*best_operation[1])
                tabu_list = ([('-', best_operation[1])] +
                             tabu_list)[:tabu_length]
            elif best_operation[0] == '-':
                current_model.remove_edge(*best_operation[1])
                tabu_list = ([('+', best_operation[1])] +
                             tabu_list)[:tabu_length]
            elif best_operation[0] == 'flip':
                X, Y = best_operation[1]
                current_model.remove_edge(X, Y)
                current_model.add_edge(Y, X)
                tabu_list = ([best_operation] + tabu_list)[:tabu_length]

        return current_model
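
A hedged usage sketch of the tabu_length and max_indegree parameters described in the docstring above; the data and column names are hypothetical.

import numpy as np
import pandas as pd
from pgmpy.estimators import HillClimbSearch, BicScore

data = pd.DataFrame(np.random.randint(0, 3, size=(2000, 4)), columns=list('WXYZ'))
data['V'] = data['W'] + data['X']  # dependent variable, so some edges should be found
est = HillClimbSearch(data, scoring_method=BicScore(data))
# forbid reversing the last 10 modifications; allow at most 2 parents per node
print(est.estimate(tabu_length=10, max_indegree=2).edges())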