def create_networks(self):
        """Build the generative model(s) used by this agent.

        The network has a goal node and an obstacle node per side. On each
        side both of them are parents of that side's motor node, and the
        obstacle node is additionally the parent of three vision nodes.

        Returns:
            dict: a single entry ``'main'`` holding a ``GenerativeModel``
            built from ``SensoryInputVirtualPeepo(self)`` and the network.
        """
        # (parent, child) pairs, listed in the order the graph is built.
        motor_edges = [
            ('goal_left', 'motor_left'),
            ('goal_right', 'motor_right'),
            ('obstacle_left', 'motor_left'),
            ('obstacle_right', 'motor_right'),
        ]
        vision_edges = [
            ('obstacle_left', 'vision_1'),
            ('obstacle_left', 'vision_2'),
            ('obstacle_left', 'vision_3'),
            ('obstacle_right', 'vision_4'),
            ('obstacle_right', 'vision_5'),
            ('obstacle_right', 'vision_6'),
        ]
        network = BayesianModel(motor_edges + vision_edges)

        # CPDs are created and registered in a fixed order: motor nodes,
        # goal nodes, obstacle nodes, then one CPD per vision node.
        cpds = [
            double_hypo_cpd('motor_left', 'goal_left', 'obstacle_left'),
            double_hypo_cpd('motor_right', 'goal_right', 'obstacle_right'),
            wandering_hypo_cpd('goal_left'),
            wandering_hypo_cpd('goal_right'),
            obstacle_hypo_cpd('obstacle_left'),
            obstacle_hypo_cpd('obstacle_right'),
        ]
        for parent, sensor in vision_edges:
            cpds.append(single_hypo_cpd(sensor, parent))

        network.add_cpds(*cpds)
        network.check_model()

        main_model = GenerativeModel(SensoryInputVirtualPeepo(self), network)
        return {'main': main_model}
Example #2
0
    def generate_cpds(self):
        """Attach random, normalised CPDs to every node of ``self.graph``.

        Nodes are visited in topological order so that the cardinality of
        every parent is known before the child's table is sized. Each node
        gets a random cardinality in {2, 3, 4, 5} and a random table whose
        columns are normalised to sum to 1.

        The resulting ``BayesianModel`` (edges and node names converted with
        ``str``) is validated with ``check_model`` and stored in
        ``self.model``.
        """
        model = BayesianModel([(str(a), str(b))
                               for a, b in self.graph.edges()])

        variable_cards = {}
        cpds = []
        for n in nx.topological_sort(self.graph):
            causes = sorted(self.graph.predecessors(n))
            variable_card = random.choice([2, 3, 4, 5])
            variable_cards[n] = variable_card
            if not causes:
                # A root node's table is a single column of shape
                # (variable_card, 1); TabularCPD expects one row per state
                # of the variable, not one row holding all states.
                values = np.random.rand(variable_card, 1)
                values = values / np.sum(values)
                cpd = TabularCPD(variable=str(n),
                                 variable_card=variable_card,
                                 values=values)
            else:
                evidence_card = [variable_cards[i] for i in causes]
                # One column per joint parent configuration, each
                # normalised independently.
                values = np.random.rand(variable_card, np.prod(evidence_card))
                values = values / np.sum(values, axis=0)
                cpd = TabularCPD(variable=str(n),
                                 variable_card=variable_card,
                                 values=values,
                                 evidence=[str(a) for a in causes],
                                 evidence_card=evidence_card)
            cpds.append(cpd)

        model.add_cpds(*cpds)
        model.check_model()

        self.model = model
Example #3
0
def createBayesianNetwork():
    """Build and validate the eight-node asia/smoke/.../dysp network.

    Returns:
        The ``BayesianModel`` with all eight binary CPDs attached, after
        ``check_model`` has been called on it.
    """
    # Network structure as (parent, child) edges.
    structure = [
        ('asia', 'tub'), ('smoke', 'bronc'),
        ('smoke', 'lung'), ('lung', 'either'),
        ('tub', 'either'), ('bronc', 'dysp'),
        ('either', 'xray'), ('either', 'dysp'),
    ]
    model = BayesianModel(structure)

    # Parameters: the two root nodes first, then the conditional tables,
    # in the same order in which they are registered with the model.
    tables = [
        TabularCPD(variable='asia', variable_card=2,
                   values=[[0.01], [0.99]]),
        TabularCPD(variable='smoke', variable_card=2,
                   values=[[0.5], [0.5]]),
        TabularCPD(variable='tub', variable_card=2,
                   values=[[0.05, 0.99], [0.95, 0.01]],
                   evidence=['asia'], evidence_card=[2]),
        TabularCPD(variable='lung', variable_card=2,
                   values=[[0.1, 0.99], [0.9, 0.01]],
                   evidence=['smoke'], evidence_card=[2]),
        TabularCPD(variable='bronc', variable_card=2,
                   values=[[0.6, 0.7], [0.4, 0.3]],
                   evidence=['smoke'], evidence_card=[2]),
        TabularCPD(variable='either', variable_card=2,
                   values=[[1.0, 1.0, 1.0, 0.0],
                           [0.0, 0.0, 0.0, 1.0]],
                   evidence=['lung', 'tub'], evidence_card=[2, 2]),
        TabularCPD(variable='xray', variable_card=2,
                   values=[[0.98, 0.95], [0.02, 0.05]],
                   evidence=['either'], evidence_card=[2]),
        TabularCPD(variable='dysp', variable_card=2,
                   values=[[0.9, 0.7, 0.8, 0.1],
                           [0.1, 0.3, 0.2, 0.9]],
                   evidence=['bronc', 'either'], evidence_card=[2, 2]),
    ]

    # Attach the CPDs and validate structure plus parameters.
    model.add_cpds(*tables)
    model.check_model()
    return model
Example #4
0
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build the label Bayesian network, or load a cached one.

    Every label gets a ``<label>_hat`` child node, and labels are connected
    to each other following the ontology read from ``OBODB_FILE``. CPDs for
    the ``_hat`` nodes come from ``get_model_cpds`` and those for the labels
    from ``get_true_label_cpds``. Nodes left without a CPD are removed
    before the model is checked and pickled to ``graph.p`` in
    ``RUNNING_MODEL_DIR``.

    Parameters
    ----------
    load : bool
        When true and a cached ``graph.p`` exists, unpickle and return it
        instead of rebuilding.
    subtree : bool
        Use ``_subtree_labels()`` as the label set instead of every key of
        the label dictionary.
    modelsdir : str
        Directory handed to ``get_model_cpds``.

    Returns
    -------
    The checked Bayesian network.
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'
    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE: unpickling executes arbitrary code; graph_file must only ever
        # be a cache written by this function.
        with open(graph_file, 'rb') as fh:
            G = pickle.load(fh)
        G.check_model()
        return G

    print('loading data...')
    training_labels, go_dict = load_label_data()
    if subtree:
        labels_list = _subtree_labels()
        print(labels_list)
    else:
        labels_list = list(go_dict)
    # Set copy for O(1) membership tests while wiring the ontology edges.
    label_set = set(labels_list)

    print('adding nodes and edges...')
    G = BayesianModel()
    G.add_edges_from([(label, label + '_hat') for label in labels_list])
    obo_graph = obonet.read_obo(OBODB_FILE)
    for label in labels_list:
        # Nodes with a path to `label` in the ontology graph become its
        # parents here, restricted to the labels we actually model.
        for child in networkx.ancestors(obo_graph, label):
            if child in label_set:
                G.add_edge(child, label)

    # Honour the caller-supplied directory (previously the module constant
    # was always used and the `modelsdir` argument was silently ignored).
    predicted_cpds = get_model_cpds(labels_list=labels_list,
                                    modelsdir=modelsdir)
    for cpd in predicted_cpds:
        G.add_cpds(cpd)
    true_label_cpds = get_true_label_cpds(training_labels,
                                          go_dict,
                                          labels_list=labels_list)
    for cpd in true_label_cpds:
        G.add_cpds(cpd)

    # Drop nodes that ended up without a CPD so that check_model can pass.
    remove_list = [node for node in G.nodes() if G.get_cpds(node) is None]
    for node in remove_list:
        if node in G:
            G.remove_node(node)
    G.check_model()
    with open(graph_file, 'wb') as fh:
        pickle.dump(G, fh)
    return G
Example #5
0
def make_DAG(DAG, CPD=None, checkmodel=True, verbose=3):
    """Create a Directed Acyclic Graph from an edge list or an existing model.

    Parameters
    ----------
    DAG : list, dict or pgmpy model
        Edge list in the form [('A','B'), ('B','C')], an object whose type
        name contains 'pgmpy' (used as-is), or a dict whose 'model' entry is
        one of the two.
    CPD : list, array-like
        TabularCPD for each node; a single CPD is wrapped in a list.
    checkmodel : bool
        Check the validity of the model after adding CPDs. The default is
        True. Only applies when CPDs are supplied.
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: None, 1: ERROR, 2: WARN, 3: INFO (default), 4: DEBUG, 5: TRACE

    Raises
    ------
    Exception
        When DAG is neither a list nor a pgmpy object.

    Returns
    -------
    dict
        'adjmat': adjacency matrix derived from the DAG,
        'model': the pgmpy model of the DAG.

    """
    # Normalise the inputs: a lone CPD becomes a list, a result dict is
    # unwrapped to the model it carries.
    if CPD is not None and not isinstance(CPD, list):
        CPD = [CPD]
    if isinstance(DAG, dict):
        DAG = DAG.get('model', None)

    is_pgmpy = 'pgmpy' in str(type(DAG))
    is_edge_list = isinstance(DAG, list)
    if not (is_edge_list or is_pgmpy):
        raise Exception(
            "[bnlearn] >Error: Input DAG should be a list. in the form [('A','B'), ('B','C')] or a <pgmpy.models.BayesianModel.BayesianModel>"
        )

    if is_pgmpy:
        if verbose >= 3:
            print('[bnlearn] >No changes made to existing Bayesian DAG.')
    else:
        if verbose >= 3:
            print('[bnlearn] >Bayesian DAG created.')
        DAG = BayesianModel(DAG)

    if CPD is not None:
        for table in CPD:
            DAG.add_cpds(table)
            if verbose >= 3:
                print('[bnlearn] >Add CPD: %s' % (table.variable))

        if checkmodel:
            print('[bnlearn] >Model correct: %s' % (DAG.check_model()))

    # Create adjacency matrix from DAG
    return {'adjmat': _dag2adjmat(DAG), 'model': DAG}
Example #6
0
def evaluate_single_graph(df_samples, graph, bn_truth, nb_repeat=3):
    """Fit a candidate graph on samples and score it against the truth.

    A fresh ``BayesianModel`` is given the nodes of ``bn_truth.causal_graph``
    and the edges of ``graph`` (after ``remove_bidirected_edges``). An edge
    that cannot be added in its given direction is tried reversed; if that
    fails as well the error is printed and the edge is skipped.

    Returns:
        dict: 'SID' and 'SHD' between the true and the fitted causal graph,
        plus 'OD' and 'ID', each the mean over ``nb_repeat`` evaluations of
        ``ODist`` / ``IDist``.
    """
    candidate = BayesianModel()
    candidate.add_nodes_from(bn_truth.causal_graph.nodes())

    for edge in remove_bidirected_edges(graph.edges()):
        try:
            candidate.add_edge(edge[0], edge[1])
            continue
        except Exception:
            # Fall through and try the opposite orientation.
            pass
        try:
            candidate.add_edge(edge[1], edge[0])
        except Exception as err:
            print(err)

    candidate.fit(df_samples, estimator=BayesianEstimator)
    candidate.check_model()
    bn_test = BayesianNetwork(candidate)

    for net in (bn_test, bn_truth):
        set_observe(net.bn)

    bn_truth.set_state_names()
    bn_test.set_state_names()

    sid_score = SID(bn_truth.causal_graph, bn_test.causal_graph)
    shd_score = SHD(bn_truth.causal_graph, bn_test.causal_graph)

    od_runs = []
    for _ in range(nb_repeat):
        od_runs.append(ODist(bn_truth, bn_test, 1000, discrete=True))
    od_score = np.mean(od_runs)

    id_runs = []
    for _ in range(nb_repeat):
        id_runs.append(IDist(bn_truth, bn_test, 1000, discrete=True))
    id_score = np.mean(id_runs)

    return {
        'SID': sid_score,
        'SHD': shd_score,
        'OD': od_score,
        'ID': id_score,
    }
Example #7
0
def probnet():
    """Build, validate and return a small network with S depending on H, B, D.

    The CPD of 'S' is printed as a side effect.

    Returns:
        The ``BayesianModel`` with all four CPDs attached.
    """
    # Defining the model structure. We can define the network by just passing a list of edges.
    model = BayesianModel([('H', 'S'), ('B', 'S'), ('D', 'S')])
    # Defining individual CPDs. A node without evidence needs a table of
    # shape (variable_card, 1), i.e. one row per state, not a single row.
    cpd_h = TabularCPD(variable='H', variable_card=2, values=[[0.2], [0.8]])
    cpd_b = TabularCPD(variable='B', variable_card=2, values=[[0.1], [0.9]])
    cpd_d = TabularCPD(variable='D', variable_card=2, values=[[0.5], [0.5]])
    cpd_s = TabularCPD(variable='S',
                       variable_card=2,
                       values=[[0.1, 0.2, 0.1, 0.15, 0.4, 0.35, 0.45, 0.43],
                               [0.9, 0.8, 0.9, 0.85, 0.6, 0.65, 0.55, 0.57]],
                       evidence=['H', 'B', 'D'],
                       evidence_card=[2, 2, 2])
    # Associating the CPDs with the network
    model.add_cpds(cpd_h, cpd_b, cpd_d, cpd_s)
    # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
    # defined and sum to 1.
    model.check_model()
    print(model.get_cpds('S'))
    return model
Example #8
0
    def query(self, networkFile, queryFile):
        """Build a network from ``networkFile`` and print each query result.

        The first line of ``networkFile`` is parsed by ``self.getegdes`` into
        a flat sequence in which consecutive pairs form the edges. Every
        following line is parsed by ``self.getcpbvar`` into the arguments of
        one ``TabularCPD``. Each line of ``queryFile`` is parsed by
        ``self.infer_query`` into a node and an evidence mapping, and the
        values of the inferred distribution for that node are printed.

        Args:
            networkFile: path of the network description file.
            queryFile: path of the file holding one query per line.
        """
        # Context managers close the files even when parsing fails.
        with open(networkFile) as network_fh:
            lines = network_fh.readlines()

        model = BayesianModel()
        edges = self.getegdes(lines[0])
        for i in range(len(edges) // 2):
            model.add_edge(edges[2 * i], edges[2 * i + 1])
        for line in lines[1:]:
            variable, variable_card, evidence, evidence_card, values = self.getcpbvar(line)
            cpb = TabularCPD(variable=variable, variable_card=variable_card, evidence=evidence,
                             evidence_card=evidence_card,
                             values=[values])
            model.add_cpds(cpb)
        model.check_model()

        infer = VariableElimination(model)
        with open(queryFile) as query_fh:
            query_lines = query_fh.readlines()
        for line in query_lines:
            node, evidence2 = self.infer_query(line)
            # NOTE(review): indexing the query result by node name assumes a
            # pgmpy version whose query() returns a mapping - confirm.
            print(infer.query([node], evidence=evidence2)[node].values)
Example #9
0
def bayesian_network_prediction2(rank, ad_cpt, gh_cpt, ga_cpt, prediction_cpt):
    """Predict the outcome and goal distributions for an ability difference.

    Builds the network ability_difference -> {goals_home, goals_away} ->
    Prediction from the supplied probability tables and queries it with
    ``ability_difference`` fixed to ``rank``.

    Args:
        rank: observed state of 'ability_difference' used as evidence.
        ad_cpt: table for 'ability_difference' (42 states).
        gh_cpt: table for 'goals_home' (8 states) given the 42 states of
            'ability_difference'.
        ga_cpt: table for 'goals_away' (8 states) given the 42 states of
            'ability_difference'.
        prediction_cpt: table for 'Prediction' (3 states) given
            'goals_home' and 'goals_away' (8 states each).

    Returns:
        tuple: the ``values`` of the inferred distributions of 'Prediction',
        'goals_home' and 'goals_away', in that order.
    """
    from pgmpy.inference import VariableElimination

    # Model creation: start by defining the network structure.
    dolores_model = BayesianModel([('ability_difference', 'goals_home'),
                                   ('ability_difference', 'goals_away'),
                                   ('goals_home', 'Prediction'),
                                   ('goals_away', 'Prediction')])
    cpd_AD = TabularCPD(variable='ability_difference', variable_card=42,
                        values=ad_cpt)
    cpd_GH = TabularCPD(variable='goals_home', variable_card=8,
                        values=gh_cpt,
                        evidence=['ability_difference'],
                        evidence_card=[42])
    cpd_GA = TabularCPD(variable='goals_away', variable_card=8,
                        values=ga_cpt,
                        evidence=['ability_difference'],
                        evidence_card=[42])
    cpd_P = TabularCPD(variable='Prediction', variable_card=3,
                       values=prediction_cpt,
                       evidence=['goals_home', 'goals_away'],
                       evidence_card=[8, 8])

    # Associating the parameters with the model structure.
    dolores_model.add_cpds(cpd_AD, cpd_GH, cpd_GA, cpd_P)
    # Checking if the cpds are valid for the model.
    dolores_model.check_model()
    # The independencies of the model were previously computed here and the
    # result thrown away; that work is skipped because nothing used it.

    inference = VariableElimination(dolores_model)
    observed = {'ability_difference': rank}
    pred = inference.query(variables=['Prediction'], evidence=observed)
    pred_gh = inference.query(variables=['goals_home'], evidence=observed)
    pred_ga = inference.query(variables=['goals_away'], evidence=observed)

    return pred.values, pred_gh.values, pred_ga.values
Example #10
0
 def createBayesModel(self, fileName):
     """Build a Bayesian model from ``fileName`` and print its check result.

     The first line of the file is parsed by ``self.getegdes`` into a flat
     sequence in which consecutive pairs form the edges. Every following
     line is parsed by ``self.getcpbvar`` into the arguments of one
     ``TabularCPD``. The outcome of ``check_model`` is printed; nothing is
     returned.

     Args:
         fileName: path of the network description file.
     """
     # NOTE(review): this echoes the first command-line argument, not
     # `fileName`, and raises IndexError when the script was started
     # without arguments - confirm this is intended.
     print(sys.argv[1])
     # Context manager closes the file even if parsing below fails.
     with open(fileName) as handle:
         lines = handle.readlines()
     model = BayesianModel()
     edges = self.getegdes(lines[0])
     for i in range(len(edges) // 2):
         model.add_edge(edges[2 * i], edges[2 * i + 1])
     for line in lines[1:]:
         variable, variable_card, evidence, evidence_card, values = self.getcpbvar(
             line)
         cpb = TabularCPD(variable=variable,
                          variable_card=variable_card,
                          evidence=evidence,
                          evidence_card=evidence_card,
                          values=[values])
         model.add_cpds(cpb)
     re = model.check_model()
     print(re)
Example #11
0
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail, CPD bookkeeping and check_model tests.

    Every test runs against a fresh graph with the edges
    d -> g, i -> g, g -> l and i -> s.
    """

    # Shared value tables for the check_model tests (columns sum to 1).
    TWO_COLUMNS = [[0.2, 0.3],
                   [0.8, 0.7]]
    FOUR_COLUMNS = [[0.2, 0.3, 0.4, 0.6],
                    [0.8, 0.7, 0.6, 0.4]]
    EIGHT_COLUMNS = [[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                     [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]

    def setUp(self):
        edges = [('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')]
        self.G = BayesianModel(edges)

    def tearDown(self):
        del self.G

    # -- helpers ---------------------------------------------------------

    def _check_trails(self, cases):
        """Assert is_active_trail(*args, **kwargs) for each (args, kwargs, expected)."""
        for args, kwargs, expected in cases:
            outcome = self.G.is_active_trail(*args, **kwargs)
            if expected:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def _random_cpds(self):
        """Return random (unnormalised) CPDs for d, i, g, l and s, in that order."""
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        return [cpd_d, cpd_i, cpd_g, cpd_l, cpd_s]

    def _assert_rejected(self, specs):
        """Add each CPD alone, expect check_model to raise, then remove it."""
        for variable, rows, evidence, evidence_card in specs:
            cpd = TabularCPD(variable, 2, np.array(rows), evidence,
                             evidence_card)
            self.G.add_cpds(cpd)
            self.assertRaises(ValueError, self.G.check_model)
            self.G.remove_cpds(cpd)

    # -- active trails ---------------------------------------------------

    def test_active_trail_nodes(self):
        for start, reachable in [('d', ['d', 'g', 'l']),
                                 ('i', ['g', 'i', 'l', 's'])]:
            self.assertEqual(sorted(self.G.active_trail_nodes(start)),
                             reachable)

    def test_active_trail_nodes_args(self):
        for start, given, reachable in [('d', 'g', ['d', 'i', 's']),
                                        ('l', 'g', ['l']),
                                        ('s', ['i', 'l'], ['s']),
                                        ('s', ['d', 'l'], ['g', 'i', 's'])]:
            found = self.G.active_trail_nodes(start, observed=given)
            self.assertEqual(sorted(found), reachable)

    def test_is_active_trail_triplets(self):
        self._check_trails([
            (('d', 'l'), {}, True),
            (('g', 's'), {}, True),
            (('d', 'i'), {}, False),
            (('d', 'i'), {'observed': 'g'}, True),
            (('d', 'l'), {'observed': 'g'}, False),
            (('i', 'l'), {'observed': 'g'}, False),
            (('d', 'i'), {'observed': 'l'}, True),
            (('g', 's'), {'observed': 'i'}, False),
        ])

    def test_is_active_trail(self):
        self._check_trails([
            (('d', 's'), {}, False),
            (('s', 'l'), {}, True),
            (('d', 's'), {'observed': 'g'}, True),
            (('s', 'l'), {'observed': 'g'}, False),
        ])

    def test_is_active_trail_args(self):
        # The observed nodes are passed positionally here.
        self._check_trails([
            (('s', 'l', 'i'), {}, False),
            (('s', 'l', 'g'), {}, False),
            (('d', 's', 'l'), {}, True),
            (('d', 's', ['i', 'l']), {}, False),
        ])

    # -- CPD bookkeeping -------------------------------------------------

    def test_get_cpds(self):
        self.G.add_cpds(*self._random_cpds())

        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        # Node names where one is a prefix of the other must stay distinct.
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2), evidence=['A'],
                            evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        for name in ('A', 'AB'):
            self.assertEqual(self.model.get_cpds(name).variable, name)

    def test_add_single_cpd(self):
        only_cpd = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(only_cpd)
        self.assertListEqual(self.G.get_cpds(), [only_cpd])

    def test_add_multiple_cpds(self):
        added = self._random_cpds()

        self.G.add_cpds(*added)
        for name, cpd in zip(['d', 'i', 'g', 'l', 's'], added):
            self.assertEqual(self.G.get_cpds(name), cpd)

    # -- check_model -----------------------------------------------------

    def test_check_model(self):
        cpd_g = TabularCPD('g', 2, np.array(self.FOUR_COLUMNS),
                           ['d', 'i'], [2, 2])
        cpd_s = TabularCPD('s', 2, np.array(self.TWO_COLUMNS), ['i'], 2)
        cpd_l = TabularCPD('l', 2, np.array(self.TWO_COLUMNS), ['g'], 2)

        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        # CPDs whose evidence does not match the node's parents in the graph.
        self._assert_rejected([
            ('g', self.TWO_COLUMNS, ['i'], 2),
            ('g', self.FOUR_COLUMNS, ['d', 's'], [2, 2]),
            ('g', self.TWO_COLUMNS, ['l'], 2),
            ('l', self.TWO_COLUMNS, ['d'], 2),
            ('l', self.FOUR_COLUMNS, ['d', 'i'], [2, 2]),
            ('l', self.EIGHT_COLUMNS, ['g', 'd', 'i'], [2, 2, 2]),
        ])

    def test_check_model2(self):
        # CPDs with the right evidence but columns that do not sum to 1.
        self._assert_rejected([
            ('s', [[0.5, 0.3],
                   [0.8, 0.7]], ['i'], 2),
            ('g', [[0.2, 0.3, 0.4, 0.6],
                   [0.3, 0.7, 0.6, 0.4]], ['d', 'i'], [2, 2]),
            ('l', [[0.2, 0.3],
                   [0.1, 0.7]], ['g'], 2),
        ])
Example #12
0
    def configure(self, rf):
        # command format will be the following:
        # trainPGClassifier selfName networkStructure
        print sys.argv

        # read network structure and make graph
        # labels in networkStructure identical to model names
        # networkStructure as a string containing a list of tuples

        # selfName = 'actionPGN'
        # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]"

        selfName = sys.argv[1]
        netStructureString = sys.argv[2]

        netStructure = ast.literal_eval(netStructureString)
        print netStructure

        # collect all model names in a list to extract a unique set
        modelList = []
        for k in netStructure:
            modelList += list(k)
        print list(set(modelList))

        # create a port to connect to /sam/rpc:i to query model path for each model name
        portsList = []
        querySupervisorPort = yarp.RpcClient()
        querySupervisorPortName = '/sam/' + selfName + '/queryRpc'
        querySupervisorPort.open(querySupervisorPortName)

        portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort})
        yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i')
        # ---------------------------------------------------------------------------------------------------------------
        modelDict = dict()
        failFlag = False
        for j in modelList:
            if j != selfName:
                modNameSplit = j.split(' ')
                cmd = yarp.Bottle()
                cmd.addString('dataDir')
                for l in modNameSplit:
                    cmd.addString(l)
                reply = yarp.Bottle()
                querySupervisorPort.write(cmd, reply)
                if reply.get(0).asString() != 'nack':
                    modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(), 'pickleData': None}
                    # try:
                    # load pickle for the model file
                    currPickle = pickle.load(open(reply.get(1).asString(), 'rb'))
                    # try loading labelComparisonDict from the pickle
                    if 'labelComparisonDict' in currPickle.keys():
                        modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict']
                        print j, 'labelComparisonDict loaded'
                    else:
                        print modNameSplit[0], 'labelComparisonDict not found'
                        failFlag = True

                    if 'overallPerformanceLabels' in currPickle.keys():
                        modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels']
                        print j, 'overallPerformanceLabels loaded'
                    else:
                        print j, 'overallPerformanceLabels not found'
                        failFlag = True
                    # except:
                    #     failFlag = True
                else:
                    failFlag = True

        print 'FAIL?', failFlag
        if failFlag:
            return False

        modelList = modelDict.keys()
        print modelList

        # ---------------------------------------------------------------------------------------------------------------

        # extract unique lists from the collected data
        # the unique list of pickleData[original] represents the possibleClassifications for each model
        modelDict[selfName] = dict()
        modelDict[selfName]['labels'] = []
        selfModelCol = 1

        for j in modelList:
            modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])])
            print j, 'unique labels:', modelDict[j]['labels']
            print j, 'CPD shape', modelDict[j]['CPD'].shape

            modelDict[selfName]['labels'] += modelDict[j]['labels']
            selfModelCol *= len(modelDict[j]['labels'])
            print

        # the possibleClassifications for both models (outputs of the PGN)
        # are the unique list of the model specific labels for all models
        modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels']))
        modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original']
        modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol])
        print selfName, 'unique labels:', modelDict[selfName]['labels']
        print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape

        # check that original classifications of both are identical
        # otherwise cannot combine them with a single node.
        # This is currently a big limitation that will be removed later
        print modelDict[selfName]['labels']
        for j in modelList:
            print j,
            for k in range(len(modelDict[j]['pickleData']['original'])):
                print modelDict[j]['pickleData']['original'][k]
                if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']:
                    modelDict[j]['pickleData']['original'][k] = 'unknown'

        for j in modelList:
            if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']:
                failFlag = True
                print 'original classifications of', j, 'are not identical to those of', selfName

        if failFlag:
            return False

        # Update netStructureString to reflect changes in the modelList names
        strSections = netStructureString.split("'")
        for k in range(len(strSections)):
            if len(strSections[k]) > 2 and ',' not in strSections[k]:
                strSections[k] = strSections[k].split(' ')[0]
        netStructureString = "'".join(strSections)
        netStructure = ast.literal_eval(netStructureString)
        # ---------------------------------------------------------------------------------------------------------------
        # iterate through actual labels
        # for each actual label, iterate through models
        # for each model find classification label of this model for current actual label
        # get the index of the current classification and add it to its CPD
        # also calculate which item in the joint CPD needs to be incremented

        for j in range(len(modelDict[selfName]['actualLabels'])):
            currActualLabel = modelDict[selfName]['actualLabels'][j]
            row = modelDict[selfName]['labels'].index(currActualLabel)

            colVar = np.zeros([len(modelList)])
            for k in range(len(modelList)):
                cmod = modelList[k]
                if k != 0:
                    pmod = modelList[k-1]
                    colVar *= len(modelDict[pmod]['labels'])

                colVar[k] = modelDict[cmod]['labels'].index(
                                   modelDict[cmod]['pickleData']['results'][j])
                modelDict[cmod]['CPD'][0, colVar[k]] += 1

            col = sum(colVar)
            modelDict[selfName]['CPD'][row, col] += 1

        # take all CPD's and normalise the matrices
        evidenceCard = copy.deepcopy(modelList)
        for j in modelDict:
            if j == selfName:
                # this is a joint CPD matrix
                # normalise columns to have sum = 1
                modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1')
            else:
                # normalise sum of matrix = 1
                modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD'])
                evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels'])
            print modelDict[j]['CPD']

        model = BayesianModel(netStructure)

        # create TabularCPD data structure to nest calculated CPD
        for j in modelDict:
            if j == selfName:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'],
                                                       evidence=modelList,
                                                       evidence_card=evidenceCard)
            else:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j,
                                                       variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'])

        # Associating the CPDs with the network
        for j in modelDict:
            model.add_cpds(modelDict[j]['cpdObject'])

        # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
        # defined and sum to 1.
        if not model.check_model():
            print 'Model check returned unsuccessful'
            return False

        infer = VariableElimination(model)
        confMatrix = np.zeros(len(modelDict[selfName]['labels']))
        # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased
        for j in range(len(modelDict[selfName]['actualLabels'])):
            currEvidenceDict = dict()
            for k in modelList:
                currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j])

            q = infer.query([selfName], currEvidenceDict)

            inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)]
            actualClass = modelDict[selfName]['actualLabels'][j]
            confMatrix[modelDict[selfName].index(actualClass), modelDict[selfName].index(inferenceClass)] += 1

        print "%Accuracy with PGN"
        dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix)

        return True