def create_networks(self):
    """Build the Bayesian network and wrap it in a generative model.

    The network links each goal/obstacle hypothesis to its motor node and
    each obstacle hypothesis to three vision nodes. All CPDs come from the
    ``*_hypo_cpd`` helper functions.

    Returns
    -------
    dict
        ``{'main': GenerativeModel(...)}`` built from a
        ``SensoryInputVirtualPeepo`` bound to ``self`` and the checked network.
    """
    edges = [
        ('goal_left', 'motor_left'),
        ('goal_right', 'motor_right'),
        ('obstacle_left', 'motor_left'),
        ('obstacle_right', 'motor_right'),
        ('obstacle_left', 'vision_1'),
        ('obstacle_left', 'vision_2'),
        ('obstacle_left', 'vision_3'),
        ('obstacle_right', 'vision_4'),
        ('obstacle_right', 'vision_5'),
        ('obstacle_right', 'vision_6'),
    ]
    network = BayesianModel(edges)

    # Same construction order as the edge list: motors, goals, obstacles,
    # then the six vision nodes.
    cpds = [
        double_hypo_cpd('motor_left', 'goal_left', 'obstacle_left'),
        double_hypo_cpd('motor_right', 'goal_right', 'obstacle_right'),
        wandering_hypo_cpd('goal_left'),
        wandering_hypo_cpd('goal_right'),
        obstacle_hypo_cpd('obstacle_left'),
        obstacle_hypo_cpd('obstacle_right'),
        single_hypo_cpd('vision_1', 'obstacle_left'),
        single_hypo_cpd('vision_2', 'obstacle_left'),
        single_hypo_cpd('vision_3', 'obstacle_left'),
        single_hypo_cpd('vision_4', 'obstacle_right'),
        single_hypo_cpd('vision_5', 'obstacle_right'),
        single_hypo_cpd('vision_6', 'obstacle_right'),
    ]
    network.add_cpds(*cpds)
    network.check_model()

    sensory_input = SensoryInputVirtualPeepo(self)
    return {'main': GenerativeModel(sensory_input, network)}
def generate_cpds(self):
    """Create a pgmpy model mirroring ``self.graph`` with random CPDs.

    Every node of ``self.graph`` gets a random cardinality in ``{2, 3, 4, 5}``
    and a randomly drawn, column-normalised CPD conditioned on its
    (sorted) predecessors. The checked model is stored in ``self.model``.

    Node names are converted with ``str`` before being handed to pgmpy.
    """
    model = BayesianModel([(str(a), str(b)) for a, b in self.graph.edges()])
    # Nodes without any edge are not created by the edge list above; add them
    # explicitly so that their CPDs can be attached below.
    model.add_nodes_from([str(n) for n in self.graph.nodes()])

    variable_cards = {}
    cpds = []
    # Topological order guarantees that the cardinality of every parent is
    # already known when a child is processed.
    for n in nx.topological_sort(self.graph):
        causes = sorted(self.graph.predecessors(n))
        variable_card = random.choice([2, 3, 4, 5])
        variable_cards[n] = variable_card

        if not causes:
            # Prior table as a single column: shape (variable_card, 1).
            values = np.random.rand(variable_card, 1)
            values = values / np.sum(values)
            cpd = TabularCPD(variable=str(n),
                             variable_card=variable_card,
                             values=values)
        else:
            evidence_card = [variable_cards[i] for i in causes]
            values = np.random.rand(variable_card, np.prod(evidence_card))
            # Normalise each column so it is a distribution over the variable.
            values = values / np.sum(values, axis=0)
            cpd = TabularCPD(variable=str(n),
                             variable_card=variable_card,
                             values=values,
                             evidence=[str(a) for a in causes],
                             evidence_card=evidence_card)
        cpds.append(cpd)

    model.add_cpds(*cpds)
    model.check_model()
    self.model = model
def createBayesianNetwork():
    """Build, validate and return the eight-node binary network.

    Nodes: asia, smoke, tub, lung, bronc, either, xray, dysp. Every variable
    has two states; the CPD tables are hard-coded below.

    Returns
    -------
    BayesianModel
        The model with all CPDs attached, after ``check_model()``.
    """
    def binary_cpd(name, table, parents=None):
        # All variables are binary, so each parent contributes cardinality 2.
        if parents is None:
            return TabularCPD(variable=name, variable_card=2, values=table)
        return TabularCPD(variable=name, variable_card=2, values=table,
                          evidence=parents,
                          evidence_card=[2] * len(parents))

    # Network structure
    structure = [
        ('asia', 'tub'),
        ('smoke', 'bronc'),
        ('smoke', 'lung'),
        ('lung', 'either'),
        ('tub', 'either'),
        ('bronc', 'dysp'),
        ('either', 'xray'),
        ('either', 'dysp'),
    ]
    model = BayesianModel(structure)

    # Parameters
    cpds = [
        binary_cpd('asia', [[0.01], [0.99]]),
        binary_cpd('smoke', [[0.5], [0.5]]),
        binary_cpd('tub', [[0.05, 0.99], [0.95, 0.01]], ['asia']),
        binary_cpd('lung', [[0.1, 0.99], [0.9, 0.01]], ['smoke']),
        binary_cpd('bronc', [[0.6, 0.7], [0.4, 0.3]], ['smoke']),
        binary_cpd('either',
                   [[1.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]],
                   ['lung', 'tub']),
        binary_cpd('xray', [[0.98, 0.95], [0.02, 0.05]], ['either']),
        binary_cpd('dysp',
                   [[0.9, 0.7, 0.8, 0.1], [0.1, 0.3, 0.2, 0.9]],
                   ['bronc', 'either']),
    ]

    # Attach the parameters to the structure and validate
    model.add_cpds(*cpds)
    model.check_model()
    return model
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build (or load from cache) the Bayesian network over labels.

    Parameters
    ----------
    load : bool
        When true and the cached graph file exists, load the pickled graph
        instead of rebuilding it.
    subtree : bool
        When true, use the labels from ``_subtree_labels()``; otherwise use
        all keys of the dictionary returned by ``load_label_data()``.
    modelsdir : str
        Directory passed to ``get_model_cpds`` for the predicted CPDs.

    Returns
    -------
    BayesianModel
        The checked model. A freshly built model is also pickled to
        ``RUNNING_MODEL_DIR + '/graph.p'``.
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'

    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE: unpickling can execute arbitrary code; this file is expected
        # to be a cache written by this function, never external input.
        with open(graph_file, 'rb') as fh:
            G = pickle.load(fh)
        G.check_model()
        return G

    print('loading data...')
    training_labels, go_dict = load_label_data()
    if subtree:
        labels_list = _subtree_labels()
        print(labels_list)
    else:
        labels_list = go_dict.keys()

    print('adding nodes and edges...')
    G = BayesianModel()
    # Each label gets a companion '<label>_hat' node as its child.
    G.add_edges_from([(label, label + '_hat') for label in labels_list])

    obo_graph = obonet.read_obo(OBODB_FILE)
    label_set = set(labels_list)  # O(1) membership tests inside the loop
    for label in labels_list:
        for child in networkx.ancestors(obo_graph, label):
            if child in label_set:
                G.add_edge(child, label)

    predicted_cpds = get_model_cpds(labels_list=labels_list,
                                    modelsdir=modelsdir)
    for cpd in predicted_cpds:
        G.add_cpds(cpd)

    true_label_cpds = get_true_label_cpds(training_labels, go_dict,
                                          labels_list=labels_list)
    for cpd in true_label_cpds:
        G.add_cpds(cpd)

    # Drop every node that ended up without a CPD so check_model can pass.
    remove_list = [node for node in G.nodes() if G.get_cpds(node) is None]
    for node in remove_list:
        if node in G:
            G.remove_node(node)

    G.check_model()
    with open(graph_file, 'wb') as fh:
        pickle.dump(G, fh)
    return G
def make_DAG(DAG, CPD=None, checkmodel=True, verbose=3):
    """Create Directed Acyclic Graph based on list.

    Parameters
    ----------
    DAG : list, dict or pgmpy model
        list containing source and target in the form of [('A','B'), ('B','C')],
        an existing pgmpy model, or a dict whose 'model' key holds either.
    CPD : list, array-like
        Containing TabularCPD for each node. A single CPD is wrapped in a list.
    checkmodel : bool
        Check the validity of the model after adding the CPDs.
        The default is True
    verbose : int, optional
        Print progress to screen. The default is 3.
        0: None, 1: ERROR, 2: WARN, 3: INFO (default), 4: DEBUG, 5: TRACE

    Raises
    ------
    Exception
        When DAG is neither a list nor a pgmpy object.

    Returns
    -------
    dict
        'adjmat': adjacency matrix derived from the DAG,
        'model': the pgmpy model of the DAG.

    """
    if (CPD is not None) and (not isinstance(CPD, list)):
        CPD = [CPD]

    if isinstance(DAG, dict):
        DAG = DAG.get('model', None)

    # pgmpy objects are recognised by their type name.
    is_pgmpy = 'pgmpy' in str(type(DAG))

    if (not isinstance(DAG, list)) and (not is_pgmpy):
        raise Exception(
            "[bnlearn] >Error: Input DAG should be a list. in the form [('A','B'), ('B','C')] or a <pgmpy.models.BayesianModel.BayesianModel>"
        )
    elif is_pgmpy:
        if verbose >= 3:
            print('[bnlearn] >No changes made to existing Bayesian DAG.')
    elif isinstance(DAG, list):
        if verbose >= 3:
            print('[bnlearn] >Bayesian DAG created.')
        DAG = BayesianModel(DAG)

    if CPD is not None:
        for cpd in CPD:
            DAG.add_cpds(cpd)
            if verbose >= 3:
                print('[bnlearn] >Add CPD: %s' % (cpd.variable))
        if checkmodel:
            # The check always runs; only the message honours `verbose`,
            # like every other message in this function.
            model_ok = DAG.check_model()
            if verbose >= 3:
                print('[bnlearn] >Model correct: %s' % (model_ok))

    # Create adjacency matrix from DAG
    out = {}
    out['adjmat'] = _dag2adjmat(DAG)
    out['model'] = DAG
    return out
def evaluate_single_graph(df_samples, graph, bn_truth, nb_repeat=3):
    """Fit a candidate graph on samples and score it against the truth.

    The candidate structure is rebuilt on the truth's node set. Each edge is
    inserted as given; if that fails the reversed edge is tried, and if that
    fails too the second error is printed and the edge is skipped.

    Returns
    -------
    dict
        Keys 'SID', 'SHD', 'OD' and 'ID'. 'OD' and 'ID' are means over
        ``nb_repeat`` evaluations of ``ODist`` / ``IDist``.
    """
    testing_graph = BayesianModel()
    testing_graph.add_nodes_from(bn_truth.causal_graph.nodes())

    for edge in remove_bidirected_edges(graph.edges()):
        # Try the stated orientation first, then the reverse one.
        failure = None
        for src_idx, dst_idx in ((0, 1), (1, 0)):
            try:
                testing_graph.add_edge(edge[src_idx], edge[dst_idx])
            except Exception as err:
                failure = err
            else:
                break
        else:
            # Both orientations were rejected; report the last error.
            print(failure)

    testing_graph.fit(df_samples, estimator=BayesianEstimator)
    testing_graph.check_model()

    bn_test = BayesianNetwork(testing_graph)
    set_observe(bn_test.bn)
    set_observe(bn_truth.bn)
    bn_truth.set_state_names()
    bn_test.set_state_names()

    sid = SID(bn_truth.causal_graph, bn_test.causal_graph)
    shd = SHD(bn_truth.causal_graph, bn_test.causal_graph)

    od_scores = []
    for _ in range(nb_repeat):
        od_scores.append(ODist(bn_truth, bn_test, 1000, discrete=True))
    od = np.mean(od_scores)

    id_scores = []
    for _ in range(nb_repeat):
        id_scores.append(IDist(bn_truth, bn_test, 1000, discrete=True))
    id_mean = np.mean(id_scores)

    return {'SID': sid, 'SHD': shd, 'OD': od, 'ID': id_mean}
def probnet():
    """Build the four-node binary network H, B, D -> S and return it.

    The CPD of 'S' is printed after the model has been validated.

    Returns
    -------
    BayesianModel
        The checked model with all four CPDs attached.
    """
    # Defining the model structure by passing a list of edges.
    model = BayesianModel([('H', 'S'), ('B', 'S'), ('D', 'S')])

    # Priors of the root nodes, written as one column per CPD
    # (shape (variable_card, 1)), the same layout used for root nodes
    # elsewhere in this file.
    cpd_h = TabularCPD(variable='H', variable_card=2, values=[[0.2], [0.8]])
    cpd_b = TabularCPD(variable='B', variable_card=2, values=[[0.1], [0.9]])
    cpd_d = TabularCPD(variable='D', variable_card=2, values=[[0.5], [0.5]])

    # Conditional table of S given (H, B, D): one column per parent
    # configuration, each column sums to 1.
    cpd_s = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.1, 0.2, 0.1, 0.15, 0.4, 0.35, 0.45, 0.43],
                [0.9, 0.8, 0.9, 0.85, 0.6, 0.65, 0.55, 0.57]],
        evidence=['H', 'B', 'D'],
        evidence_card=[2, 2, 2])

    # Associating the CPDs with the network
    model.add_cpds(cpd_h, cpd_b, cpd_d, cpd_s)

    # check_model checks the network structure and CPDs and verifies that
    # the CPDs are correctly defined and sum to 1.
    model.check_model()
    print(model.get_cpds('S'))
    return model
def query(self, networkFile, queryFile):
    """Build a network from ``networkFile`` and print the answers to queries.

    The first line of ``networkFile`` is parsed by ``self.getegdes`` into a
    flat sequence of node names that is consumed pairwise as edges. Every
    following line is parsed by ``self.getcpbvar`` into one CPD. Each line
    of ``queryFile`` is parsed by ``self.infer_query`` into a node and an
    evidence mapping; the resulting distribution values are printed.
    """
    # Read each file inside a context manager so the handle is always closed.
    with open(networkFile) as network_handle:
        lines = network_handle.readlines()

    model = BayesianModel()
    edges = self.getegdes(lines[0])
    # Consecutive pairs form (source, target); a trailing odd item is ignored.
    for i in range(0, len(edges) - 1, 2):
        model.add_edge(edges[i], edges[i + 1])

    for line in lines[1:]:
        variable, variable_card, evidence, evidence_card, values = \
            self.getcpbvar(line)
        cpb = TabularCPD(variable=variable,
                         variable_card=variable_card,
                         evidence=evidence,
                         evidence_card=evidence_card,
                         values=[values])
        model.add_cpds(cpb)
    model.check_model()

    infer = VariableElimination(model)

    with open(queryFile) as query_handle:
        query_lines = query_handle.readlines()

    for line in query_lines:
        node, evidence2 = self.infer_query(line)
        print(infer.query([node], evidence=evidence2)[node].values)
def bayesian_network_prediction2(rank, ad_cpt, gh_cpt, ga_cpt, prediction_cpt):
    """Query the match-prediction network for a given ability difference.

    Parameters
    ----------
    rank
        Observed state of 'ability_difference' (the node has 42 states).
    ad_cpt
        Table for 'ability_difference'.
    gh_cpt, ga_cpt
        Tables for 'goals_home' / 'goals_away' (8 states each) given
        'ability_difference'.
    prediction_cpt
        Table for 'Prediction' (3 states) given both goal nodes.

    Returns
    -------
    tuple
        ``(pred.values, pred_gh.values, pred_ga.values)``: the posterior
        values of 'Prediction', 'goals_home' and 'goals_away'.
    """
    # Kept as a function-scope import, as in the original code.
    from pgmpy.inference import VariableElimination

    # Model creation: start by defining the network structure.
    dolores_model = BayesianModel([('ability_difference', 'goals_home'),
                                   ('ability_difference', 'goals_away'),
                                   ('goals_home', 'Prediction'),
                                   ('goals_away', 'Prediction')])

    cpd_AD = TabularCPD(variable='ability_difference', variable_card=42,
                        values=ad_cpt)
    cpd_GH = TabularCPD(variable='goals_home', variable_card=8,
                        values=gh_cpt,
                        evidence=['ability_difference'], evidence_card=[42])
    cpd_GA = TabularCPD(variable='goals_away', variable_card=8,
                        values=ga_cpt,
                        evidence=['ability_difference'], evidence_card=[42])
    cpd_P = TabularCPD(variable='Prediction', variable_card=3,
                       values=prediction_cpt,
                       evidence=['goals_home', 'goals_away'],
                       evidence_card=[8, 8])

    # Associating the parameters with the model structure.
    dolores_model.add_cpds(cpd_AD, cpd_GH, cpd_GA, cpd_P)

    # Checking if the cpds are valid for the model.
    dolores_model.check_model()

    inference = VariableElimination(dolores_model)

    def posterior(variable):
        # A fresh evidence dict per query, exactly as in separate calls.
        return inference.query(variables=[variable],
                               evidence={'ability_difference': rank})

    pred = posterior('Prediction')
    pred_gh = posterior('goals_home')
    pred_ga = posterior('goals_away')
    return pred.values, pred_gh.values, pred_ga.values
def createBayesModel(self, fileName):
    """Build a Bayesian model from the description in ``fileName``.

    The first line is parsed by ``self.getegdes`` into a flat sequence of
    node names consumed pairwise as edges; every following line is parsed by
    ``self.getcpbvar`` into one CPD. The file name and the result of
    ``check_model()`` are printed.

    Returns
    -------
    BayesianModel
        The model that was built (previously it was discarded).
    """
    # Report the file actually being read; this no longer depends on the
    # process having command-line arguments.
    print(fileName)
    with open(fileName) as handle:
        lines = handle.readlines()

    model = BayesianModel()
    edges = self.getegdes(lines[0])
    # Consecutive pairs form (source, target); a trailing odd item is ignored.
    for i in range(0, len(edges) - 1, 2):
        model.add_edge(edges[i], edges[i + 1])

    for line in lines[1:]:
        variable, variable_card, evidence, evidence_card, values = \
            self.getcpbvar(line)
        cpb = TabularCPD(variable=variable,
                         variable_card=variable_card,
                         evidence=evidence,
                         evidence_card=evidence_card,
                         values=[values])
        model.add_cpds(cpb)

    check_result = model.check_model()
    print(check_result)
    return model
class TestBayesianModelCPD(unittest.TestCase):
    """Tests for active trails and CPD handling on a small five-node model.

    The fixture graph is d -> g <- i, g -> l, i -> s. Evidence cardinalities
    are always passed as lists, matching ``test_get_cpds1``.
    """

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

    def test_active_trail_nodes(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d')),
                         ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')),
                         ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        self.assertEqual(
            sorted(self.G.active_trail_nodes('d', observed='g')),
            ['d', 'i', 's'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('l', observed='g')),
            ['l'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])),
            ['s'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])),
            ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], [2])
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        # Node names where one is a prefix of the other.
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], [2])
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        cpd_s = TabularCPD('s', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['i'], [2])
        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['g'], [2])
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        # Each CPD below lists evidence that differs from the node's parents
        # in the fixture graph; check_model is expected to reject it.
        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['i'], [2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 's'], [2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['l'], [2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3],
                                     [0.8, 0.7]]),
                           ['d'], [2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                           ['g', 'd', 'i'], [2, 2, 2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def test_check_model2(self):
        # Each table below has at least one column that does not sum to 1.
        cpd_s = TabularCPD('s', 2,
                           np.array([[0.5, 0.3],
                                     [0.8, 0.7]]),
                           ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_s)

        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.3, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        self.G.add_cpds(cpd_g)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_g)

        cpd_l = TabularCPD('l', 2,
                           np.array([[0.2, 0.3],
                                     [0.1, 0.7]]),
                           ['g'], [2])
        self.G.add_cpds(cpd_l)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(cpd_l)

    def tearDown(self):
        del self.G
def configure(self, rf): # command format will be the following: # trainPGClassifier selfName networkStructure print sys.argv # read network structure and make graph # labels in networkStructure identical to model names # networkStructure as a string containing a list of tuples # selfName = 'actionPGN' # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]" selfName = sys.argv[1] netStructureString = sys.argv[2] netStructure = ast.literal_eval(netStructureString) print netStructure # collect all model names in a list to extract a unique set modelList = [] for k in netStructure: modelList += list(k) print list(set(modelList)) # create a port to connect to /sam/rpc:i to query model path for each model name portsList = [] querySupervisorPort = yarp.RpcClient() querySupervisorPortName = '/sam/' + selfName + '/queryRpc' querySupervisorPort.open(querySupervisorPortName) portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort}) yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i') # --------------------------------------------------------------------------------------------------------------- modelDict = dict() failFlag = False for j in modelList: if j != selfName: modNameSplit = j.split(' ') cmd = yarp.Bottle() cmd.addString('dataDir') for l in modNameSplit: cmd.addString(l) reply = yarp.Bottle() querySupervisorPort.write(cmd, reply) if reply.get(0).asString() != 'nack': modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(), 'pickleData': None} # try: # load pickle for the model file currPickle = pickle.load(open(reply.get(1).asString(), 'rb')) # try loading labelComparisonDict from the pickle if 'labelComparisonDict' in currPickle.keys(): modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict'] print j, 'labelComparisonDict loaded' else: print modNameSplit[0], 'labelComparisonDict not found' failFlag = True if 'overallPerformanceLabels' in currPickle.keys(): 
modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels'] print j, 'overallPerformanceLabels loaded' else: print j, 'overallPerformanceLabels not found' failFlag = True # except: # failFlag = True else: failFlag = True print 'FAIL?', failFlag if failFlag: return False modelList = modelDict.keys() print modelList # --------------------------------------------------------------------------------------------------------------- # extract unique lists from the collected data # the unique list of pickleData[original] represents the possibleClassifications for each model modelDict[selfName] = dict() modelDict[selfName]['labels'] = [] selfModelCol = 1 for j in modelList: modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])]) print j, 'unique labels:', modelDict[j]['labels'] print j, 'CPD shape', modelDict[j]['CPD'].shape modelDict[selfName]['labels'] += modelDict[j]['labels'] selfModelCol *= len(modelDict[j]['labels']) print # the possibleClassifications for both models (outputs of the PGN) # are the unique list of the model specific labels for all models modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels'])) modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original'] modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol]) print selfName, 'unique labels:', modelDict[selfName]['labels'] print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape # check that original classifications of both are identical # otherwise cannot combine them with a single node. 
# This is currently a big limitation that will be removed later print modelDict[selfName]['labels'] for j in modelList: print j, for k in range(len(modelDict[j]['pickleData']['original'])): print modelDict[j]['pickleData']['original'][k] if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']: modelDict[j]['pickleData']['original'][k] = 'unknown' for j in modelList: if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']: failFlag = True print 'original classifications of', j, 'are not identical to those of', selfName if failFlag: return False # Update netStructureString to reflect changes in the modelList names strSections = netStructureString.split("'") for k in range(len(strSections)): if len(strSections[k]) > 2 and ',' not in strSections[k]: strSections[k] = strSections[k].split(' ')[0] netStructureString = "'".join(strSections) netStructure = ast.literal_eval(netStructureString) # --------------------------------------------------------------------------------------------------------------- # iterate through actual labels # for each actual label, iterate through models # for each model find classification label of this model for current actual label # get the index of the current classification and add it to its CPD # also calculate which item in the joint CPD needs to be incremented for j in range(len(modelDict[selfName]['actualLabels'])): currActualLabel = modelDict[selfName]['actualLabels'][j] row = modelDict[selfName]['labels'].index(currActualLabel) colVar = np.zeros([len(modelList)]) for k in range(len(modelList)): cmod = modelList[k] if k != 0: pmod = modelList[k-1] colVar *= len(modelDict[pmod]['labels']) colVar[k] = modelDict[cmod]['labels'].index( modelDict[cmod]['pickleData']['results'][j]) modelDict[cmod]['CPD'][0, colVar[k]] += 1 col = sum(colVar) modelDict[selfName]['CPD'][row, col] += 1 # take all CPD's and normalise the matrices evidenceCard = copy.deepcopy(modelList) for j in modelDict: if j 
== selfName: # this is a joint CPD matrix # normalise columns to have sum = 1 modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1') else: # normalise sum of matrix = 1 modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD']) evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels']) print modelDict[j]['CPD'] model = BayesianModel(netStructure) # create TabularCPD data structure to nest calculated CPD for j in modelDict: if j == selfName: modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']), values=modelDict[j]['CPD'], evidence=modelList, evidence_card=evidenceCard) else: modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']), values=modelDict[j]['CPD']) # Associating the CPDs with the network for j in modelDict: model.add_cpds(modelDict[j]['cpdObject']) # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly # defined and sum to 1. if not model.check_model(): print 'Model check returned unsuccessful' return False infer = VariableElimination(model) confMatrix = np.zeros(len(modelDict[selfName]['labels'])) # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased for j in range(len(modelDict[selfName]['actualLabels'])): currEvidenceDict = dict() for k in modelList: currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j]) q = infer.query([selfName], currEvidenceDict) inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)] actualClass = modelDict[selfName]['actualLabels'][j] confMatrix[modelDict[selfName].index(actualClass), modelDict[selfName].index(inferenceClass)] += 1 print "%Accuracy with PGN" dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix) return True