def train(self, iterations=10, maxmiss=10):
    """Train using Recommended Greedy Algorithm.

    Repeatedly proposes a mutated copy of the current network (one random
    edge addition, deletion and reversal), keeps it when it scores better,
    and stops after `iterations` proposals or `maxmiss` consecutive
    non-improving acyclic proposals.  The score trace is stored on
    ``self.scores``.
    """
    history = {
        'Iteration': [],
        'Score': [],
    }
    misses_left = maxmiss
    iters_left = iterations
    node_names = list(self.data.columns)
    best = score_pom(export_pom(self.net, by='label'), self.data)
    while misses_left > 0 and iters_left > 0:
        candidate = dc(self.net)
        touched = set()
        # One random add, one delete and one reversal per proposal.
        for mutate in (candidate.add_edge, candidate.del_edge, candidate.rev_edge):
            pair = np.random.choice(node_names, size=2, replace=False)
            mutate(pair[0], pair[1])
            touched.update(pair)
        # Cyclic proposals are discarded but still consume an iteration.
        if candidate.acyclic():
            # Only the CPTs of touched nodes need recomputing.
            candidate.calc_cpt(self.data, alpha=self.alpha, change=touched)
            score = score_pom(export_pom(candidate, by='label'), self.data)
            history['Iteration'].append(iterations - iters_left)
            history['Score'].append(score)
            if score > best:
                self.net = candidate
                best = score
                misses_left = maxmiss
            else:
                misses_left -= 1
        iters_left -= 1
    self.scores = history
def specificity(net, data):
    """Return a dict mapping each column of *data* to the specificity of
    *net*'s binarized predictions for that column."""
    model = export_pom(net, by=net.by)
    return {column: sp(pred_bin(model, data, column)) for column in data.columns}
def sensetivity(net, data):
    """Return a dict mapping each column of *data* to the sensitivity
    (recall) of *net*'s binarized predictions for that column.

    NOTE: the public name is misspelled ("sensetivity") and is kept
    unchanged for backward compatibility; a correctly spelled alias is
    defined below — prefer ``sensitivity`` in new code.
    """
    res = {}
    m = export_pom(net, by=net.by)
    for c in data.columns:
        #print(pred_bin(m, data, c))
        res[c] = sn(pred_bin(m, data, c))
    return res


# Correctly spelled, backward-compatible alias for the function above.
sensitivity = sensetivity
def accuracy(net, data):
    """Return a dict mapping each column of *data* to the accuracy of
    *net*'s multiclass predictions for that column."""
    model = export_pom(net, by=net.by)
    return {column: ac(pred_mult(model, data, column)) for column in data.columns}
def train(self, iterations=10, maxmiss=10):
    """Train using Recommended Greedy Algorithm.

    Like the plain greedy search, but edge endpoints are proposed using
    mutual-information weights: the first endpoint is drawn uniformly, the
    second with probabilities from ``self.mi_weights``.  Accepted and
    rejected candidate networks are all recorded in ``self.scores``.
    """
    history = {'Iteration': [], 'Network': [], 'Score': []}
    misses_left = maxmiss
    iters_left = iterations
    node_names = list(self.data.columns)
    best = score_pom(export_pom(self.net, by='label'), self.data)
    while misses_left > 0 and iters_left > 0:
        # Fresh network seeded with the current best DAG.
        candidate = net(data=self.data)
        candidate.import_dag(self.net.export_dag())
        for mutate in (candidate.add_edge, candidate.del_edge, candidate.rev_edge):
            # First endpoint: uniform over all nodes.
            src = np.random.choice(node_names)
            # Second endpoint: weighted by mutual information with src.
            # NOTE(review): assumes mi_weights[src] excludes src itself,
            # otherwise a self-loop could be proposed — confirm.
            dst = np.random.choice(self.mi_weights[src].index,
                                   p=self.mi_weights[src])
            mutate(src, dst)
        # Cyclic proposals are discarded but still consume an iteration.
        if candidate.acyclic():
            candidate.calc_cpt(self.data, alpha=self.alpha)
            score = score_pom(export_pom(candidate, by='label'), self.data)
            history['Iteration'].append(iterations - iters_left)
            history['Network'].append(candidate)
            history['Score'].append(score)
            if score > best:
                self.net = candidate
                best = score
                misses_left = maxmiss
            else:
                misses_left -= 1
        iters_left -= 1
    self.scores = history
# Accumulate per-trial score traces for each learner on data set 1.
# NOTE(review): DataFrame.append is deprecated and removed in pandas 2.x —
# migrate to pd.concat when upgrading; left unchanged here.
t_res = t_res.append(
    pd.DataFrame(grd.scores).assign(Trial=i, Learner='GREEDY', Net="ds1"))
t_res = t_res.append(
    pd.DataFrame(cgm.scores).assign(Trial=i, Learner='CASGMM', Net="ds1"))
t_res = t_res.append(
    pd.DataFrame(cmd.scores).assign(Trial=i, Learner='CASMOD', Net="ds1"))
t_res = t_res.append(
    pd.DataFrame(cjn.scores).assign(Trial=i, Learner='CASJNK', Net="ds1"))
# Edge-recovery hit counts versus the true network tn1, one row per trial.
# (The comprehension's `i` shadows the trial index only inside the dict
# comprehension; the `i` in .assign(Trial=i) is the outer trial index.)
g_res = g_res.append(pd.DataFrame({
    i[0]: [i[1]]
    for i in edge_hits(export_pom(grd.net, by='label'),
                       export_pom(tn1, by='label')).items()
}).assign(Trial=i, Learner='GREEDY', Net="ds1"), sort=False)
g_res = g_res.append(pd.DataFrame({
    i[0]: [i[1]]
    for i in edge_hits(export_pom(cgm.net, by='label'),
                       export_pom(tn1, by='label')).items()
}).assign(Trial=i, Learner='CASGMM', Net="ds1"), sort=False)
# NOTE(review): the statement below is truncated at the edge of this chunk;
# it continues past this view of the file.
g_res = g_res.append(pd.DataFrame({
    i[0]: [i[1]]
    for i in edge_hits(export_pom(cmd.net, by='label'),
                       export_pom(tn1, by='label')).items()
# Same per-trial accumulation as above, for data set 2.
# NOTE(review): DataFrame.append is deprecated and removed in pandas 2.x —
# migrate to pd.concat when upgrading; left unchanged here.
t_res=t_res.append(
    pd.DataFrame( cmd.scores).assign(Trial = i, Learner = 'CASMOD', Net="ds2")
)
t_res=t_res.append(
    pd.DataFrame( cjn.scores).assign(Trial = i, Learner = 'CASJNK', Net="ds2")
)
# Edge-recovery hit counts versus the true network tn2.
g_res = g_res.append(
    pd.DataFrame(
        {
            i[0]:[i[1]] for i in edge_hits(
                export_pom(grd.net, by='label'),
                export_pom(tn2, by='label')
            ).items()
        }
    ).assign(Trial = i, Learner = 'GREEDY', Net="ds2"),
    sort=False
)
# NOTE(review): the statement below is truncated at the edge of this chunk;
# it continues past this view of the file.
g_res = g_res.append(
    pd.DataFrame(
        {
            i[0]:[i[1]] for i in edge_hits(
                export_pom(cgm.net, by='label'),
                export_pom(tn2, by='label')
# Data set 2 truth network: four disjoint 5-node chains,
# G0->G1->...->G4, G5->...->G9, G10->...->G14, G15->...->G19.
tn2 = net(data=ds2)
for first in (0, 5, 10, 15):
    for i in range(first, first + 4):
        tn2.add_edge('G' + str(i), 'G' + str(i + 1))
tn2.calc_cpt(ds2, alpha=0.00001)
# NOTE(review): this exports tn1, not the tn2 just built — confirm intended.
export_pom(tn1, by='label')

# Data Set 3: 4 Variables generated by sampling from a fixed set of conditional
# probability tables (Example from: Finsen Jenn)
data = pd.DataFrame({
    'Icy': [0, 1],
    'Holmes': [0, 1],
    'Watson': [0, 1],
    'Ambulance': [0, 1],
})
tn3 = net(data=data)
tn3.add_edge('Icy', 'Watson')
tn3.add_edge('Icy', 'Holmes')
tn3.add_edge('Holmes', 'Ambulance')
# NOTE(review): this fragment starts mid-statement — the leading "]" closes a
# list literal begun outside this view of the file.
]
data['G4'] = [
    # G4 derived from G3 via pfun — pfun defined outside this view; TODO confirm.
    pfun(data['G3'][i]) for i in range(0,len(data['G3']))
]
data = pd.DataFrame(data)
data = data[['G1', 'G2', 'G3', 'G4']]
a = net(data=data)
# NOTE(review): edges added by positional index here (1->0, 2->3), unlike the
# label-based add_edge calls elsewhere in the file — confirm the convention.
a.add_edge(1,0)
a.add_edge(2,3)
a.calc_cpt(data)
m = export_pom(a, by='label')
v = [i.name for i in m.states]

# Function that calculates the probability of a row given a set of nodes
def f(r, nds):
    """Probability of row *r*: product over *nds* of each node's CPT entry
    selected by the row's values for that node's CPT columns."""
    pr = []
    for n in nds:
        # All CPT columns except 'Prob' index the table (node + parents).
        c=n.cpt.columns.drop('Prob')
        pr.append(
            #np.log(
            n.cpt.set_index(list(c)).loc[tuple(r[c])].values[0]
            #    )
        )
    return(np.array(pr).prod())