data1 = pd.concat([pd.DataFrame(train_data), pd.DataFrame(train_label)], axis=1, ignore_index=True) # Best of a bad bunch data1.to_csv('D:/temp/data_labels.csv', index=False) data = pd.read_csv('D:/temp/data_labels.csv') number, nver = np.shape(data) target = nver - 1 print(number) train_indices = [] test_indices = [] for index, method in enumerate(method_list): print(target) print(method) if method == "MMMB": MB, ci_num = MMMB(data, target, alaph, is_discrete) elif method == "IAMB": MB, ci_num = IAMB(data, target, alaph, is_discrete) elif method == "IAMBnPC": MB, ci_num = IAMBnPC(data, target, alaph, is_discrete) elif method == "inter_IAMB": MB, ci_num = inter_IAMB(data, target, alaph, is_discrete) elif method == "interIAMBnPC": MB, ci_num = interIAMBnPC(data, target, alaph, is_discrete) elif method == "fast_IAMB": MB, ci_num = fast_IAMB(data, target, alaph, is_discrete) elif method == "GSMB": MB, ci_num = GSMB(data, target, alaph, is_discrete) elif method == "HITON_MB": MB, ci_num = HITON_MB(data, target, alaph, is_discrete) elif method == "PCMB":
def example(method, data, list_target, alpha, is_discrete, k=0):
    """Run one Markov-blanket algorithm on each target and log the results.

    For every target in ``list_target`` the selected algorithm is called and
    the line ``"the MB of <target> is:<MB>"`` is both printed and written to
    ``../output/mb.txt`` (the file is truncated on every call).  The total
    ``time.process_time()`` spent in the loop is reported the same way.

    Args:
        method: Name of the algorithm to run; one of the keys of the
            dispatch table below (e.g. ``"MMMB"``, ``"IAMB"``, ``"TIE"``).
        data: Data set handed unchanged to the algorithm.
        list_target: Iterable of target variables.
        alpha: Significance level handed to the algorithm.
        is_discrete: Flag handed to the algorithm.
        k: Extra parameter, only used by ``"FBEDk"`` and ``"KIAMB"``.

    Raises:
        Exception: ``"method input error!"`` if ``method`` is not recognised.
    """

    def mb_only(result):
        # Most algorithms return the pair ``(MB, ci_num)``; only MB is logged.
        mb, _ci_num = result
        return mb

    # Lambdas keep the name lookup lazy: only the selected algorithm has to
    # be available at call time, exactly as with the former if/elif chain.
    runners = {
        "MMMB": lambda t: mb_only(MMMB(data, t, alpha, is_discrete)),
        "IAMB": lambda t: mb_only(IAMB(data, t, alpha, is_discrete)),
        "inter_IAMB": lambda t: mb_only(
            inter_IAMB(data, t, alpha, is_discrete)),
        "fast_IAMB": lambda t: mb_only(
            fast_IAMB(data, t, alpha, is_discrete)),
        "GSMB": lambda t: mb_only(GSMB(data, t, alpha, is_discrete)),
        "HITON_MB": lambda t: mb_only(HITON_MB(data, t, alpha, is_discrete)),
        "semi_HITON_MB": lambda t: mb_only(
            semi_HITON_MB(data, t, alpha, is_discrete)),
        "PCMB": lambda t: mb_only(PCMB(data, t, alpha, is_discrete)),
        "IPCMB": lambda t: mb_only(IPC_MB(data, t, alpha, is_discrete)),
        "STMB": lambda t: mb_only(STMB(data, t, alpha, is_discrete)),
        "IAMBnPC": lambda t: mb_only(IAMBnPC(data, t, alpha, is_discrete)),
        "interIAMBnPC": lambda t: mb_only(
            interIAMBnPC(data, t, alpha, is_discrete)),
        "BAMB": lambda t: mb_only(BAMB(data, t, alpha, is_discrete)),
        # FBED and KIAMB take ``k`` at different argument positions.
        "FBEDk": lambda t: mb_only(FBED(data, t, k, alpha, is_discrete)),
        "MBOR": lambda t: mb_only(MBOR(data, t, alpha, is_discrete)),
        "LRH": lambda t: mb_only(LRH(data, t, alpha, is_discrete)),
        "KIAMB": lambda t: mb_only(KIAMB(data, t, alpha, k, is_discrete)),
        # TIE and TIE_p return the result directly, not a pair.
        "TIE": lambda t: TIE(data, t, alpha, is_discrete),
        "TIE_p": lambda t: TIE_p(data, t, alpha, is_discrete),
    }

    # The context manager closes the file even when the method name is
    # invalid or an algorithm raises part-way through the targets.
    with open("../output/mb.txt", "w+") as out:
        runner = runners.get(method)
        if runner is None:
            raise Exception("method input error!")

        start_time = time.process_time()
        for target in list_target:
            MB = runner(target)
            line = "the MB of " + str(target) + " is:" + str(MB)
            out.write(line + "\n")
            print(line)
        end_time = time.process_time()

        summary = "the running time is: " + str(end_time - start_time)
        print(summary)
        out.write(summary + "\n")
def MBbyMB(data, target, alpha, is_discrete=True):
    """Learn the edges around ``target`` by expanding Markov blankets.

    Starting from ``target``, variables are taken from a FIFO queue; for each
    one its Markov blanket is obtained with ``MMMB``, blanket members that can
    be separated from it by a small conditioning set are moved to the
    candidate-spouse list, v-structures are oriented and ``meek`` is applied.

    Args:
        data: Data set; ``np.shape(data)[1]`` is taken as the variable count.
        target: Index of the variable whose neighbourhood is wanted.
        alpha: Threshold; a p-value above it is treated as independence.
        is_discrete: Passed through to ``MMMB`` and ``cond_indep_test``.

    Returns:
        ``(parents, children, PC, undirected)`` read from ``pdag``:
        ``parents`` are the ``i`` with ``pdag[i, target] == -1``,
        ``children`` the ``i`` with ``pdag[target, i] == -1``,
        ``undirected`` the ``i`` with ``pdag[target, i] == 1`` and ``PC`` the
        union of the three (built through ``set``, so its order is not
        defined).
    """
    ci_test = 0  # number of CI tests performed; accumulated but not returned
    max_k = 3  # largest conditioning-set size tried when separating A and B
    _, kvar = np.shape(data)
    DAG = np.zeros((kvar, kvar))
    pdag = DAG.copy()
    G = DAG.copy()
    mb_calcualted = [True for i in range(kvar)]  # True = MB not computed yet
    all_pc = [[] for i in range(kvar)]
    all_mb = [[] for i in range(kvar)]
    all_can_spouse = [[] for i in range(kvar)]
    # The inner [] is shared by "* kvar", but entries are only ever replaced,
    # never mutated in place, so the aliasing is harmless here.
    all_sepset = [[[]] * kvar for i in range(kvar)]
    Q = [target]  # FIFO queue of variables still to expand
    tmp = []  # variables already expanded
    num_calculated = 0
    while len(tmp) <= kvar and len(Q) > 0:
        A = Q[0]
        del Q[0]
        if A in tmp:
            continue
        else:
            tmp.append(A)
        # get MB(A)
        if mb_calcualted[A]:
            all_mb[A], ntest = MMMB(data, A, alpha, is_discrete)
            ci_test += ntest
            mb_calcualted[A] = False
        all_pc[A] = all_mb[A].copy()
        for B in all_mb[A]:
            Q.append(B)
            DAG[A, B] = 1
            DAG[B, A] = 1
            # Only add an undirected edge where nothing is recorded yet, so
            # an orientation made earlier is not overwritten.
            # NOTE(review): the G updates are assumed to belong under this
            # guard together with the pdag updates - confirm.
            if pdag[A, B] == 0 and pdag[B, A] == 0:
                pdag[A, B] = 1
                pdag[B, A] = 1
                G[A, B] = 1
                G[B, A] = 1
            cutSetSize = 0
            break_flag = False
            can_pc = [i for i in all_mb[A] if i != B]
            # Look for a subset of MB(A) \ {B}, of size 0..max_k, that
            # separates A and B.
            while len(can_pc) >= cutSetSize and cutSetSize <= max_k:
                SS = subsets(can_pc, cutSetSize)
                for z in SS:
                    ci_test += 1
                    pval, _ = cond_indep_test(data, B, A, z, is_discrete)
                    if pval > alpha:
                        # Separated: record the separating set, drop the edge
                        # everywhere and keep B as a candidate spouse of A.
                        all_sepset[A][B] = [i for i in z]
                        all_sepset[B][A] = [i for i in z]
                        DAG[A, B] = 0
                        DAG[B, A] = 0
                        pdag[A, B] = 0
                        pdag[B, A] = 0
                        G[A, B] = 0
                        G[B, A] = 0
                        all_pc[A] = [i for i in all_pc[A] if i != B]
                        all_can_spouse[A].append(B)
                        break_flag = True
                        break
                if break_flag:
                    break
                cutSetSize += 1
        # print("all_sepset: ", all_sepset)
        # find v-structures
        for C in all_can_spouse[A]:
            for B in all_pc[A]:
                # A->B<-C
                # B is not in the set that separated A from C, so B is
                # oriented as a collider between them.
                if B not in all_sepset[A][C]:
                    DAG[A, B] = 1
                    DAG[B, A] = 1
                    pdag[A, B] = -1
                    pdag[B, A] = 0
                    pdag[C, B] = -1
                    pdag[B, C] = 0
                    G[A, B] = 1
                    G[B, A] = 0
                    G[C, B] = 1
                    G[B, C] = 0
        [DAG, pdag, G] = meek(DAG, pdag, G, kvar)
        num_calculated += 1
        # Once more variables than |MB(target)| have been expanded, stop as
        # soon as pdag has no entry equal to 1 in target's row or column.
        if num_calculated > len(all_mb[target]):
            if 1 not in pdag[target, :] and 1 not in pdag[:, target]:
                break
    parents = [i for i in range(kvar) if pdag[i, target] == -1]
    children = [i for i in range(kvar) if pdag[target, i] == -1]
    undirected = [i for i in range(kvar) if pdag[target, i] == 1]
    PC = list(set(parents).union(set(children)).union(set(undirected)))
    return parents, children, PC, undirected


# Example driver (kept commented out):
# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/alarm_data/Alarm1_s5000_v6.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alaph = 0.01
#
# for target in range(kvar):
#     P, C, PC, und = MBbyMB(data, target, alaph, True)
#     print(target," -P: ", P, " ,C: ", C, " ,PC: ", PC, " ,undire: ",und)
def evaluation(method, path, all_number_Para, target_list, real_graph_path,
               is_discrete, filenumber=10, alaph=0.01, k=1):
    """Score a Markov-blanket algorithm against the true blankets.

    The data sets ``path + "1.csv"`` ... ``path + "<filenumber>.csv"`` are
    read in turn.  On each one the selected algorithm is run for every target
    and its result is compared with the blanket returned for that target by
    ``realMB(all_number_Para, real_graph_path)``.

    Args:
        method: Name of the algorithm; one of the keys of the dispatch table
            below.
        path: File-name prefix of the data sets (number and ``.csv`` are
            appended).
        all_number_Para: First argument passed to ``realMB``.
        target_list: Targets to evaluate; each is used as an index into the
            true-blanket collection.
        real_graph_path: Second argument passed to ``realMB``.
        is_discrete: Flag handed to the algorithm.
        filenumber: Number of data sets to read.
        alaph: Significance level handed to the algorithm.
        k: Extra parameter, only used by ``"KIAMB"`` and ``"FBEDk"``.

    Returns:
        A 6-tuple of averages over ``len(target_list) * filenumber`` runs:
        ``(F1, precision, recall, distance, CI-test count, CPU time)``, where
        distance is the Euclidean distance of (precision, recall) from (1, 1).

    Raises:
        Exception: ``"method input error!"`` if ``method`` is not recognised
            (raised when the first target is about to be processed).
        ZeroDivisionError: if ``len(target_list) * filenumber`` is zero.
    """
    # Lambdas keep the name lookup lazy: only the selected algorithm has to
    # be available.  The former if/elif chain listed "IAMBnPC" twice; the
    # second branch was unreachable and is simply not repeated here.
    runners = {
        "MMMB": lambda d, t: MMMB(d, t, alaph, is_discrete),
        "IAMB": lambda d, t: IAMB(d, t, alaph, is_discrete),
        "KIAMB": lambda d, t: KIAMB(d, t, alaph, k, is_discrete),
        "IAMBnPC": lambda d, t: IAMBnPC(d, t, alaph, is_discrete),
        "inter_IAMB": lambda d, t: inter_IAMB(d, t, alaph, is_discrete),
        "interIAMBnPC": lambda d, t: interIAMBnPC(d, t, alaph, is_discrete),
        "fast_IAMB": lambda d, t: fast_IAMB(d, t, alaph, is_discrete),
        "GSMB": lambda d, t: GSMB(d, t, alaph, is_discrete),
        "HITON_MB": lambda d, t: HITON_MB(d, t, alaph, is_discrete),
        "PCMB": lambda d, t: PCMB(d, t, alaph, is_discrete),
        "IPCMB": lambda d, t: IPC_MB(d, t, alaph, is_discrete),
        "STMB": lambda d, t: STMB(d, t, alaph, is_discrete),
        "BAMB": lambda d, t: BAMB(d, t, alaph, is_discrete),
        "FBEDk": lambda d, t: FBED(d, t, k, alaph, is_discrete),
        "MBOR": lambda d, t: MBOR(d, t, alaph, is_discrete),
        "LRH": lambda d, t: LRH(d, t, alaph, is_discrete),
    }
    runner = runners.get(method)

    # Running totals over all data sets and targets.
    Precision = 0
    Recall = 0
    F1 = 0
    Distance = 0
    use_time = 0
    ci_number = 0

    realmb, realpc = realMB(all_number_Para, real_graph_path)
    length_targets = len(target_list)

    for m in range(filenumber):
        completePath = path + str(m + 1) + ".csv"
        data = pd.read_csv(completePath)

        # Learned blanket of every target, in target_list order.
        ResMB = []
        for target in target_list:
            if runner is None:
                raise Exception("method input error!")
            # Only the algorithm call itself is timed.
            start_time = time.process_time()
            MB, ci_num = runner(data, target)
            end_time = time.process_time()

            use_time += (end_time - start_time)
            ResMB.append(MB)
            ci_number += ci_num

        for n, target in enumerate(target_list):
            true_positive = set(realmb[target]).intersection(set(ResMB[n]))
            length_true_positive = len(true_positive)
            length_RealMB = len(realmb[target])
            length_ResMB = len(ResMB[n])

            if length_RealMB == 0 and length_ResMB == 0:
                # Nothing to find and nothing reported: a perfect answer.
                precision = 1
                recall = 1
                F1 += 1
            elif length_RealMB == 0 or length_ResMB == 0:
                # Exactly one of the two blankets is empty: total miss.
                precision = 0
                recall = 0
            else:
                precision = length_true_positive / length_ResMB
                recall = length_true_positive / length_RealMB
                if precision + recall != 0:
                    F1 += 2 * precision * recall / (precision + recall)

            Distance += ((1 - precision)**2 + (1 - recall)**2)**0.5
            Precision += precision
            Recall += recall

    commonDivisor = length_targets * filenumber

    # NOTE: only the means are returned; no standard deviation is computed.
    return (F1 / commonDivisor, Precision / commonDivisor,
            Recall / commonDivisor, Distance / commonDivisor,
            ci_number / commonDivisor, use_time / commonDivisor)
def MBGSL(data, alpha, is_discrete, selected):
    """Learn a partially directed graph from per-variable Markov blankets.

    Steps, in order:

    1. Compute a Markov blanket for every variable with the algorithm chosen
       by ``selected`` and mark the pairs in a square matrix.
    2. Symmetrise the matrix with an OR rule (a pair marked in one direction
       is marked in both).
    3. Remove the mark between ``x`` and ``y`` when some subset (size
       ``0..max_k``) of the union of their blankets makes them independent.
    4. Orient edges with one conditional-independence test per candidate
       triple, then apply ``meek``.

    Args:
        data: Data set; ``np.shape(data)[1]`` is taken as the variable count.
        alpha: Threshold; a p-value above it is treated as independence.
        is_discrete: Passed through to the algorithms and
            ``cond_indep_test``.
        selected: ``1`` -> ``MMMB``, ``2`` -> ``HITON_MB``,
            ``3`` -> ``semi_HITON_MB``, anything else -> ``PCMB``.

    Returns:
        ``(pdag, num_CI)``: the matrix returned by ``meek`` and the number of
        conditional-independence tests counted.  An oriented edge is stored
        as ``pdag[y, x] == -1`` with ``pdag[x, y] == 0``.
    """
    _, kvar = np.shape(data)
    max_k = 3  # largest conditioning-set size tried in step 3
    PP = np.zeros((kvar, kvar))
    num_CI = 0

    # Step 1: one Markov blanket per variable.
    for i in range(kvar):
        if selected == 1:
            MB, n_c = MMMB(data, i, alpha, is_discrete)
        elif selected == 2:
            MB, n_c = HITON_MB(data, i, alpha, is_discrete)
        elif selected == 3:
            MB, n_c = semi_HITON_MB(data, i, alpha, is_discrete)
        else:
            # PCMB is unpacked as a triple here; its third value is unused.
            MB, n_c, _ = PCMB(data, i, alpha, is_discrete)
        num_CI += n_c
        for j in MB:
            PP[i, j] = 1

    # Step 2: OR rule - keep a pair if either side reported the other.
    for i in range(kvar):
        for j in range(0, i):
            if PP[i, j] != PP[j, i]:
                PP[i, j] = 1
                PP[j, i] = 1

    all_MB = [[j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)]

    # Step 3: removes the possible spouse links between linked variables
    # X and Y.
    for x in range(kvar):
        for y in all_MB[x]:
            varis = list((set(all_MB[x]).difference([y])).union(
                set(all_MB[y]).difference([x])))
            k = 0
            break_flag = False
            while len(varis) > k and k <= max_k:
                ss = subsets(varis, k)
                for s in ss:
                    num_CI += 1
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Clear both directions so PP stays symmetric (the
                        # second assignment used to repeat PP[x, y]).
                        PP[x, y] = 0
                        PP[y, x] = 0
                        break_flag = True
                        break
                if break_flag:
                    break
                k += 1

    all_neighbor = [[j for j in range(kvar) if PP[i, j] == 1]
                    for i in range(kvar)]

    DAG = PP.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # Step 4: orient edges.
    for x in range(kvar):
        for y in all_neighbor[x]:
            # Neighbours of x that are neither y nor neighbours of y.
            sz = list((set(all_neighbor[x]).difference(
                all_neighbor[y])).difference([y]))
            for z in sz:
                # Tentatively orient; reverted below if the test finds y and
                # z independent given x.
                PP[y, x] = -1
                B = list((set(all_MB[y]).difference([z])).union(
                    set(all_MB[z]).difference([y])))
                break_flag = False
                cutSetSize = 0
                # The condition limits this to subsets of size 0 only, each
                # of which is extended with x before testing.
                while len(B) >= cutSetSize and cutSetSize == 0:
                    SS = subsets(B, cutSetSize)
                    for s in SS:
                        cond_s = list(set(s).union([x]))
                        num_CI += 1
                        pval, _ = cond_indep_test(
                            data, y, z, cond_s, is_discrete)
                        if pval > alpha:
                            PP[y, x] = 1
                            break_flag = True
                            break
                    if break_flag:
                        break
                    cutSetSize += 1

                if PP[y, x] == -1:
                    pdag[y, x] = -1
                    pdag[x, y] = 0
                    G[y, x] = 1
                    G[x, y] = 0
                    break

    DAG, pdag, G = meek(DAG, pdag, G, kvar)
    return pdag, num_CI