data1.to_csv('D:/temp/data_labels.csv', index=False) data = pd.read_csv('D:/temp/data_labels.csv') number, nver = np.shape(data) target = nver - 1 print(number) train_indices = [] test_indices = [] for index, method in enumerate(method_list): print(target) print(method) if method == "MMMB": MB, ci_num = MMMB(data, target, alaph, is_discrete) elif method == "IAMB": MB, ci_num = IAMB(data, target, alaph, is_discrete) elif method == "IAMBnPC": MB, ci_num = IAMBnPC(data, target, alaph, is_discrete) elif method == "inter_IAMB": MB, ci_num = inter_IAMB(data, target, alaph, is_discrete) elif method == "interIAMBnPC": MB, ci_num = interIAMBnPC(data, target, alaph, is_discrete) elif method == "fast_IAMB": MB, ci_num = fast_IAMB(data, target, alaph, is_discrete) elif method == "GSMB": MB, ci_num = GSMB(data, target, alaph, is_discrete) elif method == "HITON_MB": MB, ci_num = HITON_MB(data, target, alaph, is_discrete) elif method == "PCMB": MB, ci_num = PCMB(data, target, alaph, is_discrete) elif method == "IPCMB":
def _run_mb_method(method, data, target, alaph, k, is_discrete):
    """Run the Markov-blanket learner selected by *method* for one target.

    Returns whatever the selected learner returns; the caller unpacks it as
    ``(MB, ci_num)``.  Only ``"KIAMB"`` and ``"FBEDk"`` receive *k*, and they
    take it at different positions.

    Raises:
        Exception: with message "method input error!" for an unknown name.
    """
    if method == "MMMB":
        return MMMB(data, target, alaph, is_discrete)
    if method == "IAMB":
        return IAMB(data, target, alaph, is_discrete)
    if method == "KIAMB":
        return KIAMB(data, target, alaph, k, is_discrete)
    if method == "IAMBnPC":
        return IAMBnPC(data, target, alaph, is_discrete)
    if method == "inter_IAMB":
        return inter_IAMB(data, target, alaph, is_discrete)
    if method == "interIAMBnPC":
        return interIAMBnPC(data, target, alaph, is_discrete)
    if method == "fast_IAMB":
        return fast_IAMB(data, target, alaph, is_discrete)
    if method == "GSMB":
        return GSMB(data, target, alaph, is_discrete)
    if method == "HITON_MB":
        return HITON_MB(data, target, alaph, is_discrete)
    if method == "PCMB":
        return PCMB(data, target, alaph, is_discrete)
    if method == "IPCMB":
        return IPC_MB(data, target, alaph, is_discrete)
    if method == "STMB":
        return STMB(data, target, alaph, is_discrete)
    if method == "BAMB":
        return BAMB(data, target, alaph, is_discrete)
    if method == "FBEDk":
        return FBED(data, target, k, alaph, is_discrete)
    if method == "MBOR":
        return MBOR(data, target, alaph, is_discrete)
    if method == "LRH":
        return LRH(data, target, alaph, is_discrete)
    raise Exception("method input error!")


def _score_mb(true_mb, found_mb):
    """Return ``(precision, recall, f1)`` of *found_mb* against *true_mb*."""
    n_true = len(true_mb)
    n_found = len(found_mb)
    if n_true == 0 and n_found == 0:
        # Nothing to find and nothing reported counts as a perfect answer.
        return 1, 1, 1
    if n_true == 0 or n_found == 0:
        return 0, 0, 0
    n_hits = len(set(true_mb).intersection(set(found_mb)))
    precision = n_hits / n_found
    recall = n_hits / n_true
    if precision + recall != 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0
    return precision, recall, f1


def evaluation(method, path, all_number_Para, target_list, real_graph_path,
               is_discrete, filenumber=10, alaph=0.01, k=1):
    """Benchmark one Markov-blanket learner over a series of CSV data sets.

    For every ``m`` in ``1..filenumber`` the file ``path + str(m) + ".csv"``
    is loaded with pandas and the learner named by *method* is run once per
    entry of *target_list*.  Each result is compared with the reference
    blanket obtained from ``realMB(all_number_Para, real_graph_path)``.

    Args:
        method: Name of the learner (e.g. "MMMB", "IAMB", "FBEDk").
        path: Prefix of the data files; the 1-based index and ".csv" are
            appended to it.
        all_number_Para: Forwarded unchanged to ``realMB``.
        target_list: Targets to evaluate; each is also used to index the
            reference blankets returned by ``realMB``.
        real_graph_path: Forwarded unchanged to ``realMB``.
        is_discrete: Forwarded unchanged to the learner.
        filenumber: Number of data files to process.
        alaph: Significance level forwarded to the learner.
        k: Extra parameter, used only by "KIAMB" and "FBEDk".

    Returns:
        A 6-tuple of averages over ``len(target_list) * filenumber`` runs:
        ``(F1, precision, recall, distance, CI-test count, CPU seconds)``,
        where distance is ``sqrt((1 - precision)**2 + (1 - recall)**2)``.

    Raises:
        Exception: "method input error!" if *method* is unknown (raised when
            the first target is processed).
        ZeroDivisionError: if *target_list* is empty or *filenumber* is 0.
    """
    # Running totals; they are averaged once at the end.
    Precision = 0
    Recall = 0
    F1 = 0
    Distance = 0
    use_time = 0
    ci_number = 0

    realmb, _ = realMB(all_number_Para, real_graph_path)
    length_targets = len(target_list)

    for m in range(filenumber):
        completePath = path + str(m + 1) + ".csv"
        data = pd.read_csv(completePath)

        ResMB = [[] for _ in range(length_targets)]
        for i, target in enumerate(target_list):
            # Only the learner call itself is timed (process CPU time).
            start_time = time.process_time()
            MB, ci_num = _run_mb_method(
                method, data, target, alaph, k, is_discrete)
            end_time = time.process_time()

            use_time += (end_time - start_time)
            ResMB[i] = MB
            ci_number += ci_num

        for n, target in enumerate(target_list):
            precision, recall, f1 = _score_mb(realmb[target], ResMB[n])
            F1 += f1
            Distance += ((1 - precision) ** 2 + (1 - recall) ** 2) ** 0.5
            Precision += precision
            Recall += recall

    # NOTE: only means are returned; no standard deviation is computed.
    commonDivisor = length_targets * filenumber
    return (F1 / commonDivisor, Precision / commonDivisor,
            Recall / commonDivisor, Distance / commonDivisor,
            ci_number / commonDivisor, use_time / commonDivisor)
def _select_example_algorithm(method):
    """Return the learner function that *method* names.

    Each name is looked up only in its own branch, so a learner that is not
    available in this module only matters when it is actually requested.

    Raises:
        Exception: with message "method input error!" for an unknown name.
    """
    if method == "MMMB":
        return MMMB
    if method == "IAMB":
        return IAMB
    if method == "inter_IAMB":
        return inter_IAMB
    if method == "fast_IAMB":
        return fast_IAMB
    if method == "GSMB":
        return GSMB
    if method == "HITON_MB":
        return HITON_MB
    if method == "semi_HITON_MB":
        return semi_HITON_MB
    if method == "PCMB":
        return PCMB
    if method == "IPCMB":
        return IPC_MB
    if method == "STMB":
        return STMB
    if method == "IAMBnPC":
        return IAMBnPC
    if method == "interIAMBnPC":
        return interIAMBnPC
    if method == "BAMB":
        return BAMB
    if method == "FBEDk":
        return FBED
    if method == "MBOR":
        return MBOR
    if method == "LRH":
        return LRH
    if method == "KIAMB":
        return KIAMB
    if method == "TIE":
        return TIE
    if method == "TIE_p":
        return TIE_p
    raise Exception("method input error!")


def example(method, data, list_target, alpha, is_discrete, k=0):
    """Run one learner on every target and report the Markov blankets found.

    One line per target, followed by the total running time, is printed and
    also written to ``../output/mb.txt`` (opened in "w+" mode, so existing
    content is overwritten).

    Args:
        method: Name of the learner (e.g. "MMMB", "KIAMB", "TIE").
        data: Data set forwarded unchanged to the learner.
        list_target: Targets whose Markov blankets are requested.
        alpha: Significance level forwarded to the learner.
        is_discrete: Forwarded unchanged to the learner.
        k: Extra parameter, used only by "FBEDk" and "KIAMB".

    Raises:
        Exception: "method input error!" if *method* is unknown.
    """
    # The context manager closes the file even when the learner, or the
    # unknown-method check, raises.
    with open("../output/mb.txt", "w+") as out:
        algorithm = _select_example_algorithm(method)

        start_time = time.process_time()
        for target in list_target:
            # FBED and KIAMB take k, at different positions.
            if method == "FBEDk":
                result = algorithm(data, target, k, alpha, is_discrete)
            elif method == "KIAMB":
                result = algorithm(data, target, alpha, k, is_discrete)
            else:
                result = algorithm(data, target, alpha, is_discrete)

            # TIE and TIE_p return the blanket alone; every other learner
            # returns a (MB, ci_num) pair.
            if method in ("TIE", "TIE_p"):
                MB = result
            else:
                MB, _ = result

            line = "the MB of " + str(target) + " is:" + str(MB)
            out.write(line + "\n")
            print(line)
        end_time = time.process_time()

        print("the running time is: " + str(end_time - start_time))
        out.write("the running time is: " + str(end_time - start_time) + "\n")
def MB_by_MB(data, target, alaph, is_discrete=True):
    """Orient the edges around *target* by expanding Markov blankets.

    Starting from *target*, the Markov blanket of each node on a wait list
    is found with ``IAMB``.  Blanket members that can be separated from the
    node by a conditioning set of at most ``k = 3`` other members are dropped
    from its parent/child candidates, v-structures are recorded, and ``Meek``
    is applied.  The search stops when every edge touching *target* is
    directed or the wait list is empty.

    Args:
        data: Data set; ``np.shape(data)`` must be 2-D, and its second
            dimension is taken as the number of variables.
        target: Index of the variable of interest.
        alaph: Significance level for the conditional-independence tests.
        is_discrete: Forwarded to ``IAMB`` and ``cond_indep_test``.

    Returns:
        ``(parents, children, undirected)``: lists of variable indices whose
        edge with *target* is directed into it, directed out of it, or still
        undirected.
    """
    _, p = np.shape(data)
    Donelist = []  # nodes whose MBs have been found
    Waitlist = [target]  # nodes whose MBs will be found
    G = np.zeros((p, p))  # 1 denotes ->, 0 denotes no edge
    pdag = G.copy()  # -1 denotes ->, 1 denotes -, 0 denotes no edge
    DAG = G.copy()  # 1 denotes -, 0 denotes no edge
    MB = [[] for _ in range(p)]
    sepset = [[[] for _ in range(p)] for _ in range(p)]
    k = 3  # largest conditioning-set size that is tried

    while Waitlist:
        stop = False
        for x in Waitlist.copy():
            spouse = [[] for _ in range(p)]
            Donelist.append(x)
            Waitlist.remove(x)
            MB[x], _ = IAMB(data, x, alaph, is_discrete)
            Waitlist.extend(MB[x])

            # Skip x when its MB is a proper subset of another known MB, or
            # when every member has already been processed.
            mb_x = set(MB[x])
            findflag = any(mb_x < set(other) for other in MB)
            if mb_x <= set(Donelist):
                findflag = True
            if findflag:
                continue

            # Find spouses and parent/child candidates.
            pc = MB[x].copy()
            for c in MB[x]:
                cutsetsize = 0
                break_flag = False
                CanPC = [v for v in MB[x] if v != c]
                while len(CanPC) >= cutsetsize and cutsetsize <= k:
                    for s in subsets(CanPC, cutsetsize):
                        pval, _ = cond_indep_test(data, x, c, s, is_discrete)
                        # Kept as "<=" + continue so that a NaN p-value is
                        # still treated as independence.
                        if pval <= alaph:
                            continue
                        sepset[x][c] = s
                        pc.remove(c)
                        break_flag = True
                        break
                    if break_flag:
                        break
                    cutsetsize += 1

            rest = [v for v in MB[x] if v not in pc]
            for r in rest:
                for j, pc_node in enumerate(pc):
                    if pc_node in sepset[x][r]:
                        continue
                    condition = [str(m) for m in sepset[x][r]]
                    # NOTE(review): set(str(r)) splits the label into single
                    # characters, and r is also the variable being tested;
                    # confirm this conditioning set is what was intended.
                    condition = list(set(condition).union(set(str(r))))
                    pval, _ = cond_indep_test(
                        data, r, x, condition, is_discrete)
                    if pval <= alaph or math.isnan(pval):
                        spouse[j].append(r)

            # Construct v-structures.
            for i, b in enumerate(pc):
                DAG[x, b] = 1
                DAG[b, x] = 1
                if pdag[x, b] == 0 and pdag[b, x] == 0:
                    pdag[x, b] = 1
                    pdag[b, x] = 1
                    G[x, b] = 1
                    G[b, x] = 1
                for c in spouse[i]:
                    # Record x -> b <- c and drop any x - c edge.
                    DAG[c, b] = 1
                    DAG[b, c] = 1
                    DAG[x, c] = 0
                    DAG[c, x] = 0

                    pdag[x, b] = -1
                    pdag[c, b] = -1
                    pdag[b, x] = 0
                    pdag[b, c] = 0
                    pdag[x, c] = 0
                    pdag[c, x] = 0

                    G[x, b] = 1
                    G[c, b] = 1
                    G[b, x] = 0
                    G[b, c] = 0
                    G[c, x] = 0
                    G[x, c] = 0

            # Orient further edges with the Meek step.  The original read
            # and assigned an undefined local "pDAG" here, which raises
            # UnboundLocalError.
            # NOTE(review): assumes Meek returns the updated pdag matrix.
            pdag = Meek(DAG, pdag, data)

            # Stop once every edge connected to the target is oriented.
            stop = True
            connect = [i for i in range(p) if DAG[target, i] == 1]
            for i in connect:
                if pdag[target, i] != -1 and pdag[i, target] != -1:
                    stop = False
                    break
            if stop:
                break
        if stop:
            break

        # De-duplicate the wait list and drop nodes already processed.
        Waitlist = [v for v in set(Waitlist) if v not in Donelist]

    # NOTE(review): the original called np.transpose(G) and
    # np.transpose(pdag) here and discarded the results, which has no
    # effect; confirm whether a real transpose was intended.
    parents = [i for i in range(p) if pdag[i, target] == -1]
    children = [i for i in range(p) if pdag[target, i] == -1]
    undirected = [i for i in range(p) if pdag[target, i] == 1]

    return parents, children, undirected


# # data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# data = pd.read_csv("F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.csv")
# # path = "F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.txt"
# # data = np.loadtxt(path, dtype=None, delimiter= ' ')
# target = 0
# Graph, p, c = MB_by_MB(data,target,0.01)
# print("\nin the last -------------------------------------")
# print(Graph)
# print("target " + str(target) + " parents are " + str(p))
# print("target " + str(target) + " children are " + str(c))