def MMMB(data, target, alaph, is_discrete=True):
    """Build a Markov-blanket candidate list for ``target``.

    The parents-and-children (PC) list returned by ``MMPC`` for ``target``
    seeds the result. Then, for every PC member ``x``, each member ``y`` of
    ``MMPC(x)`` that is neither ``target`` nor in PC(target) is tested against
    ``target`` conditioned on ``sepset[y]`` plus ``x``. A test whose p-value
    is ``<= alaph`` adds ``y`` to the result.

    Args:
        data: Dataset, forwarded unchanged to ``MMPC`` and
            ``cond_indep_test``.
        target: Variable whose blanket is being assembled.
        alaph: Threshold compared against the p-value of each test; also
            forwarded to ``MMPC``.
        is_discrete: Forwarded unchanged to ``MMPC`` and ``cond_indep_test``.

    Returns:
        A tuple ``(MB, ci_number)``. ``MB`` is a list starting with a copy of
        PC(target) followed by the accepted extra variables, each listed once.
        ``ci_number`` is the sum of the counts reported by every ``MMPC`` call
        plus one for every ``cond_indep_test`` call made here.
    """
    PC, sepset, ci_number = MMPC(data, target, alaph, is_discrete)
    MB = PC.copy()

    for x in PC:
        PCofPC, _, ci_num_x = MMPC(data, x, alaph, is_discrete)
        ci_number += ci_num_x

        for y in PCofPC:
            if y == target or y in PC:
                continue

            # Condition on the recorded separating set of y together with x;
            # names are passed as strings and de-duplicated.
            conditions_Set = list({str(i) for i in sepset[y]} | {str(x)})
            ci_number += 1
            pval, _ = cond_indep_test(
                data, target, y, conditions_Set, is_discrete)

            if pval <= alaph:
                # The same y can be reached through several PC members;
                # record it only once so MB holds no duplicates.
                if y not in MB:
                    MB.append(y)
                # NOTE(review): scanning PC(x) stops at the first accepted
                # candidate, exactly as before -- confirm this is intended.
                break

    return MB, ci_number
def MMHC(data, alpha=0.01, score_function='bdeu'):
    """Run MMPC for every variable, then orient edges with hill climbing.

    Args:
        data: Input training data with variables in the second dimension.
            The data must be discrete (``MMPC`` is always called with its
            fourth argument set to ``True``).
        alpha: Threshold for the CI tests, forwarded to ``MMPC``.
        score_function: Type of score function forwarded to ``hc``; the
            original notes list 'bdeu' and 'bic' as supported.

    Returns:
        A tuple ``(DAG, num_CI)``. ``DAG`` is a square matrix in which ``1``
        marks a candidate edge found by ``MMPC`` (set in both directions) and
        ``DAG[a, b] == -1`` with ``DAG[b, a] == 0`` marks an edge oriented
        from ``a`` to ``b`` by hill climbing. ``num_CI`` is the sum of the
        counts returned by the ``MMPC`` calls.
    """
    _, n_vars = np.shape(data)
    adjacency = np.zeros((n_vars, n_vars))
    candidates = {}
    num_CI = 0

    for node in range(n_vars):
        neighbours, _, n_tests = MMPC(data, node, alpha, True)
        num_CI += n_tests
        for other in neighbours:
            adjacency[node, other] = 1
            adjacency[other, node] = 1
        # hc/symmetry work with string-keyed candidate sets.
        candidates[str(node)] = list(map(str, neighbours))

    # Check the symmetry of the pc sets.
    # When the number of variables is large this may be computationally
    # costly; it could be merged into the pruning done during forward and
    # backward mmpc by passing the whole pc set into mmpc_forward and
    # mmpc_backward.
    candidates = symmetry(candidates)

    # Run hill-climbing restricted to the candidate sets.
    dag_dict = hc(data, candidates, score_function)

    # Orient the edges reported by hill climbing.
    for name, family in dag_dict.items():
        node = int(name)
        for parent in map(int, family['parents']):
            adjacency[parent, node] = -1
            adjacency[node, parent] = 0
        for child in map(int, family['children']):
            adjacency[node, child] = -1
            adjacency[child, node] = 0

    return adjacency, num_CI


# import pandas as pd
#
# from CBD.MBs.common.realMB import realMB
# from LSL.MBs.common.real_P_C_S import real_p_c_s
# if __name__ == '__main__':
#
#     data = pd.read_csv('D:/data/child_data/Child_s5000_v2.csv')
#     real_graph_path = "D:/data/child_data/Child_graph.txt"
#     _, all_number = np.shape(data)
#     real_p, real_c, real_s = real_p_c_s(all_number, real_graph_path)
#     print("real_p is:", real_p)
#     print("real_c is:", real_c)
#     DAG = MMHC(data)
#     print(DAG)
#     with open(r".\result.txt", "a+") as file:
#         file.write(str(DAG))
def example(method, data, list_target, alpha=0.01, is_discrete=True):
    """Run one PC-discovery algorithm on each target and log the results.

    For every entry of ``list_target`` the selected algorithm is called and
    the line ``the pc of <target> is:<pc>`` is both printed and written to
    ``../output/pc.txt`` (opened in ``"w+"`` mode, so previous content is
    discarded). The elapsed ``time.process_time()`` for the whole loop is
    reported last.

    Args:
        method: One of ``"MBtoPC"``, ``"pc_simple"``, ``"HITON_PC"``,
            ``"MMPC"``, ``"getPC"`` or ``"semi_HITON_PC"``.
        data: Dataset forwarded to the algorithm. For ``"MBtoPC"`` its
            second dimension gives the number of variables.
        list_target: Iterable of target variables.
        alpha: Significance threshold forwarded to the algorithm.
        is_discrete: Forwarded to the algorithm.

    Raises:
        ValueError: If ``method`` is not one of the supported names. The
            check happens before the output file is opened, so an invalid
            call no longer truncates an existing result file.
    """
    # Lambdas defer the lookup of each algorithm until it is actually
    # selected; only the PC set (first returned element) is used.
    finders = {
        "MBtoPC": lambda t: MBtoPC(
            data, t, alpha, list(range(numpy.shape(data)[1])), is_discrete)[0],
        "pc_simple": lambda t: pc_simple(data, t, alpha, is_discrete)[0],
        "HITON_PC": lambda t: HITON_PC(data, t, alpha, is_discrete)[0],
        "MMPC": lambda t: MMPC(data, t, alpha, is_discrete)[0],
        "getPC": lambda t: getPC(data, t, alpha, is_discrete)[0],
        "semi_HITON_PC": lambda t: semi_HITON_PC(
            data, t, alpha, is_discrete)[0],
    }

    find_pc = finders.get(method)
    if find_pc is None:
        raise ValueError("method input error!")

    # The context manager closes the file even if an algorithm raises.
    with open("../output/pc.txt", "w+") as out_file:
        start_time = time.process_time()
        for target in list_target:
            pc = find_pc(target)
            line = "the pc of " + str(target) + " is:" + str(pc)
            out_file.write(line + "\n")
            print(line)
        end_time = time.process_time()

        summary = "the running time is: " + str(end_time - start_time)
        print(summary)
        out_file.write(summary + "\n")
def PCDbyPCD(data, target, alaph, is_discrete=True):
    """Find parents, children and undirected neighbours of ``target``.

    Starting from ``target``, variables are taken from a FIFO queue. For each
    newly visited variable ``A`` its set is obtained from ``MMPC`` and every
    member is queued. An edge ``A - B`` is recorded only when ``A`` also
    appears in the already-computed set of ``B``. For such an edge, every
    ``C`` in the set of ``B`` that is neither ``A`` nor in the set of ``A``,
    and that already shares a recorded edge with ``B``, is oriented as
    ``A -> B <- C`` when ``B`` is absent from ``sepset_all[A][C]``. After each
    visited variable, ``Meek`` is called on the graphs.

    Matrix convention used by this function: ``pDAG[i, j] == 1`` (in both
    directions) is an undirected edge; ``pDAG[i, j] == -1`` with
    ``pDAG[j, i] == 0`` is read as ``i -> j``.

    Args:
        data: Dataset with variables in the second dimension; forwarded to
            ``MMPC`` and ``Meek``.
        target: Index of the variable of interest.
        alaph: Significance threshold forwarded to ``MMPC``.
        is_discrete: Forwarded to ``MMPC``.

    Returns:
        A tuple ``(parents, children, undirected)`` of index lists read from
        the column and row of ``target`` in the final ``pDAG``.
    """
    from collections import deque

    _, kVar = np.shape(data)
    DAG = np.zeros((kVar, kVar))
    pDAG = DAG.copy()

    # Independent inner lists (``[[]] * kVar`` would alias a single list).
    sepset_all = [[[]] for _ in range(kVar)]
    PCD_set_all = [[] for _ in range(kVar)]

    visited = set()
    queue = deque([target])
    lnum = 0  # number of variables processed so far

    while len(visited) <= kVar and queue:
        A = queue.popleft()
        if A in visited:
            continue
        visited.add(A)

        # Get PC(A)
        if not PCD_set_all[A]:
            PCD_set_all[A], sepset_all[A], _ = MMPC(
                data, A, alaph, is_discrete)

        for B in PCD_set_all[A]:
            queue.append(B)

            # The set of B is empty until B itself has been visited, so an
            # edge is only recorded once both endpoints list each other.
            if A not in PCD_set_all[B]:
                continue

            DAG[A, B] = 1
            DAG[B, A] = 1

            if pDAG[A, B] == 0 and pDAG[B, A] == 0:
                pDAG[A, B] = 1
                pDAG[B, A] = 1

            for C in PCD_set_all[B]:
                if C == A or C in PCD_set_all[A]:
                    continue

                # v-structure A -> B <- C
                if (DAG[C, B] == 1 and DAG[B, C] == 1
                        and B not in sepset_all[A][C]):
                    pDAG[A, B] = -1
                    pDAG[B, A] = 0
                    pDAG[C, B] = -1
                    pDAG[B, C] = 0

        # Presumably applies Meek's orientation rules; defined elsewhere.
        pDAG = Meek(DAG, pDAG, data)

        lnum += 1
        length_PCD_set = sum(1 for pcd in PCD_set_all if pcd)

        # Break condition: more variables processed than non-empty sets, and
        # no undirected edge touches the target any more.
        if (lnum > length_PCD_set
                and np.all(pDAG[:, target] != 1)
                and np.all(pDAG[target, :] != 1)):
            break

    parents = [i for i in range(kVar) if pDAG[i, target] == -1]
    children = [i for i in range(kVar) if pDAG[target, i] == -1]
    undirected = [i for i in range(kVar) if pDAG[target, i] == 1]

    return parents, children, undirected