def SPSuperSet(data, target, PCS, d_sep, alaph, is_discrete):
    """Collect a superset of the spouses of ``target``.

    For every ``x`` in ``PCS``, each variable outside ``PCS`` (and different
    from ``target``) is tested against ``target`` given its stored separating
    set ``d_sep[y]`` plus ``x``; dependent variables become candidates for
    ``x``.  A candidate ``y`` is then dropped as soon as another candidate
    ``z`` renders it independent of ``target`` given ``[x, z]``.

    Returns:
        ``(SPS, ci_number)``: the union of the surviving candidates over all
        ``x`` and the number of conditional-independence tests performed.
    """
    tests_run = 0
    _, n_vars = np.shape(data)
    SPS = []

    for x in PCS:
        outside = [v for v in range(n_vars) if v != target and v not in PCS]

        # Candidates: dependent on target given (sepset of y) + x.
        candidates = []
        for y in outside:
            cond = list(set(list(d_sep[y]) + [x]))
            tests_run += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                candidates.append(y)

        # Prune: iterate a snapshot because candidates shrinks in the loop.
        for y in candidates.copy():
            for z in [v for v in candidates if v != y]:
                tests_run += 1
                pval, _ = cond_indep_test(data, target, y, [x, z], is_discrete)
                if pval > alaph:
                    candidates.remove(y)
                    break

        SPS = list(set(SPS) | set(candidates))

    return SPS, tests_run
def PCSuperSet(data, target, alaph, is_discrete):
    """Compute a superset of the parents/children of ``target``.

    Starts from every variable except ``target`` and removes a variable when
    it is independent of ``target`` either marginally or given one other
    remaining variable.  The separating set that caused the removal is
    recorded in ``d_sep`` (``[]`` or ``[y]``).

    Returns:
        ``(PCS, d_sep, ci_number)``.
    """
    tests_run = 0
    d_sep = {}
    _, n_vars = np.shape(data)
    PCS = [v for v in range(n_vars) if v != target]

    # Order-0 tests: empty conditioning set.
    for x in list(PCS):
        tests_run += 1
        pval, _ = cond_indep_test(data, target, x, [], is_discrete)
        if pval > alaph:
            PCS.remove(x)
            d_sep.setdefault(x, [])

    # Order-1 tests: condition on each other variable still in PCS.
    for x in list(PCS):
        for y in [v for v in PCS if v != x]:
            tests_run += 1
            pval, _ = cond_indep_test(data, target, x, [y], is_discrete)
            if pval > alaph:
                PCS.remove(x)
                d_sep.setdefault(x, [y])
                break

    return PCS, d_sep, tests_run
def getMinDep(data, target, x, CPC, alpha, is_discrete):
    """Return the minimum association of ``target`` and ``x`` over subsets of CPC.

    Subsets of ``CPC`` of size 0..min(len(CPC), 3) are used as conditioning
    sets.  As soon as one of them makes ``x`` independent of ``target``
    (p-value above ``alpha``) the function returns ``(0, S, ci_number)`` with
    that subset ``S``; otherwise it returns ``(min_dep, None, ci_number)``.
    """
    # In these graphs a node rarely has more than three parents (or children),
    # so conditioning sets of at most three variables are enough to detect
    # independence and this cap greatly reduces the running time.
    max_k = 3
    largest = min(len(CPC), max_k)

    tests_run = 0
    weakest = float("inf")
    for size in range(largest + 1):
        for S in subsets(CPC, size):
            tests_run += 1
            pval, dep = cond_indep_test(data, target, x, S, is_discrete)
            # Independence found: the minimum dependence is zero by definition,
            # so stop immediately and report the separating set.
            if pval > alpha:
                return 0, S, tests_run
            if weakest > dep:
                weakest = dep
    return weakest, None, tests_run
def FBED(data, target, k, alaph, is_discrete=True):
    """Forward-backward selection with early dropping.

    Forward phase: call ``one_run`` repeatedly (at most ``k + 1`` times)
    until the selected set stops changing.  Backward phase: remove every
    selected variable that is independent of ``target`` given the other
    currently selected variables.

    Returns:
        ``(selected, ci_number)`` with ``selected`` de-duplicated.
    """
    S = []
    ci_number = 0

    # Forward phase
    runs = 0
    changed = True
    while runs <= k and changed:
        before = set(S)  # one_run appends to S in place, so snapshot first
        S, used = one_run(data, target, S, alaph, is_discrete)
        runs += 1
        ci_number += used
        changed = before != set(S)

    # Backward phase
    for x in S.copy():
        others = [v for v in S if v != x]
        ci_number += 1
        pval, _ = cond_indep_test(data, target, x, others, is_discrete)
        if pval > alaph:
            S.remove(x)

    return list(set(S)), ci_number
def IPC_MB(data, target, alaph, is_discrete=True):
    """Markov blanket of ``target`` built from ``RecognizePC`` results.

    The parents/children of ``target`` are found first.  For each ``x`` among
    them, the parents/children of ``x`` are computed; ``x`` is dropped when
    ``target`` is not among them, otherwise every ``y`` in that set which is
    dependent on ``target`` given ``sepset[y] + [x]`` is added as a spouse.

    Returns:
        ``(MB, ci_number)`` with ``MB`` de-duplicated.
    """
    _, n_vars = np.shape(data)
    start = [v for v in range(n_vars) if v != target]
    PC, sepset, ci_number = RecognizePC(data, target, start, alaph, is_discrete)

    MB = PC.copy()
    for x in PC:
        start_x = [v for v in range(n_vars) if v != x]
        CanSP, _, used = RecognizePC(data, x, start_x, alaph, is_discrete)
        ci_number += used

        # Symmetry check: x must also see target as a neighbour.
        if target not in CanSP:
            MB.remove(x)
            continue

        for y in CanSP:
            if y == target or y in MB:
                continue
            cond = list(set(list(sepset[y]) + [x]))
            ci_number += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                MB.append(y)

    return list(set(MB)), ci_number
def pc_simple(data, target, alaph, isdiscrete):
    """Level-wise search for the parents/children of ``target``.

    Starting with all other variables, conditioning-set size ``k`` grows from
    0 while ``len(PC) > k``.  At each level a variable ``x`` is removed when
    some size-``k`` subset of the other variables (taken from the snapshot of
    PC at the start of the level) makes it independent of ``target``.

    Returns:
        ``(PC, ciTest)``: remaining variables and number of CI tests.
    """
    _, n_vars = np.shape(data)
    # Every variable except the target itself is a candidate.
    PC = [v for v in range(n_vars) if v != target]
    tests_run = 0
    order = 0

    while order < len(PC):
        snapshot = PC.copy()
        for x in snapshot:
            pool = [v for v in snapshot if v != x]
            if len(pool) < order:
                continue
            # Test every subset of the requested size; one separating subset
            # is enough to remove x.
            for s in subsets(pool, order):
                pval, _ = cond_indep_test(data, x, target, s, isdiscrete)
                tests_run += 1
                if pval > alaph:
                    PC.remove(x)
                    break
        order += 1

    return PC, tests_run
def MMMB(data, target, alaph, is_discrete=True):
    """Markov blanket of ``target`` built from ``MMPC`` results.

    The parents/children (PC) of ``target`` are found with ``MMPC``.  For each
    ``x`` in PC, the PC set of ``x`` is computed and every ``y`` in it that is
    neither ``target`` nor in PC(target) is tested against ``target`` given
    ``sepset[y] + [x]``; a dependent ``y`` is added as a spouse.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``MMPC`` and ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: Markov-blanket members without duplicates, in
        insertion order, and the number of CI tests performed.
    """
    PC, sepset, ci_number = MMPC(data, target, alaph, is_discrete)

    MB = PC.copy()
    for x in PC:
        PCofPC, _, used = MMPC(data, x, alaph, is_discrete)
        ci_number += used
        for y in PCofPC:
            if y == target or y in PC:
                continue
            # Use integer column indices, exactly as MMPC does when it calls
            # cond_indep_test; the previous str() conversion mixed types.
            cond = list(set(list(sepset[y]) + [x]))
            ci_number += 1
            pval, _ = cond_indep_test(data, target, y, cond, is_discrete)
            if pval <= alaph:
                # The same spouse can be reached through several x.
                if y not in MB:
                    MB.append(y)
                # Original control flow: stop scanning PC(x) after the first
                # accepted spouse.
                break

    return MB, ci_number
def RecognizePC(data, target, ADJT, alaph, is_discrete=True):
    """Prune the candidate adjacency set ``ADJT`` of ``target``.

    For growing conditioning-set sizes, every candidate ``x`` is tested
    against ``target`` given each same-size subset of the other candidates;
    the first separating subset is stored in ``sepset[x]`` and ``x`` is
    scheduled for removal.  Removals are applied after the whole level has
    been scanned.  The search stops when a level removes nothing or when the
    size reaches ``len(ADJT)``.

    Returns:
        ``(ADJT, sepset, ci_number)``.
    """
    _, n_vars = np.shape(data)
    sepset = [[] for _ in range(n_vars)]
    ci_number = 0
    cutSetSize = 0

    while cutSetSize < len(ADJT):
        rejected = []
        for x in ADJT:
            rest = [v for v in ADJT if v != x]
            for S in subsets(rest, cutSetSize):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, x, S, is_discrete)
                if pval > alaph:
                    rejected.append(x)
                    sepset[x] = list(S)
                    break

        if not rejected:
            break
        ADJT = [v for v in ADJT if v not in rejected]
        cutSetSize += 1

    return ADJT, sepset, ci_number
def IAMB(data, target, alaph, is_discrete=True):
    """Incremental-association Markov blanket of ``target``.

    Forward phase: repeatedly add the variable with the largest dependence on
    ``target`` given the current blanket, among those whose p-value is at
    most ``alaph``; stop when no variable qualifies.  Backward phase: remove
    each member that is independent of ``target`` given the other members.

    Returns:
        ``(CMB, ci_number)``.
    """
    _, n_vars = np.shape(data)
    CMB = []
    ci_number = 0

    # Forward phase
    while True:
        best = None
        best_dep = -float("inf")
        pool = [v for v in range(n_vars) if v != target and v not in CMB]
        for x in pool:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            # Keep the dependent variable with the strongest association.
            if pval <= alaph and dep > best_dep:
                best_dep = dep
                best = x
        if best is None:
            break
        CMB.append(best)

    # Backward phase: iterate a snapshot because CMB shrinks in the loop.
    for x in CMB.copy():
        others = [v for v in CMB if v != x]
        ci_number += 1
        pval, _ = cond_indep_test(data, target, x, others, is_discrete)
        if pval > alaph:
            CMB.remove(x)

    return CMB, ci_number
def IPC_MB(data, target, alaph, is_discrete=True):
    """Markov blanket of ``target`` built from ``RecognizePC`` results.

    The parents/children of ``target`` are found first.  For each ``x`` among
    them, the parents/children of ``x`` are computed; ``x`` is dropped when
    ``target`` is not among them, otherwise every ``y`` in that set which is
    not yet in the blanket and is dependent on ``target`` given
    ``sepset[y] + [x]`` is added as a spouse.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``RecognizePC`` and ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``.
    """
    _, kVar = np.shape(data)
    CanADJT = [i for i in range(kVar) if i != target]
    PC, sepset, ci_number = RecognizePC(data, target, CanADJT, alaph, is_discrete)

    MB = PC.copy()
    for x in PC:
        CanADJT_X = [i for i in range(kVar) if i != x]
        CanSP, _, ci_num2 = RecognizePC(data, x, CanADJT_X, alaph, is_discrete)
        ci_number += ci_num2

        # Symmetry check: x must also see target as a neighbour.
        if target not in CanSP:
            MB.remove(x)
            continue

        for y in CanSP:
            if y == target or y in MB:
                continue
            # Use integer column indices, exactly as RecognizePC does when it
            # calls cond_indep_test; the previous str() conversion mixed types.
            conditionsSet = list(set(list(sepset[y]) + [x]))
            ci_number += 1
            pval, _ = cond_indep_test(data, target, y, conditionsSet, is_discrete)
            if pval <= alaph:
                MB.append(y)

    return MB, ci_number


# Usage example and benchmark notes kept from the original file:
# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v3.csv")
# print("the file read")
#
# target = 6
# alaph = 0.05
#
# MBs=IPC_MB(data,target,alaph)
# print("MBs is: "+str(MBs))

# F1 is: 0.7997213203463205
# Precision is: 0.893875
# Recall is: 0.7637083333333331
# time is: 26.190546875

# 5000
# F1 is: 0.96
# Precision is: 0.94
# Recall is: 1.0
# Distance is: 0.06
# ci_number is: 486.135
# time is: 18.63
def IAMB(data, target, alaph, attribute, is_discrete):
    """IAMB restricted to the variables listed in ``attribute``.

    Forward phase: repeatedly add, from ``attribute``, the variable with the
    largest dependence on ``target`` given the current blanket among those
    whose p-value is at most ``alaph``.  Backward phase: remove each member
    that is independent of ``target`` given the other members.

    Returns:
        ``(CMB, ci_number)``.
    """
    CMB = []
    ci_number = 0

    # Forward phase
    while True:
        best = None
        best_dep = -float("inf")
        pool = [v for v in attribute if v != target and v not in CMB]
        for x in pool:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            # Keep the dependent variable with the strongest association.
            if pval <= alaph and dep > best_dep:
                best_dep = dep
                best = x
        if best is None:
            break
        CMB.append(best)

    # Backward phase: iterate a snapshot because CMB shrinks in the loop.
    for x in CMB.copy():
        others = [v for v in CMB if v != x]
        ci_number += 1
        pval, _ = cond_indep_test(data, target, x, others, is_discrete)
        if pval > alaph:
            CMB.remove(x)

    return CMB, ci_number
def KIAMB(data, target, alaph, k, is_discrete=True):
    """Randomised IAMB variant.

    In each growing round all variables dependent on ``target`` given the
    current blanket are collected; a random sample of
    ``max(1, int(len(candidates) * k))`` of them is drawn and the sampled
    variable with the largest dependence is added.  Growing stops when no
    candidate is left.  Afterwards members independent of ``target`` given
    the other members are removed.

    Returns:
        ``(MB, ci_number)`` with ``MB`` de-duplicated.
    """
    _, p = np.shape(data)
    MB = []
    ci_number = 0

    while True:
        x_dep = [0] * p
        CanMB = []
        pool = [v for v in range(p) if v != target and v not in MB]
        for x in pool:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, MB, is_discrete)
            if pval <= alaph:
                CanMB.append(x)
                x_dep[x] = dep

        if not CanMB:
            break

        sampled = random.sample(CanMB, max(1, int(len(CanMB) * k)))
        chosen = None
        chosen_dep = -float("inf")
        for x in sampled:
            if x_dep[x] > chosen_dep:
                chosen = x
                chosen_dep = x_dep[x]

        if chosen is None:
            break
        MB.append(chosen)

    # Remove false positives; iterate a snapshot because MB shrinks.
    for x in MB.copy():
        others = [v for v in MB if v != x]
        ci_number += 1
        pval, _ = cond_indep_test(data, target, x, others, is_discrete)
        if pval > alaph:
            MB.remove(x)

    return list(set(MB)), ci_number
def semi_HITON_MB(data, target, alaph, is_discrete=True):
    """Markov blanket of ``target`` built from ``semi_HITON_PC`` results.

    The parents/children (TPC) of ``target`` are found first.  For each ``x``
    in TPC, every ``y`` in PC(x) that is neither ``target`` nor in TPC is
    tested against ``target`` given ``sep[y]`` plus ``x``; a dependent ``y``
    is added as a spouse.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``semi_HITON_PC`` and ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: Markov-blanket members without duplicates, in
        insertion order, and the number of CI tests performed.
    """
    # Forward is_discrete so PC discovery uses the same kind of test as the
    # spouse check below (it was silently left at its default before).
    TPC, sep, ci_number = semi_HITON_PC(data, target, alaph, is_discrete)

    MB = TPC.copy()
    for x in TPC:
        xPC, _, ci_number2 = semi_HITON_PC(data, x, alaph, is_discrete)
        ci_number += ci_number2
        for y in xPC:
            if y == target or y in TPC:
                continue
            # Integer indices plus x itself.  The earlier set(str(x)) split a
            # multi-digit index such as 12 into the characters '1' and '2'.
            condition_set = list(set(sep[y]).union({x}))
            ci_number += 1
            # cond_indep_test yields (pval, dep), as used in semi_HITON_PC.
            pval, _ = cond_indep_test(data, target, y, condition_set, is_discrete)
            if pval <= alaph:
                # The same spouse can be reached through several x.
                if y not in MB:
                    MB.append(y)
                # Original control flow: stop scanning PC(x) after the first
                # accepted spouse.
                break

    return MB, ci_number


# Usage example and benchmark notes kept from the original file:
# data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# MB = semi_HITON_MB(data,1,0.01)
# print(MB)

# 500 0.01
# F1 is: 0.8089410311910312
# Precision is: 0.9234523809523809
# Recall is: 0.7709166666666666
# time is: 16.431171875

# 5000 0.01
# F1 is: 0.9340098937010702
# Precision is: 0.9733333333333334
# Recall is: 0.9137083333333336
# time is: 57.92828125

# 500 0.01
# F1 is: 0.81
# Precision is: 0.92
# Recall is: 0.77
# Distance is: 0.28
# ci_number is: 280.71
# time is: 16.43

# 5000 0.01
# F1 is: 0.93
# Precision is: 0.97
# Recall is: 0.91
# Distance is: 0.11
# ci_number is: 644.42
# time is: 56.91
def GSMB(data, target, alaph, is_discrete):
    """Grow-shrink Markov blanket of ``target``.

    Grow phase: scan the variables not yet in the blanket and add the first
    one whose p-value given the current blanket is below ``alaph``; restart
    the scan after every addition and stop when a full scan adds nothing.
    Shrink phase: remove the first member that is independent of ``target``
    given the other members; restart after every removal.

    Returns:
        ``(CMB, ci_number)`` with ``CMB`` de-duplicated.
    """
    _, n_vars = np.shape(data)
    CMB = []
    ci_number = 0

    # Grow phase
    candidates = [v for v in range(n_vars) if v != target]
    grew = True
    while grew:
        grew = False
        for x in candidates:
            ci_number += 1
            pval, _ = cond_indep_test(data, target, x, CMB, is_discrete)
            if pval < alaph:
                CMB.append(x)
                grew = True
                break
        candidates = [v for v in range(n_vars) if v != target and v not in CMB]

    # Shrink phase
    shrunk = True
    while shrunk:
        shrunk = False
        for x in CMB.copy():
            others = [v for v in CMB if v != x]
            ci_number += 1
            pval, _ = cond_indep_test(data, target, x, others, is_discrete)
            if pval > alaph:
                CMB.remove(x)
                shrunk = True
                break

    return list(set(CMB)), ci_number
def HITON_MB(data, target, alaph, is_discrete=True):
    """Markov blanket of ``target`` built from ``HITON_PC`` results.

    The parents/children (PC) of ``target`` are found with ``HITON_PC``.  For
    each ``x`` in PC, every ``y`` in PC(x) that is neither ``target`` nor in
    PC(target) is tested against ``target`` given ``sepset[y] + [x]``; a
    dependent ``y`` is added as a spouse.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``HITON_PC`` and ``cond_indep_test``.

    Returns:
        ``(currentMB, ci_number)``: Markov-blanket members without
        duplicates, in insertion order, and the number of CI tests performed.
    """
    PC, sepset, ci_number = HITON_PC(data, target, alaph, is_discrete)

    currentMB = PC.copy()
    for x in PC:
        PCofPC, _, ci_num2 = HITON_PC(data, x, alaph, is_discrete)
        ci_number += ci_num2
        for y in PCofPC:
            if y == target or y in PC:
                continue
            # Use integer column indices like every other cond_indep_test
            # call in this file; the previous str() conversion mixed types.
            conditions_Set = list(set(list(sepset[y]) + [x]))
            ci_number += 1
            pval, _ = cond_indep_test(data, target, y, conditions_Set, is_discrete)
            if pval <= alaph:
                # The same spouse can be reached through several x.
                if y not in currentMB:
                    currentMB.append(y)
                # Original control flow: stop scanning PC(x) after the first
                # accepted spouse.
                break

    return currentMB, ci_number


# Usage example and benchmark notes kept from the original file:
# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v1.csv")
# print("the file read")
#
# target = 4
# alaph = 0.05
#
# MBs=HITON_MB(data,target,alaph)
# print("MBs is: "+str(MBs))

# 500
# F1 is: 0.8465906593406597
# Precision is: 0.8957857142857146
# Recall is: 0.85525
# time is: 27.555

# 5000
# F1 is: 0.98
# Recall is: 0.99
# Distance is: 0.03
# ci_number is: 1017.85
# time is: 96.69
def semi_HITON_MB(data, target, alaph, is_discrete=True):
    """Markov blanket of ``target`` built from ``semi_HITON_PC`` results.

    The parents/children (TPC) of ``target`` are found first.  For each ``x``
    in TPC, every ``y`` in PC(x) that is neither ``target`` nor in TPC is
    tested against ``target`` given ``sep[y]`` plus ``x``; a dependent ``y``
    is added as a spouse.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``semi_HITON_PC`` and ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)`` with ``MB`` de-duplicated.
    """
    # Forward is_discrete: previously PC discovery always ran with its
    # default (discrete) test even when the caller asked for the other one.
    TPC, sep, ci_number = semi_HITON_PC(data, target, alaph, is_discrete)

    MB = TPC.copy()
    for x in TPC:
        xPC, _, ci_number2 = semi_HITON_PC(data, x, alaph, is_discrete)
        ci_number += ci_number2
        for y in xPC:
            if y == target or y in TPC:
                continue
            condition_set = list(set(list(sep[y])).union(set([x])))
            ci_number += 1
            pval, _ = cond_indep_test(data, target, y, condition_set, is_discrete)
            if pval <= alaph:
                MB.append(y)
                # Original control flow: stop scanning PC(x) after the first
                # accepted spouse.
                break

    return list(set(MB)), ci_number
def one_run(data, target, S, alaph, is_discrete):
    """One forward run with early dropping; ``S`` is extended in place.

    The remaining set starts as every variable not in ``S`` and different
    from ``target``.  In each iteration the remaining variables are tested
    against ``target`` given ``S``; independent ones are dropped for the rest
    of the run, the most strongly associated dependent one is appended to
    ``S``, and the other dependent ones form the next remaining set.

    Returns:
        ``(S, ci_number)`` where ``S`` is the same list object passed in.
    """
    ci_number = 0
    _, n_vars = np.shape(data)
    remaining = [v for v in range(n_vars) if v not in S and v != target]

    while remaining:
        dependent = []
        for x in remaining:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, S, is_discrete)
            if pval <= alaph:
                dependent.append([x, dep])

        if not dependent:
            break

        # Strongest association first (stable for ties).
        dependent.sort(key=lambda pair: pair[1], reverse=True)
        S.append(dependent[0][0])
        remaining = [pair[0] for pair in dependent[1:]]

    return S, ci_number
def MBtoPC(data, target, alaph, attribute, is_discrete):
    """Reduce the IAMB Markov blanket of ``target`` to parents/children.

    The blanket is computed with ``IAMB`` over ``attribute``.  A member ``x``
    is removed from the result when some subset (size 0..3) of the other
    blanket members makes it independent of ``target``.

    Returns:
        ``(PC, ci_number)`` where ``ci_number`` includes the tests run by
        ``IAMB``.
    """
    max_k = 3
    ci_number = 0
    MB, used = IAMB(data, target, alaph, attribute, is_discrete)
    ci_number += used

    PC = MB.copy()
    for x in MB:
        others = [v for v in MB if v != x]
        depth = min(len(others), max_k)
        separated = False
        for size in range(depth + 1):
            for Z in subsets(others, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, x, Z, is_discrete)
                if pval > alaph:
                    PC.remove(x)
                    separated = True
                    break
            if separated:
                break

    return PC, ci_number
def fast_IAMB(data, target, alaph, is_discrete=True):
    """Fast-IAMB style Markov-blanket search for ``target``.

    All variables dependent on ``target`` (given the current blanket) are
    collected and sorted by decreasing association.  Growing phase: they are
    added to the blanket one after another while ``number >= 5 * df`` holds,
    where ``number`` is the number of rows of ``data`` and ``df`` is derived
    from the state counts returned by ``ns``.  Shrinking phase: members
    independent of ``target`` given the other members are removed.  The two
    phases repeat until no candidate is left, the blanket equals its previous
    snapshot, or growing stopped for lack of data and nothing was removed.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: blanket members and the number of CI tests.
    """
    number, kVar = np.shape(data)
    ci_number = 0
    MB = []

    # How often each variable was added and then removed in the same round;
    # after more than num_reapeat such events it is never considered again.
    repeat_in_set = [0] * kVar
    num_reapeat = 10
    no_in_set = []

    # Candidates as [variable, dependence] pairs.
    S_variables = []
    for x in range(kVar):
        if x == target:
            continue
        # Count this test (it was previously added as "+= 0").
        ci_number += 1
        pval, dep = cond_indep_test(data, target, x, MB, is_discrete)
        if pval <= alaph:
            S_variables.append([x, dep])

    BT_temp = None
    while S_variables != []:
        flag_repeat_set = [False] * kVar
        # Strongest association first.
        S_variables = sorted(S_variables, key=lambda item: item[1], reverse=True)

        # Growing phase
        insufficient_data_Flag = False
        attributes_removed_Flag = False
        for x, _ in S_variables:
            qi = ns(data, MB)
            if len(qi) == 0:
                # Empty blanket: the product over no state counts is 1.
                multiplier = 1
            else:
                # multiplier = 1 + sum((qi[i] - 1) * prod(qi[:i]))
                weights = [1]
                if len(qi) > 1:
                    weights.extend(np.cumprod(qi[0:-1]))
                multiplier = 1
                for i in range(len(qi)):
                    multiplier += (qi[i] - 1) * weights[i]

            qxt = ns(data, [x, target])
            # np.prod on a plain list; np.mat no longer exists in NumPy 2.
            df = np.prod([i - 1 for i in qxt]) * multiplier

            if number >= 5 * df:
                MB.append(x)
                flag_repeat_set[x] = True
            else:
                # Not enough data for this test: go to the shrinking phase.
                insufficient_data_Flag = True
                break

        # Shrinking phase
        if BT_temp == MB:
            break
        BT_temp = MB.copy()
        for x in BT_temp:
            subsets_BT = [i for i in MB if i != x]
            ci_number += 1
            pval_sp, _ = cond_indep_test(data, target, x, subsets_BT, is_discrete)
            if pval_sp > alaph:
                MB.remove(x)
                if flag_repeat_set[x]:
                    repeat_in_set[x] += 1
                    if repeat_in_set[x] > num_reapeat:
                        no_in_set.append(x)
                attributes_removed_Flag = True

        # Nothing more can be added: growing stopped early and shrinking
        # removed nothing.
        if insufficient_data_Flag and not attributes_removed_Flag:
            break

        # Rebuild the candidate list against the updated blanket.
        S_variables = []
        BTT_variables = [
            i for i in range(kVar)
            if i != target and i not in MB and i not in no_in_set
        ]
        for x in BTT_variables:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, MB, is_discrete)
            if pval <= alaph:
                S_variables.append([x, dep])

    return MB, ci_number
def semi_HITON_PC(data, target, alaph, is_disrete=True):
    """Semi-interleaved HITON search for the parents/children of ``target``.

    Variables marginally dependent on ``target`` are ranked by decreasing
    association.  Each is added to the current set in that order and removed
    again at once if some subset (size 0..3) of the other current members
    separates it from ``target``.  Afterwards every member except the one
    added last is re-checked against subsets of the final set.

    Returns:
        ``(current_pc, sep, ci_number)``: de-duplicated members, the
        separating set recorded per removed variable, and the CI-test count.
    """
    _, p = np.shape(data)
    ci_number = 0
    sep = [[] for _ in range(p)]

    # Rank candidates by marginal association with the target.
    ranked = []
    for x in range(p):
        if x == target:
            continue
        ci_number += 1
        pval, dep = cond_indep_test(data, target, x, [], is_disrete)
        if pval <= alaph:
            ranked.append([x, dep])
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    candidate_pc = [pair[0] for pair in ranked]

    # Inclusion with immediate check of the newly added variable.
    current_pc = []
    for x in candidate_pc:
        current_pc.append(x)
        others = [v for v in current_pc if v != x]
        depth = min(len(others), 3)
        rejected = False
        for size in range(depth + 1):
            for s in subsets(others, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, x, target, s, is_disrete)
                if pval > alaph:
                    sep[x] = list(s)
                    current_pc.remove(x)
                    rejected = True
                    break
            if rejected:
                break

    # Backward phase for everything except the variable added last.
    last_added = current_pc[-1] if len(current_pc) > 0 else None
    for x in current_pc.copy():
        if x == last_added:
            continue
        others = [v for v in current_pc if v != x]
        depth = min(len(others), 3)
        rejected = False
        for size in range(depth + 1):
            for s in subsets(others, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, x, target, s, is_disrete)
                if pval > alaph:
                    current_pc.remove(x)
                    sep[x] = list(s)
                    rejected = True
                    break
            if rejected:
                break

    return list(set(current_pc)), sep, ci_number
def BAMB(data, target, alaph, is_discrete=True):
    """Balanced Markov-blanket search for ``target``.

    Variables marginally dependent on ``target`` are processed by decreasing
    association.  For each such variable ``A``:

    1. ``A`` is skipped when a subset (size 0..max_k) of the current
       candidate parents/children (CPC) separates it from ``target``;
       otherwise it joins CPC and the earlier CPC members are re-checked.
    2. Candidate spouses through ``A`` are collected and pruned in three
       passes (steps 2-1, 2-2, 2-3) using different conditioning sets.
    3. CPC members are re-checked with conditioning sets enlarged by the
       candidate spouses of the conditioning variables.

    Args:
        data: 2-D data set (rows are samples, columns are variables).
        target: column index of the target variable.
        alaph: significance level for the CI tests.
        is_discrete: forwarded to ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: the union of CPC and the spouses recorded for
        its members, and the number of CI tests performed.
    """
    ci_number = 0
    _, kVar = np.shape(data)
    max_k = 3
    CPC = []
    sepset = [[] for _ in range(kVar)]
    CSPT = [[] for _ in range(kVar)]
    SP = [[] for _ in range(kVar)]

    # Marginal screening: keep dependent variables with their association.
    variDepSet = []
    for x in range(kVar):
        if x == target:
            continue
        ci_number += 1
        pval_f, dep_f = cond_indep_test(data, target, x, [], is_discrete)
        if pval_f > alaph:
            sepset[x] = []
        else:
            variDepSet.append([x, dep_f])
    variDepSet = sorted(variDepSet, key=lambda item: item[1], reverse=True)

    for A, _ in variDepSet:
        # ---- step 1: candidate set of PC --------------------------------
        separated = False
        for j in range(min(len(CPC), max_k) + 1):
            for Z in subsets(CPC, j):
                ci_number += 1
                convari = list(Z)
                pval_TAZ, _ = cond_indep_test(data, target, A, convari, is_discrete)
                if pval_TAZ > alaph:
                    sepset[A] = convari
                    separated = True
                    break
            if separated:
                break
        if separated:
            continue

        # A joins CPC; re-check the earlier members, newest first.
        CPC_ReA = CPC.copy()
        CPC.append(A)
        for B in reversed(CPC_ReA):
            conditionSet = [i for i in CPC_ReA if i != B]
            removed = False
            for j in range(min(len(conditionSet), max_k) + 1):
                for Z in subsets(conditionSet, j):
                    ci_number += 1
                    convari = list(Z)
                    pval_TBZ, _ = cond_indep_test(data, target, B, convari, is_discrete)
                    if pval_TBZ >= alaph:
                        CPC.remove(B)
                        CSPT[B] = []
                        sepset[B] = convari
                        removed = True
                        # Stop here: testing further subsets could call
                        # CPC.remove(B) a second time and raise ValueError.
                        break
                if removed:
                    break

        # ---- candidate spouses through A --------------------------------
        CSPT[A] = []
        pval_CSPT = []
        for C in range(kVar):
            if C == target or C in CPC:
                continue
            conditionSet = list(set(list(sepset[C]) + [A]))
            ci_number += 1
            pval_CAT, _ = cond_indep_test(data, target, C, conditionSet, is_discrete)
            if pval_CAT <= alaph:
                CSPT[A].append(C)
                pval_CSPT.append([C, pval_CAT])

        # NOTE(review): the three pruning loops below and the step-3 loop use
        # "while index >= 0" with a pre-decrement, so after index 0 they run
        # once more with index -1 and re-test the most recently added element.
        # That behaviour is kept unchanged here; confirm whether it is intended.

        # ---- step 2-1: condition on A and one other candidate spouse ----
        pval_CSPT = sorted(pval_CSPT, key=lambda item: item[1], reverse=False)
        SP[A] = []
        for E, _ in pval_CSPT:
            SP[A].append(E)
            index_spa = len(SP[A])
            breakflag_spa = False
            while index_spa >= 0:
                index_spa -= 1
                x = SP[A][index_spa]
                ZAllconditionSet = [i for i in SP[A] if i != x]
                for Z in ZAllconditionSet:
                    conditionvari = [Z]
                    if A not in conditionvari:
                        conditionvari.append(A)
                    ci_number += 1
                    pval_TXZ, _ = cond_indep_test(
                        data, target, x, conditionvari, is_discrete)
                    if pval_TXZ > alaph:
                        SP[A].remove(x)
                        if x == E:
                            breakflag_spa = True
                        break
                # Once the newly added E itself is removed, stop re-checking.
                if breakflag_spa:
                    break

        # ---- step 2-2: test A against x given subsets of CPC and SP[A] --
        pval_CSPT_new = [item for item in pval_CSPT if item[0] in SP[A]]
        CSPT[A] = SP[A]
        SP[A] = []
        for E, _ in pval_CSPT_new:
            SP[A].append(E)
            index_spa = len(SP[A])
            breakflag_spa = False
            while index_spa >= 0:
                index_spa -= 1
                x = SP[A][index_spa]
                breakFlag = False
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                for j in range(min(len(ZAllSubsets), max_k) + 1):
                    for Z in subsets(ZAllSubsets, j):
                        Z = list(Z)
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(data, A, x, Z, is_discrete)
                        if pval_TXZ > alaph:
                            SP[A].remove(x)
                            breakFlag = True
                            if x == E:
                                breakflag_spa = True
                            break
                    if breakFlag:
                        break
                if breakflag_spa:
                    break

        # ---- step 2-3: test target against x given subsets plus A -------
        pval_CSPT_fin = [item for item in pval_CSPT if item[0] in SP[A]]
        CSPT[A] = SP[A]
        SP[A] = []
        for E, _ in pval_CSPT_fin:
            SP[A].append(E)
            index_spa = len(SP[A])
            breakflag_spa = False
            while index_spa >= 0:
                index_spa -= 1
                x = SP[A][index_spa]
                breakFlag = False
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                for j in range(min(len(ZAllSubsets), max_k) + 1):
                    for Z in subsets(ZAllSubsets, j):
                        Z = list(Z)
                        Z.append(A)
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(data, target, x, Z, is_discrete)
                        if pval_TXZ >= alaph:
                            SP[A].remove(x)
                            if x == E:
                                breakflag_spa = True
                            breakFlag = True
                            break
                    if breakFlag:
                        break
                if breakflag_spa:
                    break
        CSPT[A] = SP[A]

        # ---- step 3: remove false positives from the candidate PC set ---
        CPC_temp = CPC.copy()
        x_index = len(CPC_temp)
        A_breakFlag = False
        while x_index >= 0:
            x_index -= 1
            x = CPC_temp[x_index]
            flag2 = False
            ZZALLsubsets = [i for i in CPC if i != x]
            for j in range(min(len(ZZALLsubsets), max_k) + 1):
                for Z in subsets(ZZALLsubsets, j):
                    # Conditioning set: Z plus the candidate spouses of its
                    # members that are not themselves in CPC.
                    conditionSet = [i for y in Z for i in CSPT[y] if i not in CPC]
                    conditionSet = list(set(conditionSet).union(set(Z)))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, x, conditionSet, is_discrete)
                    if pval >= alaph:
                        CPC.remove(x)
                        CSPT[x] = []
                        flag2 = True
                        if x == A:
                            A_breakFlag = True
                        break
                if flag2:
                    break
            if A_breakFlag:
                break

    spouseT = [j for i in CPC for j in CSPT[i]]
    MB = list(set(CPC).union(set(spouseT)))
    return MB, ci_number
def MMPC(data, target, alpha, is_discrete):
    """Max-min search for the parents/children of ``target``.

    Forward phase: for every remaining variable ``getMinDep`` gives its
    minimum association with ``target`` over subsets of the current CPC.
    Variables whose minimum is 0 are excluded for good and their separating
    set is stored; the variable with the largest minimum is added to CPC.
    The phase ends when no variable produces a non-negative maximum.
    Backward phase: a member is removed when some subset (size 0..3) of the
    other members separates it from ``target``.

    Returns:
        ``(CPC, sepset, ci_number)`` with ``CPC`` de-duplicated.
    """
    _, n_vars = np.shape(data)
    ci_number = 0
    CPC = []
    excluded = []
    sepset = [[] for _ in range(n_vars)]

    # Phase I: forward (max-min heuristic)
    while True:
        pool = [
            v for v in range(n_vars)
            if v != target and v not in CPC and v not in excluded
        ]
        best_dep = -float("inf")
        best_var = 0
        for x in pool:
            min_dep, sep_x, used = getMinDep(
                data, target, x, CPC, alpha, is_discrete)
            ci_number += used
            if min_dep == 0:
                # Independent given some subset: never test x again.
                excluded.append(x)
                sepset[x] = list(sep_x)
            elif min_dep > best_dep:
                best_var = x
                best_dep = min_dep

        if best_dep >= 0:
            CPC.append(best_var)
        else:
            # Nothing was selectable in this round, so CPC is final.
            break

    # Phase II: backward
    max_k = 3
    for a in CPC.copy():
        others = [v for v in CPC if v != a]
        depth = min(len(others), max_k)
        removed = False
        for size in range(depth + 1):
            for S in subsets(others, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, a, S, is_discrete)
                if pval > alpha:
                    CPC.remove(a)
                    removed = True
                    break
            if removed:
                break

    return list(set(CPC)), sepset, ci_number
def inter_IAMB(data, target, alaph, is_discrete=True):
    """Interleaved IAMB: growing and shrinking alternate after each addition.

    Growing: among the variables not in the blanket (and not banned), the one
    with the largest dependence on ``target`` given the blanket is selected
    and added if its p-value is at most ``alaph``; otherwise the search ends.
    Shrinking: members are re-tested from the end of the list and removed
    when independent of ``target`` given the other members.  A variable
    removed more than 10 times is banned from further growing rounds.

    Returns:
        ``(MB, ci_number)`` with ``MB`` de-duplicated.
    """
    _, n_vars = np.shape(data)
    ci_number = 0
    MB = []
    removeSet = []
    rmNumberSet = [0] * n_vars

    while True:
        # Growing phase
        top_dep = -float("inf")
        top_pval = 1
        max_s = None
        pool = [
            v for v in range(n_vars)
            if v != target and v not in MB and v not in removeSet
        ]
        for s in pool:
            ci_number += 1
            pval_gp, dep_gp = cond_indep_test(data, target, s, MB, is_discrete)
            if dep_gp > top_dep:
                top_dep = dep_gp
                max_s = s
                top_pval = pval_gp

        # Nothing was added, so shrinking cannot remove anything: finished.
        if not top_pval <= alaph:
            break
        MB.append(max_s)

        # Shrinking phase, in reverse order.
        # NOTE(review): the ">= 0" bound with a pre-decrement means that after
        # index 0 the loop runs once more with index -1, i.e. it re-tests the
        # last list element.  This is kept exactly as in the original.
        position = len(MB)
        while position >= 0:
            position -= 1
            x = MB[position]
            ci_number += 1
            others = [v for v in MB if v != x]
            pval_sp, _ = cond_indep_test(data, target, x, others, is_discrete)
            if pval_sp > alaph:
                MB.remove(x)
                # Removing the variable that was just added ends this
                # shrinking pass.
                if x == max_s:
                    break
                rmNumberSet[x] += 1
                if rmNumberSet[x] > 10:
                    removeSet.append(x)

    return list(set(MB)), ci_number
def IAMB(data, target, alaph, is_discrete=True):
    """Forward (growing) phase of IAMB for ``target``.

    Repeatedly adds the variable with the largest dependence on ``target``
    given the current set, among those whose p-value is at most ``alaph``,
    and stops when no variable qualifies.  This version performs no backward
    removal.

    Returns:
        ``(CMB, ci_number)`` with ``CMB`` de-duplicated.
    """
    _, n_vars = np.shape(data)
    CMB = []
    ci_number = 0

    while True:
        best = None
        best_dep = -float("inf")
        pool = [v for v in range(n_vars) if v != target and v not in CMB]
        for x in pool:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            # Keep the dependent variable with the strongest association.
            if pval <= alaph and dep > best_dep:
                best_dep = dep
                best = x
        if best is None:
            break
        CMB.append(best)

    return list(set(CMB)), ci_number


# Benchmark notes and usage example kept from the original file:
# F1 is: 0.75430044955045
# Precision is: 0.8198333333333335
# Recall is: 0.7885833333333332
# time is: 22.64546875

# F1 is: 0.81
# Precision is: 0.89
# Recall is: 0.79
# Distance is: 0.28
# ci_number is: 77.25
# time is: 15.16

# 5000
#
# F1 is: 0.92 ±0.40
# Precision is: 0.94±0.53
# Recall is: 0.94±0.30
# Distance is: 0.12±0.56
# ci_number is: 95.82±38.47
# time is: 74.56±188.69

# F1 is: 0.89
# Precision is: 0.88
# Recall is: 0.94
# Distance is: 0.16
# ci_number is: 97.85
# time is: 88.89

# import pandas as pd
# data = pd.read_csv("../data/Alarm1_s1000_v1.csv")
# print("the file read")
#
# target = 2
# alaph = 0.01
#
# MBs=IAMB(data, target, alaph, is_discrete=True)
# print("MBs is: "+str(MBs))
def MBGSL(data, alpha, is_discrete, selected):
    """Learn a partially directed graph from per-variable Markov blankets.

    Steps performed:
      1. Learn a Markov blanket for every variable with the learner chosen by
         ``selected`` and symmetrise the result with the OR rule.
      2. For every linked pair, search for a conditioning set that separates
         the two variables and, if one is found, remove the link in both
         directions.
      3. Tentatively orient ``y -> x`` for triples ``y - x - z`` where ``z``
         is a neighbour of ``x`` but not of ``y``; the orientation is kept
         unless ``y`` and ``z`` test independent given ``x``.
      4. Hand the matrices to ``meek`` for further processing.

    Args:
        data: two-dimensional data set; ``np.shape(data)`` must yield
            ``(n_samples, n_variables)``.
        alpha: significance level; ``pval > alpha`` is treated as independence.
        is_discrete: forwarded to the learners and to ``cond_indep_test``.
        selected: Markov-blanket learner to use: ``1`` -> ``MMMB``,
            ``2`` -> ``HITON_MB``, ``3`` -> ``semi_HITON_MB``, anything
            else -> ``PCMB``.

    Returns:
        tuple: ``(pdag, num_CI)`` - the ``pdag`` matrix returned by ``meek``
        (an oriented edge ``y -> x`` is written here as ``pdag[y, x] = -1``,
        ``pdag[x, y] = 0``) and the total number of conditional-independence
        tests.
    """
    _, kvar = np.shape(data)
    max_k = 3
    PP = np.zeros((kvar, kvar))
    num_CI = 0

    # Step 1: Markov blanket of every variable.
    for i in range(kvar):
        if selected == 1:
            MB, n_c = MMMB(data, i, alpha, is_discrete)
        elif selected == 2:
            MB, n_c = HITON_MB(data, i, alpha, is_discrete)
        elif selected == 3:
            MB, n_c = semi_HITON_MB(data, i, alpha, is_discrete)
        else:
            # PCMB returns a third value that is not needed here.
            MB, n_c, _ = PCMB(data, i, alpha, is_discrete)
        num_CI += n_c
        for j in MB:
            PP[i, j] = 1

    # OR rule: a one-sided blanket membership is made two-sided.
    for i in range(kvar):
        for j in range(i):
            if PP[i, j] != PP[j, i]:
                PP[i, j] = 1
                PP[j, i] = 1

    all_MB = [[j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)]

    # Step 2: remove the possible spouse links between linked variables.
    for x in range(kvar):
        for y in all_MB[x]:
            varis = list(
                set(all_MB[x]).difference([y]).union(
                    set(all_MB[y]).difference([x])))
            separated = False
            k = 0
            while len(varis) > k and k <= max_k:
                for s in subsets(varis, k):
                    num_CI += 1
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # The link is undirected at this stage, so it has to
                        # be cleared in both directions.
                        PP[x, y] = 0
                        PP[y, x] = 0
                        separated = True
                        break
                if separated:
                    break
                k += 1

    all_neighbor = [
        [j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)
    ]

    DAG = PP.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # Step 3: orient edges.
    for x in range(kvar):
        for y in all_neighbor[x]:
            # neighbours of x that are neither y nor neighbours of y
            sz = list(
                set(all_neighbor[x]).difference(
                    all_neighbor[y]).difference([y]))
            for z in sz:
                PP[y, x] = -1  # tentative orientation y -> x
                B = list(
                    set(all_MB[y]).difference([z]).union(
                        set(all_MB[z]).difference([y])))
                # Only cut sets of size 0 are enumerated, so each test
                # conditions on x plus whatever subsets(B, 0) yields
                # (presumably just the empty set - verify against `subsets`).
                for s in subsets(B, 0):
                    cond_s = list(set(s).union([x]))
                    num_CI += 1
                    pval, _ = cond_indep_test(data, y, z, cond_s, is_discrete)
                    if pval > alpha:
                        # y and z are independent given x: undo the
                        # tentative orientation.
                        PP[y, x] = 1
                        break
                if PP[y, x] == -1:
                    pdag[y, x] = -1
                    pdag[x, y] = 0
                    G[y, x] = 1
                    G[x, y] = 0
                    # edge is oriented; no need to look at further z
                    break

    # Step 4
    DAG, pdag, G = meek(DAG, pdag, G, kvar)
    return pdag, num_CI
def HITON_PC(data, target, alaph, is_discrete=True):
    """Estimate the parents-and-children set of ``target`` (HITON-PC style).

    Variables that are marginally dependent on ``target`` are admitted one at
    a time in decreasing order of their dependence score. After every
    admission each current member is tested against ``target`` given every
    subset (of size at most 3) of the other members and is dropped as soon as
    one test yields ``pval > alaph``.

    Args:
        data: two-dimensional data set; ``np.shape(data)`` must yield
            ``(n_samples, n_variables)``.
        target: column index of the target variable.
        alaph: significance level; ``pval <= alaph`` is treated as dependence.
        is_discrete: forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(PC, sepset, ci_number)`` where ``PC`` is the list of
        retained column indices, ``sepset[v]`` holds the conditioning set
        that caused ``v`` to be dropped (empty list if it never was) and
        ``ci_number`` counts the conditional-independence tests performed.
    """
    _, kVar = np.shape(data)
    sepset = [[] for _ in range(kVar)]
    ci_number = 0
    max_k = 3

    # Keep the variables that are marginally dependent on the target ...
    variDepSet = []
    for x in range(kVar):
        if x == target:
            continue
        ci_number += 1
        pval_gp, dep_gp = cond_indep_test(data, target, x, [], is_discrete)
        if pval_gp <= alaph:
            variDepSet.append([x, dep_gp])

    # ... ordered from the strongest to the weakest dependence.
    variDepSet.sort(key=lambda item: item[1], reverse=True)
    candidate_PC = [item[0] for item in variDepSet]

    PC = []
    for x in candidate_PC:
        PC.append(x)
        # Walk PC in reverse order, visiting every member exactly once (the
        # range stops at index 0, so the index never wraps around to -1 and
        # re-tests the element that was just added).
        for PC_index in range(len(PC) - 1, -1, -1):
            y = PC[PC_index]
            conditions_Set = [i for i in PC if i != y]
            Slength = min(len(conditions_Set), max_k)

            removed = False
            for j in range(Slength + 1):
                for s in subsets(conditions_Set, j):
                    ci_number += 1
                    conditions_test_set = list(s)
                    pval_rm, _ = cond_indep_test(
                        data, target, y, conditions_test_set, is_discrete)
                    if pval_rm > alaph:
                        sepset[y] = list(conditions_test_set)
                        PC.remove(y)
                        removed = True
                        break
                if removed:
                    break

            # The newly admitted variable was rejected straight away, so PC
            # is unchanged and the remaining members need no re-test.
            if removed and y == x:
                break

    return list(set(PC)), sepset, ci_number
def IAMBnPC(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target``: greedy growth, subset-based pruning.

    Growing phase: repeatedly add the variable with the largest dependence
    score among those whose p-value given the current candidate set is at
    most ``alaph``, until no such variable is left.

    Shrinking phase: for conditioning-set sizes 0, 1, ... (at most 3) every
    surviving candidate is tested against ``target`` given each subset of
    that size drawn from the other survivors, and is discarded on the first
    test with ``pval > alaph``.

    Args:
        data: two-dimensional data set; ``np.shape(data)`` must yield
            ``(n_samples, n_variables)``.
        target: column index of the target variable.
        alaph: significance level; ``pval <= alaph`` is treated as dependence
            in both phases.
        is_discrete: forwarded to ``cond_indep_test``.

    Returns:
        tuple: ``(MB, ci_number)`` - surviving column indices in ascending
        order and the number of conditional-independence tests performed.
    """
    CMB = []
    ci_number = 0
    _, kVar = np.shape(data)

    # ---- growing phase
    while True:
        variDepSet = []
        for x in range(kVar):
            if x == target or x in CMB:
                continue
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            if pval <= alaph:
                variDepSet.append([x, dep])
        if not variDepSet:
            break
        # max() returns the first maximal entry, i.e. the same element a
        # stable descending sort would put first.
        CMB.append(max(variDepSet, key=lambda item: item[1])[0])

    # ---- shrinking phase
    # Sorting does not influence the algorithm; it only gives a tidy order.
    TestMB = sorted(CMB)
    p = len(TestMB)
    keep = [True] * p   # keep[i] is False once TestMB[i] has been discarded
    max_k = 3           # largest conditioning-set size that is tried
    size = 0

    while True:
        for y in range(p):
            if not keep[y]:
                continue
            conditionAllSet = [i for i in range(p) if i != y and keep[i]]
            for S in subsets(conditionAllSet, size):
                condtionVari = [TestMB[i] for i in S]
                ci_number += 1
                pval_sp, _ = cond_indep_test(
                    data, target, TestMB[y], condtionVari, is_discrete)
                # Strict comparison: pval == alaph counts as dependence here,
                # exactly as it does in the growing phase.
                if pval_sp > alaph:
                    keep[y] = False
                    break

        size += 1
        # Continue only while enough survivors remain for the next size and
        # the size limit has not been exceeded.
        if not (sum(keep) >= size and size <= max_k):
            break

    MB = [TestMB[i] for i in range(p) if keep[i]]
    return MB, ci_number
def MBOR(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target`` from PC and spouse supersets.

    The routine builds supersets with ``PCSuperSet`` and ``SPSuperSet``,
    refines the parents/children set with ``MBtoPC`` (including an OR-style
    check from the other side), then searches for spouses among the
    ``MBtoPC`` result of every accepted member.

    Args:
        data: data set; must support ``np.shape(data)`` giving two values and
            ``data.drop(labels, axis=1)`` with columns labelled by the string
            form of their index (presumably a pandas DataFrame - TODO confirm).
        target: column index of the target variable.
        alaph: significance level; ``pval <= alaph`` is treated as dependence.
        is_discrete: forwarded to the helper routines.

    Returns:
        tuple: ``(MB, ci_number)`` - the union of the PC and spouse sets, and
        the accumulated number of conditional-independence tests.
    """
    _, kVar = np.shape(data)
    max_k = 3  # largest conditioning-set size tried in the spouse search
    ci_number = 0

    # Supersets of the parents/children (PCS) and of the spouses (SPS).
    PCS, d_sep, ci_num = PCSuperSet(data, target, alaph, is_discrete)
    ci_number += ci_num
    SPS, ci_num = SPSuperSet(data, target, PCS, d_sep, alaph, is_discrete)
    ci_number += ci_num
    MBS = list(set(PCS).union(set(SPS)))

    # Restrict the data to the target plus the blanket superset.
    drop_data_attribute = [
        str(i) for i in range(kVar) if i != target and i not in MBS
    ]
    data_new = data.drop(drop_data_attribute, axis=1)
    data_attribute = [i for i in range(kVar) if i == target or i in MBS]

    # Parents/children of the target on the reduced data.
    PC, ci_num = MBtoPC(data_new, target, alaph, data_attribute, is_discrete)
    ci_number += ci_num

    # Members of PCS that were not accepted are accepted after all when the
    # target shows up in their own MBtoPC result.
    PCS_rmPC = [i for i in PCS if i not in PC]
    for x in PCS_rmPC:
        x_pcset, ci_num = MBtoPC(data_new, x, alaph, data_attribute, is_discrete)
        ci_number += ci_num
        if target in x_pcset:
            PC.append(x)

    # Spouse search.
    SP = []
    for x in PC:
        # NOTE(review): here the full data set is passed together with an
        # attribute list that excludes the target - confirm this is what
        # MBtoPC expects.
        data_attribute = [i for i in range(kVar) if i != target]
        x_pcset, ci_num = MBtoPC(data, x, alaph, data_attribute, is_discrete)
        ci_number += ci_num
        # candidate spouses: in x's MBtoPC result, not the target, not in PC
        vari_set = [i for i in x_pcset if i != target and i not in PC]
        for y in vari_set:
            break_flag = False
            condition_all_set = [i for i in MBS if i != target and i != y]
            clength = len(condition_all_set)
            if clength > max_k:
                clength = max_k
            for j in range(clength + 1):
                condition_set = subsets(condition_all_set, j)
                for Z in condition_set:
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, y, Z, is_discrete)
                    if pval > alaph:
                        # NOTE(review): after the first separating set has
                        # been handled the scan over same-size sets goes on
                        # until another separating set is met or the sets of
                        # this size are exhausted - confirm this is intended.
                        if break_flag:
                            break
                        else:
                            # Find minimal Z ⊂ MBS\{T ∪ Y } such that T ⊥ Y |Z
                            break_flag = True
                            # y is a spouse if adding x to the separating set
                            # makes target and y dependent again.
                            condition_varis = [i for i in Z]
                            condition_varis.append(x)
                            condition_varis = list(set(condition_varis))
                            ci_number += 1
                            pval, _ = cond_indep_test(data, target, y, condition_varis, is_discrete)
                            if pval <= alaph:
                                SP.append(y)
                # a separating set was found: do not try larger sizes
                if break_flag:
                    break

    MB = list(set(PC).union(set(SP)))
    return MB, ci_number