def Large_Scale_IPCMB(data, targets, threshold, subsize=100):
    """Select features for a wide data set by running IPCMB chunk by chunk.

    The columns of ``data`` are cut into consecutive chunks of at most
    ``subsize`` columns. ``tian_IPCMB`` is run on every chunk and the column
    indices it reports (shifted back to positions in ``data``) are pooled.
    Each column that was not pooled is then tested once: it is added when
    ``cmi(column, targets, joint(pooled columns))`` exceeds ``threshold``.

    Args:
        data: 2-D array of training data without the label column.
        targets: Label vector, forwarded unchanged to ``tian_IPCMB``/``cmi``.
        threshold: Cut-off forwarded to ``tian_IPCMB`` and used for the final
            conditional-mutual-information test.
        subsize: Maximum number of columns per chunk. Defaults to 100, the
            value that was previously hard-coded.

    Returns:
        ``np.ndarray`` holding the selected column indices of ``data``.

    Raises:
        ValueError: If ``subsize`` is smaller than 1.
    """
    if subsize < 1:
        raise ValueError("subsize must be a positive integer")

    numfeat = data.shape[1]
    selected = set()

    # range() stops strictly before numfeat, so no empty trailing chunk is
    # handed to tian_IPCMB when numfeat is an exact multiple of subsize.
    for start in range(0, numfeat, subsize):
        # Slicing clamps at the last column, so the final short chunk needs
        # no special case.
        sub_D = data[:, start:start + subsize]
        results = tian_IPCMB(sub_D, targets, threshold)
        # results[0] holds chunk-local indices; shift them to global ones.
        selected = selected.union(set(results[0] + start))

    Feat = list(selected)

    # The conditioning vector is built once from the pooled features; columns
    # appended in the loop below do not change it.
    cmbVector = joint(data[:, Feat])
    for i in np.setdiff1d(np.arange(numfeat), Feat):
        if cmi(data[:, i], targets, cmbVector) > threshold:
            Feat.append(i)

    return np.array(Feat)
def sort_by_cmi(feat_indices, targets, cond_indices, data):
    """Return ``feat_indices`` ordered from highest to lowest score.

    The score of a feature ``f`` is ``mi(data[:, f], targets)`` when
    ``cond_indices`` is empty, and
    ``cmi(data[:, f], targets, joint(data[:, cond_indices]))`` otherwise,
    i.e. I(X;Y) or I(X;Y|Z) with Z the joint variable of the conditioning
    columns.

    Args:
        feat_indices: Iterable of column indices of ``data`` to rank.
        targets: Target vector passed to ``mi``/``cmi``.
        cond_indices: Array of conditioning column indices (may be empty).
        data: 2-D data array indexed as ``data[:, column]``.

    Returns:
        ``np.ndarray`` with the entries of ``feat_indices`` in descending
        score order; ties keep their input order because the sort is stable.
    """
    features = list(feat_indices)

    if cond_indices.size == 0:
        scores = {f: mi(data[:, f], targets) for f in features}
    elif features:
        # The conditioning variable is the same for every feature, so it is
        # built once instead of once per feature.
        cond_vector = joint(data[:, cond_indices])
        scores = {f: cmi(data[:, f], targets, cond_vector) for f in features}
    else:
        scores = {}

    # Negated key (rather than reverse=True) keeps the original ordering
    # behaviour exactly, including for ties.
    return np.array(sorted(features, key=lambda f: -scores[f]))
def cmi(dataVector, targetVector, conditionVector, length=0):
    """Conditional mutual information of data and target given a condition.

    Computed as the difference of two ``conditional_entropy`` calls: the
    entropy of ``dataVector`` given ``conditionVector`` minus its entropy
    given the merge of ``targetVector`` and ``conditionVector``.

    Args:
        dataVector: Sample vector of the first variable.
        targetVector: Sample vector of the second variable.
        conditionVector: Conditioning samples. When empty, the result is
            ``mi(dataVector, targetVector, 0)``. A 2-D array with more than
            one column is first collapsed with ``joint``.
        length: Number of samples to use; 0 means ``dataVector.size``.

    Returns:
        The value of ``conditional_entropy(data | condition)`` minus
        ``conditional_entropy(data | target merged with condition)``.
    """
    # Nothing to condition on: fall back to plain mutual information.
    if conditionVector.size == 0:
        return mi(dataVector, targetVector, 0)

    # Several conditioning columns are collapsed into a single variable.
    has_many_columns = (len(conditionVector.shape) > 1
                        and conditionVector.shape[1] > 1)
    if has_many_columns:
        conditionVector = joint(conditionVector)

    n_samples = dataVector.size if length == 0 else length

    # Only the second item returned by mergeArrays (the merged vector) is used.
    target_and_condition = mergeArrays(targetVector, conditionVector,
                                       n_samples)[1]
    entropy_given_condition = conditional_entropy(
        dataVector, conditionVector, n_samples)
    entropy_given_both = conditional_entropy(
        dataVector, target_and_condition, n_samples)

    return entropy_given_condition - entropy_given_both
def k2_slow(data, child, parents):
    """Uncached log K2-style score of ``child`` given ``parents``.

    With r the number of distinct child states, N_j the count of each parent
    configuration and N_jk the count of each (child, parent) configuration,
    the score is::

        len(N_j) * gammaln(r) - sum_j gammaln(N_j + r) + sum_jk gammaln(N_jk + 1)

    Args:
        data: 2-D array; columns are cast to ``int`` before counting.
        child: Column index of the child variable.
        parents: Array of parent column indices.

    Returns:
        The score, or ``0`` when ``parents`` is empty.
    """
    if parents.size == 0:
        return 0

    child_col = data[:, child].astype(int)
    n_child_states = counts1d(child_col).size

    # All parent columns are merged into one variable before counting.
    parent_col = joint(data[:, parents].astype(int))
    per_parent_config = counts1d(parent_col)

    per_joint_config = counts(np.column_stack((child_col, parent_col)))

    score = len(per_parent_config) * gammaln(n_child_states)
    for n_parent in per_parent_config:
        score -= gammaln(n_parent + n_child_states)
    for n_joint in per_joint_config:
        # gammaln(1) == gammaln(2) == 0, so counts of 0 or 1 add nothing.
        if n_joint > 1:
            score += gammaln(n_joint + 1)
    return score
def k2(child, parents, context, data, cache):
    """Cached log K2-style score of ``child`` given ``parents``.

    The score formula is the same as in ``k2_slow``::

        len(N_j) * gammaln(r) - sum_j gammaln(N_j + r) + sum_jk gammaln(N_jk + 1)

    Three caches on ``cache`` are consulted and filled:

    * ``cache.joint_cache``: finished scores keyed by
      ``(context[child], context[parents] as bytes)``;
    * ``cache.child_cache``: number of distinct child states keyed by
      ``context[child]``;
    * ``cache.parent_cache``: ``(joint parent states, their counts)`` keyed by
      the parents' bytes.

    Args:
        child: Column index of the child variable in ``data``.
        parents: Array of parent column indices (flattened before use).
        context: Array indexed by column position; its entries are used as
            cache keys (presumably global variable ids; confirm with caller).
        data: 2-D data array; columns are cast to ``int`` before counting.
        cache: Object exposing the three dict-like caches listed above. It is
            mutated in place.

    Returns:
        ``(score, cache)``.
    """
    parents = parents.flatten()
    true_child = context[child]
    # tobytes() is the supported spelling of the deprecated tostring(); both
    # produce the same bytes, so existing cache keys stay valid.
    true_parents = context[parents].tobytes()
    joined = (true_child, true_parents)

    joint_cache = cache.joint_cache
    if joined in joint_cache:
        return joint_cache[joined], cache

    child_states = data[:, child].astype(int)
    child_cache = cache.child_cache
    if true_child in child_cache:
        child_state_count = child_cache[true_child]
    else:
        child_state_count = counts1d(child_states).size
        child_cache[true_child] = child_state_count

    parent_cache = cache.parent_cache
    if true_parents in parent_cache:
        parent_states, parent_state_counts = parent_cache[true_parents]
    else:
        parent_states = joint(data[:, parents].astype(int))
        parent_state_counts = counts1d(parent_states)
        parent_cache[true_parents] = (parent_states, parent_state_counts)

    joint_state_counts = counts(np.column_stack((child_states, parent_states)))

    score = len(parent_state_counts) * gammaln(child_state_count)
    for p_count in parent_state_counts:
        score -= gammaln(p_count + child_state_count)
    for j_count in joint_state_counts:
        # gammaln(1) == gammaln(2) == 0, so counts of 0 or 1 add nothing.
        if j_count > 1:
            score += gammaln(j_count + 1)

    joint_cache[joined] = score
    return score, cache
def RecognizePC(targets, ADJt, data, THRESHOLD, NumTest):
    """Prune candidate neighbours of ``targets`` with (conditional) MI tests.

    Starting with conditioning sets of size 0 and growing the size by one per
    round, every candidate ``X`` in ``ADJt`` is tested against ``targets``.
    A candidate whose score is ``<= THRESHOLD`` is removed at the end of the
    round and, for non-empty conditioning sets, the set that separated it is
    stored in ``Sepset[X]``.

    Args:
        targets: Target vector.
        ADJt: ``np.ndarray`` of candidate column indices of ``data``.
        data: 2-D data array.
        THRESHOLD: Scores at or below this value count as independence.
        NumTest: Running count of tests performed so far.

    Returns:
        ``[ADJ, Sepset, NumTest, cutSetSize, MIs]`` where ``ADJ`` is the
        surviving candidates, ``Sepset`` has one entry per column of ``data``,
        ``NumTest`` is the updated counter, ``cutSetSize`` the conditioning
        set size reached and ``MIs`` a list of one-element lists with every
        score that was computed.
    """
    MIs = []
    # Entries are replaced (never mutated in place), so sharing one [] is safe.
    Sepset = [[]] * data.shape[1]
    rejected = []
    cutSetSize = 0
    datasizeFlag = 0

    while ADJt.size > cutSetSize:
        for X in ADJt:
            x_col = data[:, X]

            if cutSetSize == 0:
                # Unconditional test.
                NumTest = NumTest + 1
                score = mi(x_col, targets, 0)
                MIs.append([score])
                if score <= THRESHOLD:
                    rejected.append(X)

            elif cutSetSize == 1:
                others = np.setdiff1d(ADJt, X)
                for subset in combinations(others, cutSetSize):
                    S = np.array(list(subset))
                    cmbVector = joint(data[:, S])
                    datasizeFlag = checkDataSize(x_col, targets, cmbVector)
                    if datasizeFlag == 1:
                        # Data-size check flagged this test: stop trying
                        # conditioning sets for X.
                        break
                    NumTest = NumTest + 1
                    score = cmi(x_col, targets, cmbVector, 0)
                    MIs.append([score])
                    if score <= THRESHOLD:
                        rejected = set(rejected).union({X})
                        Sepset[X] = set(Sepset[X]).union(set(S))
                        break

            else:  # conditioning sets of size > 1
                others = np.setdiff1d(ADJt, X)
                separated = False
                # Each set is a (cutSetSize - 1)-subset plus one extra element.
                for subset in combinations(others, cutSetSize - 1):
                    S = np.array(list(subset))
                    for extra in np.setdiff1d(others, S):
                        col = set(S).union({extra})
                        cmbVector = joint(data[:, np.array(list(col))])
                        datasizeFlag = checkDataSize(x_col, targets, cmbVector)
                        if datasizeFlag == 1:
                            # Leaves only the inner loop; the next subset is
                            # still tried.
                            break
                        NumTest = NumTest + 1
                        score = cmi(x_col, targets, cmbVector, 0)
                        MIs.append([score])
                        if score <= THRESHOLD:
                            rejected = set(rejected).union({X})
                            # NOTE(review): the original author marked this
                            # Sepset update as "has an error" without saying
                            # why; behaviour is kept unchanged.
                            Sepset[X] = set(Sepset[X]).union(set(S), {extra})
                            separated = True
                            break
                    if separated:
                        break

        if len(rejected) > 0:
            ADJt = np.setdiff1d(ADJt, np.array(list(rejected)))
            cutSetSize = cutSetSize + 1
            rejected = []
        elif datasizeFlag == 1:
            # Nothing was removed and the last data-size check flagged its
            # test: stop growing the conditioning set.
            break
        else:
            cutSetSize = cutSetSize + 1

    return [ADJt, Sepset, NumTest, cutSetSize, MIs]
def CMI_adaptive_pure_soft(X, Y, cond_set, hm_HypoTest):
    """Conditional MI of X and Y given ``cond_set`` via per-stratum adaptive MI.

    The samples are split into strata that share the same value of the
    conditioning variables. ``MI_adaptive_soft`` is evaluated inside every
    stratum and the results are averaged with the stratum frequencies as
    weights. A stratum whose MI is ``np.inf`` contributes 0; if *every*
    stratum reports ``np.inf`` the overall result is ``np.inf``.

    Args:
        X: Sample vector of the first variable.
        Y: Sample vector of the second variable.
        cond_set: Conditioning samples, 1-D or 2-D (samples x variables).
            May be empty, in which case the unconditional adaptive MI is used.
        hm_HypoTest: Running hypothesis-test counter threaded through
            ``MI_adaptive_soft`` / ``jointPDFAdapPartition``.

    Returns:
        A two-item sequence ``(cond_mi, hm_HypoTest)``. The two early exits
        return it as a list and the main path as a tuple (kept as-is for
        backward compatibility; both unpack identically).
    """
    cond_mi = 0
    if len(cond_set.shape) == 1:
        cond_set = cond_set.reshape((cond_set.size, 1))

    if cond_set.size == 0:
        results = MI_adaptive_soft(X, Y, hm_HypoTest)
        # The counter is the third item returned by MI_adaptive_soft, as at
        # every other call site in this file; this path used to read slot 1.
        return [results[0], results[2]]

    # Cheap exit: a plain CMI of exactly 0 is reported as 0 without any
    # adaptive partitioning.
    if cmi(X, Y, cond_set) == 0:
        return [cond_mi, hm_HypoTest]

    # Replace values by dense integer labels.
    _, X = np.unique(X, return_inverse=True)
    _, Y = np.unique(Y, return_inverse=True)
    hm_sample, hm_condvar = cond_set.shape

    if hm_condvar == 1:
        # One stratum per distinct value of the single conditioning variable.
        strata = [np.argwhere(cond_set == pattern).T[0]
                  for pattern in np.unique(cond_set)]
    else:
        # NOTE(review): this branch used MATLAB-style 1-based columns
        # (cond_set[:, 1] / cond_set[:, 2:]), then overwrote both variables
        # with the labels of X and Y and indexed samples with positions
        # relative to a sub-array. It now stratifies on the conditioning
        # variables themselves: first column versus the joint of the rest.
        # Please confirm against the reference implementation.
        C1, var_1 = np.unique(cond_set[:, 0], return_inverse=True)
        C2, var_2 = np.unique(np.ravel(joint(cond_set[:, 1:])),
                              return_inverse=True)
        p = len(C1)
        q = len(C2)
        # Only the updated counter is used from this call.
        _, hm_HypoTest, _ = jointPDFAdapPartition(var_1, var_2, p, q,
                                                  hm_HypoTest)
        strata = []
        for j in range(p):
            rows_j = np.flatnonzero(var_1 == j)
            for k in range(q):
                # Global sample indices with var_1 == j and var_2 == k.
                strata.append(rows_j[var_2[rows_j] == k])

    entire_uniform = 1
    for sub_cond_idx in strata:
        if len(sub_cond_idx) == 0:
            # Empty stratum: weight 0, contributes nothing.
            continue
        p_cond = len(sub_cond_idx) / hm_sample
        results = MI_adaptive_soft(X[sub_cond_idx], Y[sub_cond_idx],
                                   hm_HypoTest)
        temp_mi = results[0]
        hm_HypoTest = results[2]
        if temp_mi == np.inf:
            temp_mi = 0
        else:
            entire_uniform = 0
        cond_mi = cond_mi + p_cond * temp_mi

    if entire_uniform:
        cond_mi = np.inf
    return cond_mi, hm_HypoTest
def RecognizePC_Gtest(targets, ADJt, data):
    """Prune candidate neighbours of ``targets`` with G-test independence tests.

    Conditioning sets start at size 0 and grow by one per round. A candidate
    ``X`` for which ``GTest_I`` / ``GTest_CI`` returns 1 is removed at the end
    of the round and, for non-empty conditioning sets, the separating set is
    stored in ``Sepset[X]``.

    Args:
        targets: Target vector.
        ADJt: ``np.ndarray`` of candidate column indices of ``data``.
        data: 2-D data array.

    Returns:
        ``[ADJ, Sepset, cutSetSize]``: surviving candidates, one separating
        set entry per column of ``data``, and the conditioning size reached.
    """
    # The data-size check is switched off in this variant, so datasizeFlag
    # keeps its initial value of 0 throughout.
    check_data_size = False

    # Entries are replaced (never mutated in place), so sharing one [] is safe.
    Sepset = [[]] * data.shape[1]
    rejected = []
    cutSetSize = 0
    datasizeFlag = 0

    while ADJt.size > cutSetSize:
        for X in ADJt:
            x_col = data[:, X]

            if cutSetSize == 0:
                if GTest_I(x_col, targets) == 1:
                    rejected.append(X)

            elif cutSetSize == 1:
                others = np.setdiff1d(ADJt, X)
                for subset in combinations(others, cutSetSize):
                    S = np.array(list(subset))
                    cmbVector = joint(data[:, S])
                    if check_data_size:
                        datasizeFlag = checkDataSize(x_col, targets, cmbVector)
                    if datasizeFlag == 1:
                        break
                    if GTest_CI(x_col, targets, cmbVector) == 1:
                        rejected = set(rejected).union({X})
                        Sepset[X] = set(Sepset[X]).union(set(S))
                        break

            else:  # conditioning sets of size > 1
                others = np.setdiff1d(ADJt, X)
                separated = False
                # Each set is a (cutSetSize - 1)-subset plus one extra element.
                for subset in combinations(others, cutSetSize - 1):
                    S = np.array(list(subset))
                    for extra in np.setdiff1d(others, S):
                        col = set(S).union({extra})
                        cmbVector = joint(data[:, np.array(list(col))])
                        if check_data_size:
                            datasizeFlag = checkDataSize(x_col, targets,
                                                         cmbVector)
                        if datasizeFlag == 1:
                            break
                        if GTest_CI(x_col, targets, cmbVector) == 1:
                            rejected = set(rejected).union({X})
                            # NOTE(review): the original author marked this
                            # Sepset update as "has an error" without saying
                            # why; behaviour is kept unchanged.
                            Sepset[X] = set(Sepset[X]).union(set(S), {extra})
                            separated = True
                            break
                    if separated:
                        break

        if len(rejected) > 0:
            ADJt = np.setdiff1d(ADJt, np.array(list(rejected)))
            cutSetSize = cutSetSize + 1
            rejected = []
        elif datasizeFlag == 1:
            break
        else:
            cutSetSize = cutSetSize + 1

    return [ADJt, Sepset, cutSetSize]
def Keyi_STMB_Adaptive_soft(train_data, targets, threshold = 0.02):
    """STMB Markov-blanket search using adaptive (conditional) MI tests.

    Steps:
      1. ``find_PC_adpative`` yields the candidate parent/child set ``PCD``
         and the separating sets of the rejected features.
      2. For every ``y`` in ``PCD`` and every feature ``x`` outside ``PCD``,
         ``x`` is tested given ``Sepset[x] + {y}``. A dependent ``x`` is a
         v-structure candidate: ``x`` becomes a spouse candidate of ``y``
         unless some other feature makes ``y`` independent of the target, in
         which case ``y`` is marked for removal from ``PCD``.
      3. Spouse candidates are re-tested given the rest of the blanket and
         dropped when independent.
      4. Members of ``PCD`` are re-tested given the blanket and dropped when
         independent.

    Args:
        train_data: 2-D feature array (no label column).
        targets: Target vector.
        threshold: Cut-off separating dependence from independence.

    Returns:
        ``[MB, PCD, spouse, NumTest, cutSetSize, PCsize, PCsize2]`` where
        ``MB`` is an array with the union of ``PCD`` and all spouses,
        ``spouse`` has one entry per feature, ``PCsize`` is the size of
        ``PCD`` before step 4 and ``PCsize2`` is the mean of an all-zero
        array (0.0, or nan for an empty ``PCD``).
    """
    NumTest = 0
    hm_HypoTest = 0
    numf = train_data.shape[1]
    CanMB = np.arange(numf)

    # find_PC_adpative returns [PC, Sepset, NumTest, cutSetSize, MIs]. Its
    # fifth item is the list of MI values, not the hypothesis-test counter,
    # so it must not be unpacked into hm_HypoTest.
    pc_results = find_PC_adpative(targets, CanMB, train_data, threshold,
                                  NumTest, hm_HypoTest)
    PCD, Sepset_t, NumTest, cutSetSize = pc_results[:4]

    # Entries of both lists are replaced, never mutated in place.
    spouse = [[]] * numf
    Des = [[]] * PCD.size

    # --- Step 2: find spouse candidates and false PC members ---------------
    for yind in range(PCD.size):
        y = PCD[yind]
        y_rejected = False
        for x in np.setdiff1d(CanMB, PCD):
            cond_cols = np.array(list(set(Sepset_t[x]).union({y})))
            cond_data = train_data[:, cond_cols]
            datasizeFlag = Keyi_checkDataSize(train_data[:, x], targets,
                                              joint(cond_data))
            if datasizeFlag != 1:
                NumTest = NumTest + 1
                cond_mi, hm_HypoTest = CMI_adaptive_pure_soft(
                    train_data[:, x], targets, cond_data, hm_HypoTest)
                if cond_mi > threshold:  # v-structure candidate
                    others = np.setdiff1d(
                        np.array(list(set(PCD).union({x}))), y)
                    for s in others:
                        cond_mi, hm_HypoTest = CMI_adaptive_pure_soft(
                            train_data[:, y], targets, train_data[:, s],
                            hm_HypoTest)
                        if cond_mi < threshold:
                            Des[yind] = np.array(
                                list(set(Des[yind]).union({y})))
                            y_rejected = True
                            break
                        spouse[y] = np.array(list(set(spouse[y]).union({x})))
            if y_rejected:
                break

    # Des mixes empty lists and one-element arrays; flatten it before handing
    # it to NumPy, which cannot build an array from a ragged list.
    des = [item for sublist in Des for item in sublist]
    PCD = np.setdiff1d(PCD, des)

    # --- Step 3: shrink spouse sets ----------------------------------------
    S = []
    for i in np.setdiff1d(np.arange(numf), PCD):
        spouse[i] = []
    for y in np.arange(len(spouse)):
        spousecan = spouse[y]
        # len() works for both [] and ndarray entries; comparing an ndarray
        # with [] does not give a usable truth value.
        if len(spousecan) == 0:
            continue
        S.append(y)
        NonS = set()
        col = {y}.union(set(spousecan), set(PCD))
        for s in spousecan:
            cond_data = train_data[:, np.setdiff1d(np.array(list(col)), s)]
            datasizeFlag = Keyi_checkDataSize(train_data[:, s], targets,
                                              joint(cond_data))
            if datasizeFlag != 1:
                NumTest = NumTest + 1
                cond_mi, hm_HypoTest = CMI_adaptive_pure_soft(
                    train_data[:, s], targets, cond_data, hm_HypoTest)
                if cond_mi < threshold:
                    NonS.add(s)
        spouse[y] = np.setdiff1d(spousecan, np.array(list(NonS)))

    b = set()
    for candidates in spouse:
        if len(candidates) > 0:
            b = b.union(set(candidates))

    # --- Step 4: remove false positives from PCD ---------------------------
    M = PCD
    PCsize = M.size
    testSet = set(S).union(b)
    # C is never filled in; it only feeds PCsize2 below.
    C = np.zeros(shape = (PCsize, 1))
    for x in M:
        # PCD may shrink inside this loop, so the set is rebuilt every time.
        col = set(PCD).union(testSet)
        cond_data = train_data[:, np.setdiff1d(np.array(list(col)), x)]
        datasizeFlag = Keyi_checkDataSize(train_data[:, x], targets,
                                          joint(cond_data))
        if datasizeFlag != 1:
            NumTest = NumTest + 1
            cond_mi, hm_HypoTest = CMI_adaptive_pure_soft(
                train_data[:, x], targets, cond_data, hm_HypoTest)
            if cond_mi < threshold:
                PCD = np.setdiff1d(PCD, x)

    PCsize2 = np.mean(C)
    MB = set(PCD).union(b)
    return [np.array(list(MB)), PCD, spouse, NumTest, cutSetSize, PCsize,
            PCsize2]
def STMB_BayesFactor(train_data, targets, bayesfactors):
    """STMB Markov-blanket search using Bayes-factor independence tests.

    Steps:
      1. ``RecognizePC_BayesFactor`` yields the candidate parent/child set
         ``PCD`` and the separating sets of the rejected features.
      2. For every ``y`` in ``PCD`` and every feature ``x`` outside ``PCD``,
         ``x`` is tested given ``Sepset[x] + {y}``. A dependent ``x``
         (``BF_CI`` returns 0) is a v-structure candidate: ``x`` becomes a
         spouse candidate of ``y`` unless some other feature makes ``y``
         independent of the target, in which case ``y`` is marked for removal.
      3. Spouse candidates are re-tested given the rest of the blanket and
         dropped when independent.
      4. Members of ``PCD`` are re-tested given the blanket and dropped when
         independent.

    Data-size checks are disabled in this variant, so every test is run.

    Args:
        train_data: 2-D feature array (no label column).
        targets: Target vector.
        bayesfactors: Forwarded unchanged to ``RecognizePC_BayesFactor`` and
            ``BF_CI``.

    Returns:
        Tuple ``(MB, PCD, spouse, Sepset_t, cutSetSize, PCsize, PCsize2)``
        where ``MB`` is an array with the union of ``PCD`` and all spouses,
        ``PCsize`` is the size of ``PCD`` before step 4 and ``PCsize2`` is
        the mean of an all-zero array (0.0, or nan for an empty ``PCD``).
    """
    numf = train_data.shape[1]
    CanMB = np.arange(numf)

    PCD, Sepset_t, cutSetSize = RecognizePC_BayesFactor(
        targets, CanMB, train_data, bayesfactors)

    # Entries of both lists are replaced, never mutated in place.
    spouse = [[]] * numf
    Des = [[]] * PCD.size

    # --- Step 2: find spouse candidates and false PC members ---------------
    for yind in range(PCD.size):
        y = PCD[yind]
        y_rejected = False
        for x in np.setdiff1d(CanMB, PCD):
            col = set(Sepset_t[x]).union({y})
            cmbVector = joint(train_data[:, np.array(list(col))])
            Independency = BF_CI(train_data[:, x], targets, cmbVector,
                                 bayesfactors)
            if Independency == 0:  # v-structure candidate
                for s in np.setdiff1d(np.union1d(PCD, [x]), np.array([y])):
                    Independency = BF_CI(train_data[:, y], targets,
                                         train_data[:, s], bayesfactors)
                    if Independency == 1:
                        Des[yind] = np.array(list(set(Des[yind]).union({y})))
                        y_rejected = True
                        break
                    spouse[y] = np.array(list(set(spouse[y]).union({x})))
            if y_rejected:
                break

    des = [item for sublist in Des for item in sublist]
    PCD = np.setdiff1d(PCD, des)

    # --- Step 3: shrink spouse sets ----------------------------------------
    S = []
    for i in np.setdiff1d(np.arange(numf), PCD):
        spouse[i] = []
    for y in np.arange(len(spouse)):
        spousecan = spouse[y]
        # len() works for both [] and ndarray entries; comparing an ndarray
        # with [] does not give a usable truth value.
        if len(spousecan) == 0:
            continue
        S.append(y)
        NonS = set()
        col = {y}.union(set(spousecan), set(PCD))
        for s in spousecan:
            cmbVector = joint(
                train_data[:, np.setdiff1d(np.array(list(col)), s)])
            Independency = BF_CI(train_data[:, s], targets, cmbVector,
                                 bayesfactors)
            if Independency == 1:
                NonS.add(s)
        spouse[y] = np.setdiff1d(spousecan, np.array(list(NonS)))

    b = set()
    for candidates in spouse:
        if len(candidates) > 0:
            b = b.union(set(candidates))

    # --- Step 4: remove false positives from PCD ---------------------------
    M = PCD
    PCsize = M.size
    testSet = set(S).union(b)
    # C is never filled in; it only feeds PCsize2 below.
    C = np.zeros(shape = (PCsize, 1))
    for x in M:
        # PCD may shrink inside this loop, so the set is rebuilt every time.
        col = set(PCD).union(testSet)
        cmbVector = joint(train_data[:, np.setdiff1d(np.array(list(col)), x)])
        Independency = BF_CI(train_data[:, x], targets, cmbVector,
                             bayesfactors)
        if Independency == 1:
            PCD = np.setdiff1d(PCD, x)

    PCsize2 = np.mean(C)
    MB = set(PCD).union(b)
    return (np.array(list(MB)), PCD, spouse, Sepset_t, cutSetSize, PCsize,
            PCsize2)
def tian_STMB_new(train_data, targets, threshold = 0.02):
    """STMB Markov-blanket search using (conditional) mutual information.

    Steps:
      1. ``RecognizePC`` yields the candidate parent/child set ``PCD`` and
         the separating sets of the rejected features.
      2. For every ``y`` in ``PCD`` and every feature ``x`` outside ``PCD``,
         ``x`` is tested given ``Sepset[x] + {y}``. A dependent ``x`` is a
         v-structure candidate: ``x`` becomes a spouse candidate of ``y``
         unless some other feature makes ``y`` independent of the target, in
         which case ``y`` is marked for removal from ``PCD``.
      3. Spouse candidates are re-tested given the rest of the blanket and
         dropped when independent.
      4. Members of ``PCD`` are re-tested given the blanket and dropped when
         independent.

    A test is skipped whenever ``checkDataSize`` returns 1 for it.

    Args:
        train_data: 2-D feature array (no label column).
        targets: Target vector.
        threshold: Cut-off separating dependence from independence.

    Returns:
        ``[MB, PCD, spouse, NumTest, Sepset_t, cutSetSize, PCsize, PCsize2]``
        where ``MB`` is an array with the union of ``PCD`` and all spouses,
        ``PCsize`` is the size of ``PCD`` before step 4 and ``PCsize2`` is
        the mean of an all-zero array (0.0, or nan for an empty ``PCD``).
    """
    NumTest = 0
    numf = train_data.shape[1]
    CanMB = np.arange(numf)

    Results = RecognizePC(targets, CanMB, train_data, threshold, NumTest)
    PCD = Results[0]
    Sepset_t = Results[1]
    NumTest = Results[2]
    cutSetSize = Results[3]

    # Entries of both lists are replaced, never mutated in place.
    spouse = [[]] * numf
    Des = [[]] * PCD.size

    # --- Step 2: find spouse candidates and false PC members ---------------
    for yind in range(PCD.size):
        y = PCD[yind]
        y_rejected = False
        for x in np.setdiff1d(CanMB, PCD):
            col = set(Sepset_t[x]).union({y})
            cmbVector = joint(train_data[:, np.array(list(col))])
            datasizeFlag = checkDataSize(train_data[:, x], targets, cmbVector)
            if datasizeFlag != 1:
                NumTest = NumTest + 1
                T = cmi(train_data[:, x], targets, cmbVector, 0)
                if T > threshold:  # v-structure candidate
                    for s in np.setdiff1d(np.union1d(PCD, [x]),
                                          np.array([y])):
                        T = cmi(train_data[:, y], targets, train_data[:, s], 0)
                        if T < threshold:
                            Des[yind] = np.array(
                                list(set(Des[yind]).union({y})))
                            y_rejected = True
                            break
                        spouse[y] = np.array(list(set(spouse[y]).union({x})))
            if y_rejected:
                break

    des = [item for sublist in Des for item in sublist]
    PCD = np.setdiff1d(PCD, des)

    # --- Step 3: shrink spouse sets ----------------------------------------
    S = []
    for i in np.setdiff1d(np.arange(numf), PCD):
        spouse[i] = []
    for y in np.arange(len(spouse)):
        spousecan = spouse[y]
        # len() works for both [] and ndarray entries; comparing an ndarray
        # with [] does not give a usable truth value.
        if len(spousecan) == 0:
            continue
        S.append(y)
        NonS = set()
        col = {y}.union(set(spousecan), set(PCD))
        for s in spousecan:
            cmbVector = joint(
                train_data[:, np.setdiff1d(np.array(list(col)), s)])
            datasizeFlag = checkDataSize(train_data[:, s], targets, cmbVector)
            if datasizeFlag != 1:
                NumTest = NumTest + 1
                T = cmi(train_data[:, s], targets, cmbVector, 0)
                if T < threshold:
                    NonS.add(s)
        spouse[y] = np.setdiff1d(spousecan, np.array(list(NonS)))

    b = set()
    for candidates in spouse:
        if len(candidates) > 0:
            b = b.union(set(candidates))

    # --- Step 4: remove false positives from PCD ---------------------------
    M = PCD
    PCsize = M.size
    testSet = set(S).union(b)
    # C is never filled in; it only feeds PCsize2 below.
    C = np.zeros(shape = (PCsize, 1))
    for x in M:
        # PCD may shrink inside this loop, so the set is rebuilt every time.
        col = set(PCD).union(testSet)
        cmbVector = joint(train_data[:, np.setdiff1d(np.array(list(col)), x)])
        datasizeFlag = checkDataSize(train_data[:, x], targets, cmbVector)
        if datasizeFlag != 1:
            NumTest = NumTest + 1
            T = cmi(train_data[:, x], targets, cmbVector, 0)
            if T < threshold:
                PCD = np.setdiff1d(PCD, x)

    PCsize2 = np.mean(C)
    MB = set(PCD).union(b)
    return [np.array(list(MB)), PCD, spouse, NumTest, Sepset_t, cutSetSize,
            PCsize, PCsize2]
def find_PC_adpative(targets, ADJt, data, THRESHOLD, NumTest, hm_HypoTest):
    """Prune candidate neighbours of ``targets`` with adaptive MI tests.

    Conditioning sets start at size 0 and grow by one per round. A candidate
    ``X`` whose adaptive (conditional) MI with ``targets`` is ``<= THRESHOLD``
    is removed at the end of the round and, for non-empty conditioning sets,
    the separating set is stored in ``Sepset[X]``. A test is skipped when
    ``Keyi_checkDataSize`` returns 1 for it.

    Args:
        targets: Target vector.
        ADJt: ``np.ndarray`` of candidate column indices of ``data``.
        data: 2-D data array.
        THRESHOLD: Scores at or below this value count as independence.
        NumTest: Running count of tests performed so far.
        hm_HypoTest: Running hypothesis-test counter threaded through
            ``MI_adaptive_soft`` / ``CMI_adaptive_pure_soft``.

    Returns:
        ``[ADJ, Sepset, NumTest, cutSetSize, MIs]`` where ``MIs`` holds the
        unconditional MI values as one-element lists.

        NOTE(review): the updated ``hm_HypoTest`` is not part of the return
        value; adding it would change the length callers unpack.
    """
    MIs = []
    # Entries are replaced (never mutated in place), so sharing one [] is safe.
    Sepset = [[]] * data.shape[1]
    NonPC = []
    cutSetSize = 0
    datasizeFlag = 0

    while ADJt.size > cutSetSize:
        for X in ADJt:
            x_col = data[:, X]

            if cutSetSize == 0:
                NumTest = NumTest + 1
                marg_mi, _, hm_HypoTest = MI_adaptive_soft(
                    x_col, targets, hm_HypoTest)
                MIs.append([marg_mi])
                if marg_mi <= THRESHOLD:
                    NonPC.append(X)

            elif cutSetSize == 1:
                Diffx = np.setdiff1d(ADJt, X)
                for subset in combinations(Diffx, cutSetSize):
                    S = np.array(list(subset))
                    cond_data = data[:, S]
                    datasizeFlag = Keyi_checkDataSize(x_col, targets,
                                                      joint(cond_data))
                    if datasizeFlag == 1:
                        break
                    NumTest = NumTest + 1
                    cond_mi, hm_HypoTest = CMI_adaptive_pure_soft(
                        x_col, targets, cond_data, hm_HypoTest)
                    if cond_mi <= THRESHOLD:
                        NonPC = set(NonPC).union({X})
                        Sepset[X] = set(Sepset[X]).union(set(S))
                        break

            else:  # conditioning sets of size > 1
                Diffx = np.setdiff1d(ADJt, X)
                separated = False
                # Each set is a (cutSetSize - 1)-subset plus one extra element.
                for subset in combinations(Diffx, cutSetSize - 1):
                    S = np.array(list(subset))
                    for extra in np.setdiff1d(Diffx, S):
                        col = set(S).union({extra})
                        cond_data = data[:, np.array(list(col))]
                        datasizeFlag = Keyi_checkDataSize(x_col, targets,
                                                          joint(cond_data))
                        if datasizeFlag == 1:
                            # Leaves only the inner loop; the next subset is
                            # still tried.
                            break
                        NumTest = NumTest + 1
                        cond_mi, hm_HypoTest = CMI_adaptive_pure_soft(
                            x_col, targets, cond_data, hm_HypoTest)
                        if cond_mi <= THRESHOLD:
                            NonPC = set(NonPC).union({X})
                            Sepset[X] = set(Sepset[X]).union(set(S), {extra})
                            separated = True
                            break
                    if separated:
                        break

        if len(NonPC) > 0:
            # NonPC is a set after any conditional round; NumPy cannot treat
            # a set as a sequence, so convert it to an array first.
            ADJt = np.setdiff1d(ADJt, np.array(list(NonPC)))
            cutSetSize = cutSetSize + 1
            NonPC = []
        elif datasizeFlag == 1:
            # Nothing was removed and the last data-size check flagged its
            # test: stop growing the conditioning set.
            break
        else:
            cutSetSize = cutSetSize + 1

    return [ADJt, Sepset, NumTest, cutSetSize, MIs]
def tian_IPCMB(train_data, target, threshold):
    """IPCMB Markov-blanket search built on ``RecognizePC``.

    ``RecognizePC`` is first run for the target to get its candidate
    parent/child set ``PC``. For every ``X`` in ``PC`` it is run again with
    ``X`` as the target on a matrix whose column 0 is ``target`` and whose
    remaining columns are all features except ``X``:

    * if the target is not among ``X``'s neighbours, ``X`` is dropped from
      the blanket;
    * otherwise each neighbour ``Y`` of ``X`` that is not already in the
      blanket is tested given ``Sepset[Y] + {X}``; when the conditional MI
      exceeds ``threshold``, ``Y`` is added to the blanket and ``X`` is
      recorded in ``children``.

    Args:
        train_data: 2-D feature array that does not include the target.
        target: Label vector; either 1-D or a single column.
        threshold: Cut-off separating dependence from independence.

    Returns:
        ``[MB, PC, NumTest, children]``: blanket as an array, the initial
        ``PC`` array, the number of tests performed, and the list of features
        recorded as children.
    """
    NumTest = 0
    numf = train_data.shape[1]
    CanMB = np.arange(numf)

    Results = RecognizePC(target, CanMB, train_data, threshold, NumTest)
    PC = Results[0]
    Sepset_t = Results[1]
    NumTest = Results[2]

    MB = PC
    children = []
    targetindex = 0  # column of the target in the augmented matrix below

    for X in PC:
        rest_idx = np.setdiff1d(np.arange(numf), X)
        # column_stack accepts a 1-D target as well as an (n, 1) column.
        temp_trainD = np.column_stack((target, train_data[:, rest_idx]))
        Results = RecognizePC(train_data[:, X], np.arange(numf), temp_trainD,
                              threshold, NumTest)
        temp_CanSP = Results[0]
        NumTest = Results[2]

        if not np.isin(targetindex, temp_CanSP):
            # The relation is not symmetric: X is a false positive.
            MB = np.setdiff1d(MB, X)
            continue

        # Drop the target column, then map augmented-matrix columns back to
        # feature indices: columns 1..X hold features 0..X-1 (shift by one),
        # columns above X already match their feature index.
        CanSP = temp_CanSP[temp_CanSP != 0]
        CanSP[CanSP <= X] -= 1

        # Neighbours of X that are neither in the blanket nor X itself.
        DiffY = np.setdiff1d(np.setdiff1d(CanSP, MB), X)
        for Y in DiffY:
            cond_cols = list(set(Sepset_t[Y]).union({X}))
            cmbVector = joint(train_data[:, cond_cols])
            NumTest = NumTest + 1
            if cmi(train_data[:, Y], target, cmbVector, 0) > threshold:
                children = list(set(children).union({X}))
                MB = list(set(MB).union({Y}))

    return [np.array(MB), PC, NumTest, children]