def MI_adaptive_soft(X, Y, hm_HypoTest):
    """Estimate the mutual information of X and Y, picking the estimator by cell count.

    Both inputs are re-coded to consecutive integer labels starting at 1.
    The smallest number of samples observed in any occupied (X, Y) cell
    then decides how the estimate is produced:

    * fewer than 3 samples: ``mi(X, Y)`` is used, replaced by ``np.inf``
      when either variable has a single level;
    * more than 10 samples: ``mi(X, Y)`` is used, replaced by ``0`` when
      either variable has a single level;
    * otherwise: ``np.inf`` when either variable has a single level, else
      the joint distribution is estimated with ``jointPDFAdapPartition``
      and turned into a score by ``get_MI_from_joint_distribution``.

    Args:
        X: Sample vector of the first variable.
        Y: Sample vector of the second variable, aligned with ``X``.
        hm_HypoTest: Value threaded through ``jointPDFAdapPartition`` and
            handed back to the caller (presumably a running count of
            hypothesis tests -- confirm against that helper).

    Returns:
        ``mutual_info, joint_dist, hm_HypoTest``. The two ``mi``-based
        branches return these as a *list*; every other branch returns a
        *tuple*. ``joint_dist`` is the uniform ``m x n`` table unless the
        adaptive-partition branch replaced it.
    """
    x_levels, X = np.unique(X, return_inverse=True)
    y_levels, Y = np.unique(Y, return_inverse=True)
    # Shift the inverse indices so the category codes start at 1, not 0.
    X = X + 1
    Y = Y + 1

    m = len(x_levels)
    n = len(y_levels)
    joint_dist = np.ones([m, n]) * (1 / (m * n))
    degenerate = m == 1 or n == 1

    # Number of samples falling into each occupied (X, Y) cell.
    _, cell_counts = np.unique(
        np.column_stack((X, Y)), axis=0, return_counts=True)
    smallest_cell = min(cell_counts)

    if smallest_cell < 3:
        mutual_info = mi(X, Y)
        if degenerate:
            mutual_info = np.inf
        return [mutual_info, joint_dist, hm_HypoTest]

    if smallest_cell > 10:
        mutual_info = mi(X, Y)
        if degenerate:
            mutual_info = 0
        return [mutual_info, joint_dist, hm_HypoTest]

    if degenerate:
        return np.inf, joint_dist, hm_HypoTest

    # Estimate the joint probability mass by adaptive partitioning.
    joint_dist, hm_HypoTest, isUniform = jointPDFAdapPartition(
        X, Y, m, n, hm_HypoTest)
    mutual_info = np.abs(get_MI_from_joint_distribution(joint_dist))
    if isUniform or mutual_info <= 1e-7:
        mutual_info = np.inf
    return mutual_info, joint_dist, hm_HypoTest
def GTest_I(X, Y, sig_level_indep=0.05):
    """Run a G-test of independence between two discrete sample vectors.

    The statistic is ``G = 2 * N * mi(X, Y, 0)`` and is compared against a
    chi-square distribution with ``(|X| - 1) * (|Y| - 1)`` degrees of
    freedom, where ``|X|`` and ``|Y|`` are the numbers of distinct values.
    NOTE(review): this is only a valid G statistic if ``mi`` reports
    mutual information in nats -- confirm against ``mi``.

    Args:
        X: NumPy array of samples of the first variable.
        Y: NumPy array of samples of the second variable.
        sig_level_indep: Significance level for rejecting independence.
            Defaults to 0.05, the value that used to be hard-coded.

    Returns:
        ``0`` when the null hypothesis of independence is rejected,
        ``1`` otherwise.
    """
    hm_x = len(np.unique(X))
    hm_y = len(np.unique(Y))
    dof = (hm_x - 1) * (hm_y - 1)
    if dof <= 0:
        # A variable with a single observed value cannot show dependence,
        # and the chi-square distribution is undefined without positive
        # degrees of freedom. The previous code reached the same answer
        # only because a NaN p-value compared as "not below" the level.
        return 1

    hm_samples = X.size
    g = 2 * hm_samples * mi(X, Y, 0)
    # The survival function keeps precision for tiny p-values, where
    # 1 - cdf would round to zero.
    p_val = chi2.sf(g, dof)
    return 0 if p_val < sig_level_indep else 1
def sort_by_cmi(feat_indices, targets, cond_indices, data):
    """Order feature indices by decreasing (conditional) mutual information.

    Each feature ``f`` in ``feat_indices`` is scored as I(X;Y|Z), where X
    is ``data[:, f]``, Y is ``targets`` and Z is the joint variable built
    from ``data[:, cond_indices]``. When ``cond_indices`` is empty the
    score is the unconditional I(X;Y).

    Args:
        feat_indices: Iterable of column indices of ``data`` to rank.
        targets: Target sample vector.
        cond_indices: NumPy array of column indices to condition on; may
            be empty.
        data: 2-D sample matrix indexed as ``data[:, column]``.

    Returns:
        NumPy array holding the entries of ``feat_indices`` sorted from
        the highest score to the lowest. Ties keep their input order.
    """
    if cond_indices.size == 0:
        feats_to_cmi = {
            feature: mi(data[:, feature], targets)
            for feature in feat_indices
        }
    else:
        # The conditioning variable is the same for every feature, so it
        # is built once instead of once per feature.
        cond_joint = joint(data[:, cond_indices])
        feats_to_cmi = {
            feature: cmi(data[:, feature], targets, cond_joint)
            for feature in feat_indices
        }

    # Negating the score gives a descending sort that is still stable.
    return np.array(sorted(feat_indices, key=lambda f: -feats_to_cmi[f]))
def cmi(dataVector, targetVector, conditionVector, length=0):
    """Compute the conditional mutual information of data and target given a condition.

    The value returned is
    ``conditional_entropy(data, condition) -
    conditional_entropy(data, merged(target, condition))``,
    i.e. presumably H(X|Z) - H(X|Y,Z) = I(X;Y|Z).

    Args:
        dataVector: Samples of the variable X.
        targetVector: Samples of the variable Y.
        conditionVector: Samples of the conditioning variable Z. An empty
            array makes the call fall back to ``mi(dataVector,
            targetVector, 0)``. A 2-D array with more than one column is
            first collapsed into a single variable with ``joint``.
        length: Number of samples to use; ``0`` means ``dataVector.size``.

    Returns:
        The conditional mutual information estimate.
    """
    if conditionVector.size == 0:
        return mi(dataVector, targetVector, 0)

    has_many_columns = (
        len(conditionVector.shape) > 1 and conditionVector.shape[1] > 1)
    if has_many_columns:
        conditionVector = joint(conditionVector)

    if length == 0:
        length = dataVector.size

    # Element 1 of the mergeArrays result is the merged (target, condition)
    # vector; the other elements are not used here.
    mergedVector = mergeArrays(targetVector, conditionVector, length)[1]

    entropy_given_condition = conditional_entropy(
        dataVector, conditionVector, length)
    entropy_given_both = conditional_entropy(
        dataVector, mergedVector, length)
    return entropy_given_condition - entropy_given_both
def RecognizePC(targets, ADJt, data, THRESHOLD, NumTest):
    """Prune the candidate set ``ADJt`` with (conditional) mutual-information tests.

    The search runs in passes with a growing conditioning-set size
    (``cutSetSize``). In each pass every remaining candidate ``X`` is
    tested against ``targets``:

    * size 0: plain ``mi(data[:, X], targets, 0)``;
    * size 1: ``cmi`` given each other single candidate;
    * size k > 1: ``cmi`` given each (k-1)-subset of the other candidates
      extended by one further candidate.

    A candidate whose score is ``<= THRESHOLD`` is dropped at the end of
    the pass and, for sizes >= 1, the conditioning columns are recorded
    in ``Sepset[X]``. Before each conditional test ``checkDataSize`` is
    consulted; a return value of ``1`` stops the scan for that candidate
    and, if nothing was dropped in the pass, ends the whole search.

    Args:
        targets: Target sample vector.
        ADJt: 1-D NumPy array of candidate column indices into ``data``.
        data: 2-D sample matrix indexed as ``data[:, column]``.
        THRESHOLD: Scores at or below this value count as independence.
        NumTest: Number of tests performed so far; incremented per test.

    Returns:
        list: ``[ADJ, Sepset, NumTest, cutSetSize, MIs]`` where ``ADJ`` is
        the pruned candidate array, ``Sepset`` has one entry per column of
        ``data``, and ``MIs`` holds every computed score wrapped in a
        one-element list.
    """
    MIs = []
    NonPC = []
    cutSetSize = 0
    datasizeFlag = 0
    # One slot per data column. All slots start as the same empty list,
    # which is safe here because slots are only replaced, never mutated.
    Sepset = [[]] * data.shape[1]

    while ADJt.size > cutSetSize:
        for X in ADJt:
            if cutSetSize == 0:
                # Unconditional test.
                NumTest = NumTest + 1
                score = mi(data[:, X], targets, 0)
                MIs.append([score])
                if score <= THRESHOLD:
                    NonPC.append(X)
                continue

            others = np.setdiff1d(ADJt, X)

            if cutSetSize == 1:
                for subset in combinations(others, cutSetSize):
                    S = np.array(list(subset))
                    cmbVector = joint(data[:, S])
                    datasizeFlag = checkDataSize(data[:, X], targets, cmbVector)
                    if datasizeFlag == 1:
                        break
                    NumTest = NumTest + 1
                    score = cmi(data[:, X], targets, cmbVector, 0)
                    MIs.append([score])
                    if score <= THRESHOLD:
                        NonPC = set(NonPC).union({X})
                        Sepset[X] = set(Sepset[X]).union(set(S))
                        break
                continue

            # Conditioning sets of size > 1: a (cutSetSize - 1)-subset
            # plus one extra candidate taken from the remainder.
            separator_found = False
            for subset in combinations(others, cutSetSize - 1):
                S = np.array(list(subset))
                RestSet = np.setdiff1d(others, S)
                for extra in RestSet:
                    col = set(S).union({extra})
                    cmbVector = joint(data[:, np.array(list(col))])
                    datasizeFlag = checkDataSize(data[:, X], targets, cmbVector)
                    if datasizeFlag == 1:
                        # Only the scan over extras stops; the subset
                        # loop carries on with the next subset.
                        break
                    NumTest = NumTest + 1
                    score = cmi(data[:, X], targets, cmbVector, 0)
                    MIs.append([score])
                    if score <= THRESHOLD:
                        # NOTE(review): the original author marked this
                        # statement "Line has an error" without saying
                        # what is wrong -- behaviour left unchanged.
                        NonPC = set(NonPC).union({X})
                        Sepset[X] = set(Sepset[X]).union(set(S), {extra})
                        separator_found = True
                        break
                if separator_found:
                    break

        if NonPC:
            # Drop everything found independent in this pass, then grow
            # the conditioning-set size.
            ADJt = np.setdiff1d(ADJt, np.array(list(NonPC)))
            cutSetSize = cutSetSize + 1
            NonPC = []
        elif datasizeFlag == 1:
            break
        else:
            cutSetSize = cutSetSize + 1

    return [ADJt, Sepset, NumTest, cutSetSize, MIs]