예제 #1
0
def MI_adaptive_soft(X, Y, hm_HypoTest):
    """Estimate the mutual information between two discrete samples.

    Both inputs are first relabelled with ``np.unique`` to consecutive
    integer codes starting at 1. The estimator is then chosen from the
    smallest occurrence count among the observed (x, y) pairs:

    * smallest count < 3 or > 10: ``mi(X, Y)`` is used and the returned
      joint distribution is the uniform ``m x n`` placeholder;
    * otherwise: the joint mass is estimated by ``jointPDFAdapPartition``
      and the mutual information is derived from it with
      ``get_MI_from_joint_distribution``.

    When either variable takes a single distinct value no estimator is
    run: the result is ``0`` if the smallest count exceeds 10 and
    ``np.inf`` in every other case.

    Args:
        X: Observations of the first variable (presumably 1-D).
        Y: Observations of the second variable, same length as ``X``.
        hm_HypoTest: Value forwarded to ``jointPDFAdapPartition``, which
            returns its replacement; returned untouched on the early
            exits. Presumably a running count of hypothesis tests.

    Returns:
        ``mutual_info, joint_dist, hm_HypoTest``. The two count-based
        early exits return these three items as a ``list``; the other
        paths return a ``tuple``. This difference is kept on purpose so
        that existing callers see the same container types as before.
    """
    x_values, X = np.unique(X, return_inverse=True)
    y_values, Y = np.unique(Y, return_inverse=True)

    # np.unique yields 0-based codes; the helpers are fed 1-based labels.
    X = X + 1
    Y = Y + 1

    m = len(x_values)
    n = len(y_values)
    # A variable with a single level makes the estimate meaningless.
    degenerate = m == 1 or n == 1

    # Uniform placeholder, replaced only when adaptive partitioning runs.
    joint_dist = np.ones([m, n]) * (1 / (m * n))

    _, pair_counts = np.unique(
        np.column_stack((X, Y)), axis=0, return_counts=True)
    smallest_count = min(pair_counts)

    if smallest_count < 3:
        # Decide on the degenerate case first so mi() is never computed
        # just to be thrown away.
        mutual_info = np.inf if degenerate else mi(X, Y)
        return [mutual_info, joint_dist, hm_HypoTest]

    if smallest_count > 10:
        mutual_info = 0 if degenerate else mi(X, Y)
        return [mutual_info, joint_dist, hm_HypoTest]

    if degenerate:
        return np.inf, joint_dist, hm_HypoTest

    # Estimate the joint probability mass by adaptive partitioning.
    joint_dist, hm_HypoTest, isUniform = jointPDFAdapPartition(
        X, Y, m, n, hm_HypoTest)

    mutual_info = np.abs(get_MI_from_joint_distribution(joint_dist))
    # A uniform partition or a (numerically) zero estimate is reported
    # as infinity rather than as a small number.
    if isUniform or mutual_info <= 0.0000001:
        mutual_info = np.inf
    return mutual_info, joint_dist, hm_HypoTest
예제 #2
0
def GTest_I(X, Y):
    """Run a G-test of independence between two discrete samples.

    The statistic is ``G = 2 * N * mi(X, Y, 0)`` with ``N = X.size``. It is
    compared against a chi-square distribution with
    ``(#levels(X) - 1) * (#levels(Y) - 1)`` degrees of freedom at a fixed
    significance level of 0.05.

    Args:
        X: Array of observations of the first variable (``.size`` is read).
        Y: Array of observations of the second variable.

    Returns:
        ``0`` when the null hypothesis of independence is rejected
        (p-value below 0.05), ``1`` otherwise.
    """
    alpha = 0.05

    x_levels = len(np.unique(X))
    y_levels = len(np.unique(Y))
    dof = (x_levels - 1) * (y_levels - 1)

    n_samples = X.size
    g_stat = 2 * n_samples * mi(X, Y, 0)

    p_value = 1 - chi2.cdf(g_stat, dof)

    # A small p-value rejects the null hypothesis of independence.
    return 0 if p_value < alpha else 1
예제 #3
0
def sort_by_cmi(feat_indices, targets, cond_indices, data):
    """Order candidate features by decreasing (conditional) mutual information.

    Each feature ``f`` in ``feat_indices`` is scored against ``targets``:

    * with ``mi(data[:, f], targets)`` when ``cond_indices`` is empty;
    * with ``cmi(data[:, f], targets, Z)`` otherwise, where
      ``Z = joint(data[:, cond_indices])``.

    Args:
        feat_indices: Iterable of column indices into ``data`` to rank.
        targets: Target vector handed to ``mi`` / ``cmi`` as is.
        cond_indices: Array of column indices forming the conditioning set
            (its ``.size`` is read); may be empty.
        data: 2-D array indexed as ``data[:, column]``.

    Returns:
        ``np.ndarray`` holding the entries of ``feat_indices`` sorted from
        the highest score to the lowest; ties keep their input order.
    """
    if cond_indices.size == 0:
        scores = {f: mi(data[:, f], targets) for f in feat_indices}
    else:
        # The conditioning variable does not depend on the feature, so it
        # is built once instead of once per feature.
        cond_vector = joint(data[:, cond_indices])
        scores = {
            f: cmi(data[:, f], targets, cond_vector) for f in feat_indices
        }

    # Negated key (rather than reverse=True) keeps the original ordering
    # semantics exactly.
    return np.array(sorted(feat_indices, key=lambda f: -scores[f]))
예제 #4
0
def cmi(dataVector, targetVector, conditionVector, length=0):
    """Compute the conditional mutual information of data and target.

    The value is the difference of two conditional entropies of
    ``dataVector``: one given ``conditionVector`` alone, and one given the
    merged vector taken from element ``[1]`` of
    ``mergeArrays(targetVector, conditionVector, length)`` (presumably the
    joint of target and condition).

    Args:
        dataVector: Array of observations of the variable of interest.
        targetVector: Array of observations of the target variable.
        conditionVector: Conditioning array. When empty, the plain
            ``mi(dataVector, targetVector, 0)`` is returned. When it has
            more than one column it is collapsed with ``joint`` first.
        length: Number of samples passed to the helpers; ``0`` (default)
            means ``dataVector.size``.

    Returns:
        The conditional mutual information, or the unconditional mutual
        information when ``conditionVector`` is empty.
    """
    # Nothing to condition on: fall back to plain mutual information.
    if conditionVector.size == 0:
        return mi(dataVector, targetVector, 0)

    # Several conditioning columns are collapsed into one joint variable.
    has_many_columns = (len(conditionVector.shape) > 1
                        and conditionVector.shape[1] > 1)
    if has_many_columns:
        conditionVector = joint(conditionVector)

    n_samples = length if length != 0 else dataVector.size

    target_and_condition = mergeArrays(
        targetVector, conditionVector, n_samples)[1]

    entropy_given_condition = conditional_entropy(
        dataVector, conditionVector, n_samples)
    entropy_given_both = conditional_entropy(
        dataVector, target_and_condition, n_samples)

    return entropy_given_condition - entropy_given_both
def RecognizePC(targets, ADJt, data, THRESHOLD, NumTest):
    """Prune a candidate neighbour set with (conditional) independence tests.

    Starting with conditioning sets of size 0 and growing by one per
    round, every candidate ``X`` in ``ADJt`` is tested against ``targets``:

    * size 0: ``mi(data[:, X], targets, 0)``;
    * size >= 1: ``cmi(data[:, X], targets, joint(data[:, S]), 0)`` for
      conditioning sets ``S`` drawn from the other candidates, as long as
      ``checkDataSize`` does not return ``1`` for that test.

    A candidate whose score is ``<= THRESHOLD`` is removed from ``ADJt`` at
    the end of the round and, for size >= 1, the conditioning set that
    separated it is merged into ``Sepset[X]``. The search stops when the
    conditioning-set size reaches the number of remaining candidates, or
    when a round removes nothing and the last ``checkDataSize`` result
    was ``1``.

    NOTE(review): for sizes > 1 the conditioning sets are built as a
    (size - 1)-subset plus one extra element, so the same set can be
    visited more than once; this is kept as is because it affects
    ``NumTest`` and ``MIs``.

    Args:
        targets: Target vector passed to ``mi`` / ``cmi`` / ``checkDataSize``.
        ADJt: 1-D array of candidate column indices into ``data``.
        data: 2-D array indexed as ``data[:, column]``.
        THRESHOLD: Score at or below which a candidate is discarded.
        NumTest: Number of tests performed so far; incremented per test.

    Returns:
        ``[ADJ, Sepset, NumTest, cutSetSize, MIs]`` where ``ADJ`` is the
        surviving candidate array, ``Sepset`` has one entry per column of
        ``data`` (an empty list, or the set of separating indices),
        ``NumTest`` is the updated test count, ``cutSetSize`` is the last
        conditioning-set size reached and ``MIs`` is the list of
        single-element lists holding every score computed, in order.
    """
    MIs = []
    NonPC = set()
    cutSetSize = 0
    # One independent list per column; ``[[]] * n`` would make every entry
    # the very same list object.
    Sepset = [[] for _ in range(data.shape[1])]
    datasizeFlag = 0

    while ADJt.size > cutSetSize:
        for X in ADJt:
            if cutSetSize == 0:
                NumTest = NumTest + 1
                TEMP = mi(data[:, X], targets, 0)
                MIs.append([TEMP])
                if TEMP <= THRESHOLD:
                    NonPC.add(X)
            elif cutSetSize == 1:
                Diffx = np.setdiff1d(ADJt, X)
                for subset in combinations(Diffx, cutSetSize):
                    S = np.array(list(subset))
                    cmbVector = joint(data[:, S])
                    datasizeFlag = checkDataSize(data[:, X], targets,
                                                 cmbVector)
                    if datasizeFlag == 1:
                        # Test rejected by checkDataSize: give up on X for
                        # this round.
                        break
                    NumTest = NumTest + 1
                    TEMP = cmi(data[:, X], targets, cmbVector, 0)
                    MIs.append([TEMP])
                    if TEMP <= THRESHOLD:
                        NonPC.add(X)
                        Sepset[X] = set(Sepset[X]).union(set(S))
                        break
            else:  # conditioning sets larger than one element
                Diffx = np.setdiff1d(ADJt, X)
                separated = False
                for subset in combinations(Diffx, cutSetSize - 1):
                    S = np.array(list(subset))
                    RestSet = np.setdiff1d(Diffx, S)
                    for extra in RestSet:
                        col = set(S).union(set([extra]))
                        cmbVector = joint(data[:, np.array(list(col))])
                        datasizeFlag = checkDataSize(
                            data[:, X], targets, cmbVector)
                        if datasizeFlag == 1:
                            # Only abandons the current S; the next
                            # subset is still tried.
                            break
                        NumTest = NumTest + 1
                        TEMP = cmi(data[:, X], targets, cmbVector, 0)
                        MIs.append([TEMP])
                        if TEMP <= THRESHOLD:
                            NonPC.add(X)
                            Sepset[X] = set(Sepset[X]).union(
                                set(S), set([extra]))
                            separated = True
                            break
                    if separated:
                        break

        if len(NonPC) > 0:
            # Drop everything found independent in this round, then move
            # on to larger conditioning sets.
            ADJt = np.setdiff1d(ADJt, np.array(list(NonPC)))
            cutSetSize = cutSetSize + 1
            NonPC = set()
        elif datasizeFlag == 1:
            break
        else:
            cutSetSize = cutSetSize + 1

    return [ADJt, Sepset, NumTest, cutSetSize, MIs]