예제 #1
0
def RecognizePC(data, target, ADJT, alaph, is_discrete=True):
    """Filter the adjacency candidates of ``target`` with CI tests.

    Conditioning sets are drawn from the other current candidates and grow
    by one element per round.  A candidate ``x`` is dropped as soon as one
    test ``cond_indep_test(data, target, x, S, is_discrete)`` returns a
    p-value greater than ``alaph``; that ``S`` is stored in ``sepset[x]``.

    Args:
        data: 2-D data set; only its column count is read here.
        target: index of the target variable.
        ADJT: list of candidate variable indices (not mutated; a new list
            is built whenever candidates are dropped).
        alaph: significance threshold compared against the p-value.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(ADJT, sepset, ci_number)``: the surviving candidates, the
        per-variable separating sets, and the number of CI tests executed.

    NOTE(review): the search stops at the first round in which nothing is
    removed, so larger conditioning sets are never tried after such a
    round -- confirm this early exit is intended.
    """
    _, n_vars = np.shape(data)
    sepset = [[] for _ in range(n_vars)]
    ci_number = 0
    cond_size = 0

    while len(ADJT) > cond_size:
        rejected = []
        for x in ADJT:
            others = [v for v in ADJT if v != x]
            for S in subsets(others, cond_size):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, x, S, is_discrete)
                if pval > alaph:
                    rejected.append(x)
                    sepset[x] = list(S)
                    break  # one separating set is enough for x

        if not rejected:
            break

        # Drop every variable separated in this round, then widen the sets.
        ADJT = [v for v in ADJT if v not in rejected]
        cond_size += 1

    return ADJT, sepset, ci_number
예제 #2
0
def getMinDep(data, target, x, CPC, alpha, is_discrete):
    """Return the minimum association of ``target`` and ``x`` over subsets of CPC.

    Subsets of ``CPC`` of size 0 up to ``min(len(CPC), 3)`` are used as
    conditioning sets.  The scan stops at the first test whose p-value
    exceeds ``alpha``.

    Returns:
        ``(0, S, ci_number)`` when a conditioning set ``S`` gives a p-value
        greater than ``alpha``; otherwise ``(dep_min, None, ci_number)``
        where ``dep_min`` is the smallest ``dep`` value seen.
        ``ci_number`` is the number of CI tests executed.
    """
    # Original author's rationale for the cap: in these graphs a node rarely
    # has more than three parents or children, so conditioning sets of at
    # most three variables are enough to decide independence, which greatly
    # reduces the running time.
    max_k = 3
    largest = min(len(CPC), max_k)

    ci_number = 0
    dep_min = float("inf")
    for size in range(largest + 1):
        for S in subsets(CPC, size):
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, S, is_discrete)
            if pval > alpha:
                # Judged conditionally independent: association is zero and
                # the separating set is handed back to the caller.
                return 0, S, ci_number
            if dep < dep_min:
                dep_min = dep
    return dep_min, None, ci_number
예제 #3
0
def pc_simple(data, target, alaph, isdiscrete):
    """Select the parents/children candidates of ``target`` (PC-simple scheme).

    Starts from every variable except ``target`` and, for conditioning-set
    sizes k = 0, 1, 2, ..., removes a variable ``x`` as soon as one size-k
    subset of the other candidates gives a p-value above ``alaph``.

    Returns:
        ``(PC, ciTest)``: the remaining candidates and the number of CI
        tests executed.
    """
    _, n_vars = np.shape(data)
    PC = [v for v in range(n_vars) if v != target]
    ciTest = 0
    k = 0

    while len(PC) > k:
        # Conditioning sets for this round come from the candidates as they
        # were at the start of the round, even if some are removed meanwhile.
        round_start = PC.copy()
        for x in round_start:
            others = [v for v in round_start if v != x]
            if len(others) < k:
                continue
            for s in subsets(others, k):
                pval, _ = cond_indep_test(data, x, target, s, isdiscrete)
                ciTest += 1
                if pval > alaph:
                    PC.remove(x)
                    break  # x is separated; no need to try other subsets
        k += 1

    return PC, ciTest
예제 #4
0
def TIE_p(data, target, alaph, isdiscrete):
    """Search for alternative Markov blankets of ``target`` (TIE*-style loop).

    A first blanket is obtained with ``IAMB`` on all variables and scored
    with ``eva_classifier``.  The loop then repeatedly removes a set of
    variables (built from subsets of the most recently accepted blanket,
    at most three elements, joined with the variables removed to obtain
    that blanket), reruns ``IAMB`` on the rest, and accepts the new blanket
    when its score is at least the first blanket's score.

    Returns:
        The list of accepted alternative blankets (the first blanket itself
        is not included).
    """
    _, n_vars = np.shape(data)
    max_k = 3

    first_mb, _ = IAMB(data, target, alaph, list(range(n_vars)), isdiscrete)
    base_score = eva_classifier(data, target, first_mb)

    blankets = [first_mb]      # accepted blankets, in order of acceptance
    removed_for = [[]]         # variables excluded to obtain blankets[i]
    blocked = []               # never filled in this function: filter is a no-op
    queue = []                 # exclusion sets generated so far (deduplicated)
    cursor = 0                 # next entry of `queue` to try
    current = 0                # index of the blanket being expanded
    accepted = []

    while True:
        # Enqueue every non-empty subset (size <= max_k) of the current
        # blanket, joined with the exclusions that produced that blanket.
        top = min(len(blankets[current]), max_k)
        for size in range(1, top + 1):
            for combo in subsets(blankets[current], size):
                if any(set(combo).issuperset(set(b)) for b in blocked):
                    continue
                exclusion = list(set(combo).union(set(removed_for[current])))
                if exclusion not in queue:
                    queue.append(exclusion)

        if cursor >= len(queue):
            break
        excluded = queue[cursor]
        cursor += 1

        remaining = [v for v in range(n_vars) if v not in excluded]
        new_mb, _ = IAMB(data, target, alaph, remaining, isdiscrete)
        if new_mb == [] or new_mb in accepted:
            continue

        new_score = eva_classifier(data, target, new_mb)
        if base_score <= new_score:
            accepted.append(new_mb)
            blankets.append(new_mb)
            removed_for.append(excluded)
            current += 1

    return accepted
예제 #5
0
파일: MBOR.py 프로젝트: jhj111/pyCausalFS
def MBtoPC(data, target, alaph, attribute, is_discrete):
    """Reduce the IAMB result for ``target`` to a parents/children set.

    ``IAMB`` is run on ``attribute`` first.  Each returned variable ``x``
    is then tested against ``target`` given subsets (size 0 to 3) of the
    other returned variables and dropped at the first p-value above
    ``alaph``.

    Returns:
        ``(PC, ci_number)``: the retained variables and the total number of
        CI tests (IAMB's count plus the tests made here).
    """
    max_k = 3
    MB, ci_number = IAMB(data, target, alaph, attribute, is_discrete)

    PC = MB.copy()
    for x in MB:
        # Conditioning sets always come from the full IAMB result, not from
        # the shrinking PC list.
        rest = [v for v in MB if v != x]
        separated = False
        for size in range(min(len(rest), max_k) + 1):
            for Z in subsets(rest, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, x, Z, is_discrete)
                if pval > alaph:
                    PC.remove(x)
                    separated = True
                    break
            if separated:
                break
    return PC, ci_number
예제 #6
0
def HITON_PC(data, target, alaph, is_discrete=True):
    """Find parents/children candidates of ``target`` (HITON-PC scheme).

    Variables whose marginal test against ``target`` has a p-value of at
    most ``alaph`` are ranked by decreasing ``dep`` and admitted one at a
    time.  After each admission every current member is re-tested, newest
    first, against subsets (size 0 to 3) of the other members and removed
    at the first p-value above ``alaph``.

    Args:
        data: 2-D data set; only its column count is read here.
        target: index of the target variable.
        alaph: significance threshold compared against p-values.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(PC, sepset, ci_number)``: the retained variables (deduplicated
        through a set, so order is not guaranteed), the separating set
        recorded for each removed variable, and the number of CI tests.

    Fix: the previous ``while PC_index >= 0`` loop decremented the index to
    -1 and therefore read ``PC[-1]`` again, re-testing the newest variable
    a second time in every round.  Those repeat tests only used subsets
    already tried, so they could not change the result but inflated
    ``ci_number``; each member is now visited exactly once per round.
    """
    _, kVar = np.shape(data)
    sepset = [[] for _ in range(kVar)]
    ci_number = 0
    max_k = 3

    # Marginal screening: keep variables dependent on the target.
    ranked = []
    for x in range(kVar):
        if x == target:
            continue
        ci_number += 1
        pval_gp, dep_gp = cond_indep_test(data, target, x, [], is_discrete)
        if pval_gp <= alaph:
            ranked.append([x, dep_gp])

    # Strongest association first (stable for ties).
    ranked = sorted(ranked, key=lambda item: item[1], reverse=True)

    PC = []
    for x, _ in ranked:
        PC.append(x)

        # Visit members newest-first.  Iterating a snapshot is equivalent to
        # walking the live list backwards by index because only the member
        # currently under test can be removed.
        for y in reversed(PC.copy()):
            conditions_Set = [i for i in PC if i != y]
            removed = False

            for size in range(min(len(conditions_Set), max_k) + 1):
                for s in subsets(conditions_Set, size):
                    ci_number += 1
                    conditions_test_set = list(s)
                    pval_rm, _ = cond_indep_test(
                        data, target, y, conditions_test_set, is_discrete)
                    if pval_rm > alaph:
                        sepset[y] = list(conditions_test_set)
                        PC.remove(y)
                        removed = True
                        break
                if removed:
                    break

            # If the variable just admitted is rejected straight away, the
            # other members saw no change, so re-testing them is pointless.
            if removed and y == x:
                break

    return list(set(PC)), sepset, ci_number
예제 #7
0
def IAMBnPC(data, target, alaph, is_discrete=True):
    """Estimate a Markov blanket of ``target``: IAMB growing + PC-style shrinking.

    Growing phase: repeatedly test every variable outside the current
    blanket against ``target`` given the whole blanket; among those with a
    p-value of at most ``alaph``, add the one with the largest ``dep``.
    Stop when no variable qualifies.

    Shrinking phase: for conditioning-set sizes 0, 1, 2, ... (at most 3),
    drop a member when some subset of the other surviving members gives a
    p-value greater than or equal to ``alaph``.

    Returns:
        ``(MB, ci_number)``: surviving members in ascending index order and
        the number of CI tests executed.
    """
    _, kVar = np.shape(data)
    ci_number = 0
    CMB = []

    # ---- growing phase ----
    while True:
        dependents = []
        outside = [v for v in range(kVar) if v != target and v not in CMB]
        for x in outside:
            ci_number += 1
            pval, dep = cond_indep_test(data, target, x, CMB, is_discrete)
            if pval <= alaph:
                dependents.append([x, dep])
        if not dependents:
            break
        dependents = sorted(dependents, key=lambda item: item[1], reverse=True)
        CMB.append(dependents[0][0])

    # ---- shrinking phase ----
    # Sorting does not influence the result; the original author sorted for
    # readability only.
    TestMB = sorted(CMB)
    p = len(TestMB)
    alive = [True] * p          # alive[i] is True while TestMB[i] is kept
    max_k = 3                   # largest conditioning-set size tried
    size = 0

    while True:
        for y in range(p):
            if not alive[y]:
                continue
            pool = [i for i in range(p) if i != y and alive[i]]
            for S in subsets(pool, size):
                cond_vars = [TestMB[i] for i in S]
                ci_number += 1
                pval_sp, _ = cond_indep_test(data, target, TestMB[y],
                                             cond_vars, is_discrete)
                if pval_sp >= alaph:
                    alive[y] = False
                    break

        size += 1
        # Go on only while enough members survive and the cap is respected.
        if not (sum(alive) >= size and size <= max_k):
            break

    MB = [TestMB[i] for i in range(p) if alive[i]]
    return MB, ci_number
예제 #8
0
def MBbyMB(data, target, alpha, is_discrete=True):
    """Learn the local structure around ``target`` one Markov blanket at a time.

    Variables are processed breadth-first starting from ``target``.  For
    each variable ``A`` its blanket is obtained with ``MMMB``; every member
    ``B`` is linked to ``A`` and then tested against subsets (size 0 to 3)
    of the other members.  Members that can be separated become spouse
    candidates, the rest stay in ``all_pc[A]``.  Collider patterns are then
    marked in ``pdag`` and ``meek`` is applied.  The loop stops early once
    more variables than ``len(all_mb[target])`` have been processed and no
    entry equal to 1 remains in the target's row or column of ``pdag``.

    In ``pdag`` the code reads ``pdag[i, j] == -1`` as "i -> j" and
    ``pdag[i, j] == 1`` as an undirected link.

    Returns:
        ``(parents, children, PC, undirected)`` of ``target``.  The CI-test
        counter is accumulated internally but not returned.
    """
    max_k = 3
    _, kvar = np.shape(data)

    DAG = np.zeros((kvar, kvar))
    pdag = DAG.copy()
    G = DAG.copy()

    needs_mb = [True] * kvar
    all_pc = [[] for _ in range(kvar)]
    all_mb = [[] for _ in range(kvar)]
    all_can_spouse = [[] for _ in range(kvar)]
    all_sepset = [[[] for _ in range(kvar)] for _ in range(kvar)]

    queue = [target]
    visited = []
    ci_test = 0
    num_calculated = 0

    while len(visited) <= kvar and queue:
        A = queue.pop(0)
        if A in visited:
            continue
        visited.append(A)

        # Markov blanket of A (computed at most once per variable).
        if needs_mb[A]:
            all_mb[A], ntest = MMMB(data, A, alpha, is_discrete)
            ci_test += ntest
            needs_mb[A] = False

        all_pc[A] = all_mb[A].copy()

        for B in all_mb[A]:
            queue.append(B)
            DAG[A, B] = 1
            DAG[B, A] = 1
            if pdag[A, B] == 0 and pdag[B, A] == 0:
                for mat in (pdag, G):
                    mat[A, B] = 1
                    mat[B, A] = 1

            # Try to separate A and B with subsets of the other members.
            others = [v for v in all_mb[A] if v != B]
            separated = False
            for size in range(min(len(others), max_k) + 1):
                for z in subsets(others, size):
                    ci_test += 1
                    pval, _ = cond_indep_test(data, B, A, z, is_discrete)
                    if pval > alpha:
                        all_sepset[A][B] = list(z)
                        all_sepset[B][A] = list(z)
                        for mat in (DAG, pdag, G):
                            mat[A, B] = 0
                            mat[B, A] = 0
                        all_pc[A] = [v for v in all_pc[A] if v != B]
                        all_can_spouse[A].append(B)
                        separated = True
                        break
                if separated:
                    break

        # Collider marking: A -> B <- C when B is not in sepset(A, C).
        for C in all_can_spouse[A]:
            for B in all_pc[A]:
                if B in all_sepset[A][C]:
                    continue
                DAG[A, B] = 1
                DAG[B, A] = 1

                pdag[A, B] = -1
                pdag[B, A] = 0
                pdag[C, B] = -1
                pdag[B, C] = 0

                G[A, B] = 1
                G[B, A] = 0
                G[C, B] = 1
                G[B, C] = 0

        [DAG, pdag, G] = meek(DAG, pdag, G, kvar)

        num_calculated += 1
        if num_calculated > len(all_mb[target]):
            if 1 not in pdag[target, :] and 1 not in pdag[:, target]:
                break

    parents = [i for i in range(kvar) if pdag[i, target] == -1]
    children = [i for i in range(kvar) if pdag[target, i] == -1]
    undirected = [i for i in range(kvar) if pdag[target, i] == 1]
    PC = list(set(parents).union(set(children)).union(set(undirected)))

    return parents, children, PC, undirected


# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/alarm_data/Alarm1_s5000_v6.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alaph = 0.01
#
# for target in range(kvar):
#     P, C, PC, und = MBbyMB(data, target, alaph, True)
#     print(target," -P: ", P, " ,C: ", C, " ,PC: ", PC, " ,undire: ",und)
예제 #9
0
def BAMB(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target`` (BAMB scheme).

    Variables marginally dependent on ``target`` are processed by
    decreasing ``dep``.  For each such variable ``A``:

    1. ``A`` is skipped if some subset (size 0 to 3) of the current
       candidate parents/children ``CPC`` separates it from ``target``;
       otherwise it joins ``CPC`` and the earlier members are re-checked.
    2. Candidate spouses reachable through ``A`` are collected in
       ``CSPT[A]`` and pruned in three passes (steps 2-1, 2-2, 2-3).
    3. ``CPC`` members are re-tested with conditioning sets that also
       contain the candidate spouses of the conditioning variables.

    Args:
        data: 2-D data set; only its column count is read here.
        target: index of the target variable.
        alaph: significance threshold compared against p-values.
        is_discrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(MB, ci_number)``: the union of ``CPC`` and the candidate spouses
        of its members, and the number of CI tests executed.

    Fixes relative to the previous version:
      * The CPC re-check did not ``break`` after removing ``B``; a second
        separating set of the same size triggered another
        ``CPC.remove(B)`` and a ``ValueError``.
      * The reverse scans used ``while idx >= 0`` with a leading
        decrement, so index -1 wrapped around and the newest element was
        tested a second time.  Each element is now visited once.
      * The hard-coded ``3`` in step 1 now uses ``max_k``.
    """
    ci_number = 0
    _, kVar = np.shape(data)
    max_k = 3
    CPC = []
    sepset = [[] for _ in range(kVar)]
    CSPT = [[] for _ in range(kVar)]
    SP = [[] for _ in range(kVar)]
    variDepSet = []

    # Marginal screening of every variable against the target.
    for x in range(kVar):
        if x == target:
            continue
        ci_number += 1
        pval_f, dep_f = cond_indep_test(data, target, x, [], is_discrete)
        if pval_f > alaph:
            sepset[x] = []
        else:
            variDepSet.append([x, dep_f])

    variDepSet = sorted(variDepSet, key=lambda item: item[1], reverse=True)

    # ---- step 1: candidate PC set and candidate spouses ----
    for A, _ in variDepSet:
        # Can the current CPC already separate A from the target?
        separated = False
        for j in range(min(len(CPC), max_k) + 1):
            for Z in subsets(CPC, j):
                ci_number += 1
                convari = list(Z)
                pval_TAZ, _ = cond_indep_test(data, target, A, convari,
                                              is_discrete)
                if pval_TAZ > alaph:
                    sepset[A] = convari
                    separated = True
                    break
            if separated:
                break
        if separated:
            continue

        # A joins CPC; re-check the earlier members, newest first.
        # Conditioning sets come from the membership before A was added.
        CPC_ReA = CPC.copy()
        CPC.append(A)
        for B in reversed(CPC_ReA):
            conditionSet = [i for i in CPC_ReA if i != B]
            removed = False
            for j in range(min(len(conditionSet), max_k) + 1):
                for Z in subsets(conditionSet, j):
                    ci_number += 1
                    convari = list(Z)
                    pval_TBZ, _ = cond_indep_test(
                        data, target, B, convari, is_discrete)
                    if pval_TBZ >= alaph:
                        CPC.remove(B)
                        CSPT[B] = []
                        sepset[B] = convari
                        removed = True
                        # Stop here: B is gone, a second remove would raise.
                        break
                if removed:
                    break

        # Candidate spouses via A: variables outside CPC that become
        # dependent on the target once A joins their separating set.
        CSPT[A] = []
        pval_CSPT = []
        for C in range(kVar):
            if C == target or C in CPC:
                continue
            conditionSet = list(sepset[C])
            conditionSet.append(A)
            conditionSet = list(set(conditionSet))

            ci_number += 1
            pval_CAT, _ = cond_indep_test(data, target, C, conditionSet,
                                          is_discrete)
            if pval_CAT <= alaph:
                CSPT[A].append(C)
                pval_CSPT.append([C, pval_CAT])

        # ---- step 2-1: prune spouses using one other spouse plus A ----
        pval_CSPT = sorted(pval_CSPT, key=lambda item: item[1], reverse=False)
        SP[A] = []
        for E, _ in pval_CSPT:
            SP[A].append(E)
            stop_scan = False
            for x in reversed(SP[A].copy()):
                removed = False
                for Z in [i for i in SP[A] if i != x]:
                    conditionvari = [Z]
                    if A not in conditionvari:
                        conditionvari.append(A)
                    ci_number += 1
                    pval_TXZ, _ = cond_indep_test(data, target, x,
                                                  conditionvari,
                                                  is_discrete)
                    if pval_TXZ > alaph:
                        SP[A].remove(x)
                        removed = True
                        break
                if removed:
                    # Any removal ends this backward pass.
                    stop_scan = (x == E)
                    break
            # NOTE(review): when the spouse just added (E) is rejected the
            # scan over the remaining candidates stops altogether, unlike
            # steps 2-2 and 2-3 which move on to the next candidate.
            # Behaviour kept as in the original -- confirm it is intended.
            if stop_scan:
                break

        # ---- step 2-2: prune spouses independent of A ----
        pval_CSPT_new = [item for item in pval_CSPT if item[0] in SP[A]]
        CSPT[A] = SP[A]
        SP[A] = []
        for E, _ in pval_CSPT_new:
            SP[A].append(E)
            for x in reversed(SP[A].copy()):
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                removed = False
                for j in range(min(len(ZAllSubsets), max_k) + 1):
                    for Z in subsets(ZAllSubsets, j):
                        Z = list(Z)
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(
                            data, A, x, Z, is_discrete)
                        if pval_TXZ > alaph:
                            SP[A].remove(x)
                            removed = True
                            break
                    if removed:
                        break
                # The newly added spouse was rejected: nothing else changed.
                if removed and x == E:
                    break

        # ---- step 2-3: prune spouses independent of the target given A ----
        pval_CSPT_fin = [item for item in pval_CSPT if item[0] in SP[A]]
        CSPT[A] = SP[A]
        SP[A] = []
        for E, _ in pval_CSPT_fin:
            SP[A].append(E)
            for x in reversed(SP[A].copy()):
                ZAllSubsets = list(set(CPC).union(set(SP[A])))
                ZAllSubsets.remove(x)
                ZAllSubsets.remove(A)
                removed = False
                for j in range(min(len(ZAllSubsets), max_k) + 1):
                    for Z in subsets(ZAllSubsets, j):
                        Z = list(Z)
                        Z.append(A)
                        ci_number += 1
                        pval_TXZ, _ = cond_indep_test(
                            data, target, x, Z, is_discrete)
                        if pval_TXZ >= alaph:
                            SP[A].remove(x)
                            removed = True
                            break
                    if removed:
                        break
                if removed and x == E:
                    break

        CSPT[A] = SP[A]

        # ---- step 3: remove false positives from CPC ----
        for x in reversed(CPC.copy()):
            ZZALLsubsets = [i for i in CPC if i != x]
            removed = False
            for j in range(min(len(ZZALLsubsets), max_k) + 1):
                for Z in subsets(ZZALLsubsets, j):
                    # Conditioning set: Z plus the candidate spouses of its
                    # members that are not themselves in CPC.
                    conditionSet = [
                        i for y in Z for i in CSPT[y] if i not in CPC
                    ]
                    conditionSet = list(set(conditionSet).union(set(Z)))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, x,
                                              conditionSet, is_discrete)
                    if pval >= alaph:
                        CPC.remove(x)
                        CSPT[x] = []
                        removed = True
                        break
                if removed:
                    break
            # A itself was rejected: the other members need no re-check.
            if removed and x == A:
                break

    spouseT = [j for i in CPC for j in CSPT[i]]
    MB = list(set(CPC).union(set(spouseT)))
    return MB, ci_number
예제 #10
0
def pc(Data, alpha):
    """Learn a partially directed graph over all columns of ``Data`` (PC scheme).

    Stage 1 starts from a complete undirected skeleton and deletes the
    edge x-y when some size-n subset of x's other neighbours gives a
    p-value above ``alpha`` (n = 0, 1, 2, ...).  Stage 2 orients
    x -> y <- z for non-adjacent x, z whose recorded separating sets do
    not contain y.  Stage 3 applies three orientation rules until the
    graph stops changing.

    Encoding of the returned matrix: ``pDAG[i, j] == pDAG[j, i] == 1`` is
    an undirected edge; ``pDAG[i, j] == -1`` with ``pDAG[j, i] == 0`` is
    the directed edge i -> j.

    Args:
        Data: 2-D data set exposing ``.shape``.
        alpha: significance threshold compared against p-values.

    Returns:
        ``(pDAG, ind_test)``: the partially directed adjacency matrix and
        the number of CI tests executed.

    Fixes relative to the previous version:
      * ``sepset`` rows were built with ``[[]] * NbVar``, so every cell of
        a row shared one list and separating sets leaked between pairs.
      * The collider test used ``[y] not in sepset[x][z]`` (exact-set
        match) instead of checking whether ``y`` belongs to a recorded
        separating set.
      * ``old_pDAG = pDAG`` aliased the matrix, so the orientation loop
        always stopped after one pass; a copy is now compared.
      * Rule 1 scanned ``DAG == -1`` (never true for the 0/1 skeleton) and
        rules 2/3 scanned skeleton edges; all rules now scan ``pDAG``.
      * Non-adjacency in rules 1 and 3 is now read from the skeleton; a
        0 entry in ``pDAG`` can also be the reverse side of a directed edge.
    """
    time_start = time.time()

    ind_test = 0  # number of conditional independence tests
    _, NbVar = Data.shape

    # One independent list of separating sets per ordered pair.
    sepset = [[[] for _ in range(NbVar)] for _ in range(NbVar)]

    DAG = np.ones((NbVar, NbVar))
    for i in range(NbVar):
        DAG[i][i] = 0

    # stage 1: construct the skeleton
    n = 0
    done = False
    while not done:
        done = True
        for x in range(NbVar):
            adjx = [i for i in range(NbVar) if DAG[x, i] == 1]
            if len(adjx) < n:
                continue
            done = False
            for y in adjx:
                cx_y = [i for i in adjx if i != y]  # Adj(x) \ {y}
                for S in subsets(cx_y, n):
                    cond = list(map(int, S))
                    pval, _ = cond_indep_test(Data, x, y, cond, True)
                    ind_test += 1
                    if pval > alpha:
                        DAG[x, y] = 0
                        DAG[y, x] = 0
                        if cond not in sepset[x][y]:
                            sepset[x][y].append(list(cond))
                        if cond not in sepset[y][x]:
                            sepset[y][x].append(list(cond))
                        break
        n += 1

    # stage 2: v-structures, X - Y - Z  =>  X -> Y <- Z
    print('stage 2')
    pDAG = DAG.copy()

    skeleton_edges = [(i, j) for i in range(NbVar) for j in range(NbVar)
                      if DAG[i, j] == 1]
    for x, y in skeleton_edges:
        for z in range(NbVar):
            if z == x or DAG[y, z] != 1:
                continue
            y_separates = any(y in s for s in sepset[x][z])
            if DAG[x, z] == 0 and not y_separates:
                pDAG[x, y] = -1
                pDAG[y, x] = 0
                pDAG[z, y] = -1
                pDAG[y, z] = 0

    def _undirected_edges():
        # Ordered pairs (a, b) whose edge is still undirected in pDAG.
        return [(i, j) for i in range(NbVar) for j in range(NbVar)
                if pDAG[i, j] == 1 and pDAG[j, i] == 1]

    # stage 3: propagate orientations until nothing changes
    print('stage 3')
    old_pDAG = np.zeros((NbVar, NbVar))
    while not (pDAG == old_pDAG).all():
        old_pDAG = pDAG.copy()

        # rule 1: A -> B -- C, A and C non-adjacent  =>  B -> C
        directed = [(i, j) for i in range(NbVar) for j in range(NbVar)
                    if pDAG[i, j] == -1]
        for a, b in directed:
            for c in range(NbVar):
                if (pDAG[b, c] == 1 and pDAG[c, b] == 1
                        and DAG[a, c] == 0):
                    pDAG[b, c] = -1
                    pDAG[c, b] = 0

        # rule 2: A -> C -> B and A -- B  =>  A -> B
        for a, b in _undirected_edges():
            if pDAG[a, b] != 1 or pDAG[b, a] != 1:
                continue  # oriented earlier in this pass
            if np.any((pDAG[a, :] == -1) & (pDAG[:, b] == -1)):
                pDAG[a, b] = -1
                pDAG[b, a] = 0

        # rule 3: A -- C -> B, A -- D -> B, C and D non-adjacent,
        #         A -- B  =>  A -> B
        for a, b in _undirected_edges():
            if pDAG[a, b] != 1 or pDAG[b, a] != 1:
                continue  # oriented earlier in this pass
            C = [j for j in range(NbVar)
                 if pDAG[a, j] == 1 and pDAG[j, a] == 1 and pDAG[j, b] == -1]
            if any(DAG[c, d] == 0 for c in C for d in C if c != d):
                pDAG[a, b] = -1
                pDAG[b, a] = 0

    time_end = time.time()
    time_cost = time_end - time_start
    print('running time is:', time_cost, 's')
    return pDAG, ind_test
예제 #11
0
def getPCD(data, target, alaph, is_discrete):
    """Search for the PCD set of ``target`` by alternating grow and shrink steps.

    Every round:

    1. drops each remaining candidate that some subset (at most ``max_k``
       variables) of the current PCD renders independent of ``target``;
    2. adds the surviving candidate whose minimum dependence is largest;
    3. re-checks every member of PCD against subsets of the other members
       and removes the ones that become independent of ``target``.

    The search ends when no candidate survives step 1.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        target: Column index of the target variable.
        alaph: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(PCD, sepset, ci_number)``: the de-duplicated PCD list, a
        per-variable list holding the conditioning set that separated that
        variable from ``target`` (empty when none was recorded), and the
        number of conditional-independence tests that were run.
    """
    number, kVar = np.shape(data)
    max_k = 3
    PCD = []
    ci_number = 0

    # sepset[v] stores a conditioning set that made target and v
    # conditionally independent (v is then dropped from CanPCD or PCD).
    sepset = [[] for i in range(kVar)]

    while True:
        variDepSet = []
        CanPCD = [i for i in range(kVar) if i != target and i not in PCD]

        # Iterate over a copy because CanPCD is pruned inside the loop.
        for vari in CanPCD.copy():
            breakFlag = False
            dep_gp_min = float("inf")
            vari_min = -1
            Plength = min(len(PCD), max_k)

            for j in range(Plength + 1):
                for S in subsets(PCD, j):
                    ci_number += 1
                    pval_gp, dep_gp = cond_indep_test(data, target, vari, S,
                                                      is_discrete)

                    if pval_gp > alaph:
                        # Independent given S: discard the candidate.
                        vari_min = -1
                        CanPCD.remove(vari)
                        sepset[vari] = [i for i in S]
                        breakFlag = True
                        break
                    elif dep_gp < dep_gp_min:
                        dep_gp_min = dep_gp
                        vari_min = vari

                if breakFlag:
                    break

            # Keep [variable, minimum dependence] for surviving candidates.
            if vari_min in CanPCD:
                variDepSet.append([vari_min, dep_gp_min])

        # No surviving candidate means PCD can no longer change.
        if not variDepSet:
            break

        # Candidate with the largest minimum dependence (first one on ties).
        y = max(variDepSet, key=lambda pair: pair[1])[0]
        PCD.append(y)

        # Shrink phase: visit every member exactly once, newest first.
        # The snapshot makes removals safe; the previous index-based loop ran
        # one extra pass with index -1 and re-tested the newest member.
        for x in reversed(PCD.copy()):
            removed = False
            conditionSetALL = [i for i in PCD if i != x]
            Slength = min(len(conditionSetALL), max_k)

            for j in range(Slength + 1):
                for S in subsets(conditionSetALL, j):
                    ci_number += 1
                    pval_sp, dep_sp = cond_indep_test(
                        data, target, x, S, is_discrete)

                    if pval_sp > alaph:
                        PCD.remove(x)
                        sepset[x] = [i for i in S]
                        removed = True
                        break
                if removed:
                    break

            # If the variable that was just added is rejected again, the
            # remaining members are not re-checked in this round.
            if removed and x == y:
                break

    return list(set(PCD)), sepset, ci_number
예제 #12
0
파일: STMB.py 프로젝트: jhj111/pyCausalFS
def STMB(data, target, alaph, is_discrete=True):
    """Find the Markov blanket of ``target`` (PC set plus spouses).

    Steps, as implemented below:

    1. ``RecognizePC`` yields the candidate parents/children ``PCT`` and the
       separating sets of the rejected variables.
    2. For every ``y`` in ``PCT`` and every variable ``x`` outside ``PCT``:
       if ``x`` depends on ``target`` given ``sepset[x] + [y]``, ``y`` is
       searched for a separating subset (size <= 3) of ``PCT + [x] - [y]``.
       If one exists ``y`` is scheduled for removal from ``PCT``; otherwise
       ``x`` is recorded as a spouse through ``y``.
    3. Spouses independent of ``target`` given ``PCT`` and the other spouses
       of the same ``y`` are dropped.
    4. ``PCT`` members independent of ``target`` given the rest of ``PCT``
       and all spouses are dropped.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        target: Column index of the target variable.
        alaph: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to the independence test helpers.

    Returns:
        ``(MB, ci_number)``: the union of the final ``PCT`` and all spouses,
        and the number of conditional-independence tests performed.
    """
    number, kVar = np.shape(data)
    ci_number = 0
    PCT = [i for i in range(kVar) if i != target]
    PCT, sepset, ci_num2 = RecognizePC(data, target, PCT, alaph, is_discrete)
    ci_number += ci_num2

    spouse = [[] for i in range(kVar)]
    remove = []
    # PCT is not modified inside the loop below (removals are deferred via
    # `remove`), so the set of outside variables is computed once.
    X_set = [i for i in range(kVar) if i != target and i not in PCT]
    for y in PCT:
        breakFlag = False
        for x in X_set:
            # Integer indices, like every other conditioning set passed to
            # cond_indep_test in this function (previously stringified).
            conditionsSet = [i for i in sepset[x]]
            conditionsSet.append(y)
            conditionsSet = list(set(conditionsSet))

            ci_number += 1
            pval_xt, dep_xt = cond_indep_test(data, target, x, conditionsSet,
                                              is_discrete)
            if pval_xt <= alaph:
                Zset = [i for i in PCT]
                Zset.append(x)
                Zset = list(set(Zset))
                if y in Zset:
                    Zset.remove(y)

                Zlength = min(len(Zset), 3)
                # Zlength + 1 so that subsets of size Zlength are included.
                for j in range(Zlength + 1):
                    Zsubsets = subsets(Zset, j)
                    for Z in Zsubsets:
                        ci_number += 1
                        pval_yt, dep_yt = cond_indep_test(
                            data, target, y, Z, is_discrete)
                        if pval_yt > alaph:
                            remove.append(y)
                            breakFlag = True
                            break
                    if breakFlag:
                        break
                if breakFlag:
                    # y was rejected; stop examining further x for it.
                    break
                else:
                    spouse[y].append(x)

    PCT = [i for i in PCT if i not in remove]

    for y in range(len(spouse)):
        if spouse[y] != []:
            # Iterate over a copy because spouse[y] shrinks inside the loop.
            spouseY_temp = spouse[y].copy()
            for x in spouseY_temp:
                testSet = [
                    i for i in range(kVar) if i in PCT or i in spouse[y]
                ]
                testSet = list(set(testSet))
                if x in testSet:
                    testSet.remove(x)

                ci_number += 1
                pval_xt_testset, _ = cond_indep_test(data, target, x, testSet,
                                                     is_discrete)
                if pval_xt_testset > alaph:
                    spouse[y].remove(x)

    M_variSet = PCT.copy()
    for x in M_variSet:
        conditionsVariSet = [i for j in range(len(spouse)) for i in spouse[j]]
        conditionsVariSet = list(set(conditionsVariSet).union(set(PCT)))
        if x in conditionsVariSet:
            conditionsVariSet.remove(x)

        ci_number += 1
        pval_final, _ = cond_indep_test(data, target, x, conditionsVariSet,
                                        is_discrete)
        if pval_final > alaph:
            PCT.remove(x)

    spouse = [i for j in range(len(spouse)) for i in spouse[j]]
    MB = list(set(PCT).union(set(spouse)))

    return MB, ci_number


# import  pandas as pd
# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v3.csv")
# print("the file read")
#
# target = 11
# alaph = 0.05
#
# MBs=STMB(data, target, alaph, is_discrete=False)
# print("MBs is: "+str(MBs))

# F1 is: 0.7526467421467425
# Precision is: 0.8019166666666667
# Recall is: 0.7789583333333334
# time is: 11.730078125

#5000

# F1 is: 0.86
# Precision is: 0.86
# Recall is: 0.87
# Distance is: 0.21
# ci_number is: 142.295
# time is: 70.24
예제 #13
0
파일: LCMB.py 프로젝트: ziipeen/pyCausalFS
def LRH(data, target, alaph, is_discrete=True):
    """Grow a variable set ``M`` for ``target`` and prune it at the end.

    Each round selects the variables dependent on ``target`` given ``M``,
    filters them with an exclusion test, and adds to ``M`` the variable(s)
    with the highest dependence.  When the selection is empty the loop stops
    and every member of ``M`` that is independent of ``target`` given the
    remaining members is removed.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        target: Column index of the target variable.
        alaph: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to ``cond_indep_test``.

    Returns:
        ``(M, ci_number)``: the selected variables and the number of
        conditional-independence tests performed.
    """
    ci_number = 0
    number, kVar = np.shape(data)
    max_k = 3
    M = []
    while True:
        # --- selection: variables dependent on target given M -------------
        chosen = []
        ranking = []  # [variable, dependence] pairs
        for cand in range(kVar):
            if cand == target or cand in M:
                continue
            ci_number += 1
            pval, dep = cond_indep_test(data, target, cand, M, is_discrete)
            if pval <= alaph:
                chosen.append(cand)
                ranking.append([cand, dep])

        if not chosen:
            break
        if len(chosen) == 1:
            M.append(chosen[0])
            continue

        # --- exclusion: keep x only if no subset of its dependent peers
        # (joined with M) separates it from target --------------------------
        kept = []
        for x in chosen:
            peers = []
            for y in chosen:
                if y == x:
                    continue
                ci_number += 1
                pval, _ = cond_indep_test(data, x, y, M, is_discrete)
                if pval <= alaph:
                    peers.append(y)

            separated = False
            for size in range(min(len(peers), max_k) + 1):
                for Z in subsets(peers, size):
                    conditionset = list(set(Z).union(set(M)))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, x, conditionset,
                                              is_discrete)
                    if pval > alaph:
                        separated = True
                        break
                if separated:
                    break
            if not separated:
                kept.append(x)

        # When some variables passed the exclusion test, rank only those
        # (re-running the test against M); otherwise rank the whole selection.
        if kept:
            ranking = []
            for x in kept:
                ci_number += 1
                pval, dep = cond_indep_test(data, target, x, M, is_discrete)
                if pval <= alaph:
                    ranking.append([x, dep])
        ranking.sort(key=lambda pair: pair[1], reverse=True)

        # Take every variable tied for the highest dependence.
        Y = []
        if ranking:
            top_dep = ranking[0][1]
            for var, dep in ranking:
                if dep == top_dep:
                    Y.append(var)
                else:
                    break

        M = list(set(M).union(set(Y)))

    # --- final pruning ------------------------------------------------------
    for x in M.copy():
        others = [i for i in M if i != x]
        ci_number += 1
        pval, _ = cond_indep_test(data, target, x, others, is_discrete)
        if pval > alaph:
            M.remove(x)

    return M, ci_number


# data = pd.read_csv("C:/pythonProject/pyCausalFS/data/child_s500_v1.csv")
# print("the file read")
#
# target = 19
# alaph = 0.01
#
# MB = LRH(data, target, alaph)
# print("MBs is: " + str(MB))

# 500

# F1 is: 0.76
# Precision is: 0.85
# Recall is: 0.76
# Distance is: 0.34
# ci_number is: 331.96
# time is: 43.03

# 5000

# F1 is: 0.91
# Precision is: 0.90
# Recall is: 0.94
# Distance is: 0.14
# ci_number is: 1.00
# time is: 238.92
예제 #14
0
파일: STMB.py 프로젝트: ziipeen/pyCausalFS
def STMB(data, target, alaph, is_discrete=True):
    """Return the Markov blanket of ``target`` and the number of CI tests.

    ``RecognizePC`` supplies the candidate parents/children ``PCT`` together
    with separating sets.  Variables outside ``PCT`` are then screened as
    spouses through each ``y`` in ``PCT`` (which may instead expose ``y`` as
    a false positive), the spouses are pruned, and finally ``PCT`` itself is
    pruned against ``PCT`` plus all spouses.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        target: Column index of the target variable.
        alaph: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to the independence test helpers.

    Returns:
        ``(MB, ci_number)``.
    """
    _, kVar = np.shape(data)
    everything_else = [v for v in range(kVar) if v != target]
    PCT, sepset, pc_tests = RecognizePC(data, target, everything_else, alaph,
                                        is_discrete)
    ci_number = 0
    ci_number += pc_tests

    spouse = [[] for _ in range(kVar)]
    remove = []
    for y in PCT:
        outsiders = [v for v in range(kVar) if v != target and v not in PCT]
        y_rejected = False
        for x in outsiders:
            cond = list(sepset[x])
            cond.append(y)
            cond = list(set(cond))

            ci_number += 1
            pval_xt, _ = cond_indep_test(data, target, x, cond, is_discrete)
            if not (pval_xt <= alaph):
                # x stays independent of target even given y: not a spouse.
                continue

            Zset = list(PCT)
            Zset.append(x)
            Zset = list(set(Zset))
            if y in Zset:
                Zset.remove(y)

            # Sizes 0 .. min(len(Zset), 3), inclusive.
            for size in range(min(len(Zset), 3) + 1):
                for Z in subsets(Zset, size):
                    ci_number += 1
                    pval_yt, _ = cond_indep_test(data, target, y, Z,
                                                 is_discrete)
                    if pval_yt > alaph:
                        remove.append(y)
                        y_rejected = True
                        break
                if y_rejected:
                    break

            if y_rejected:
                break
            spouse[y].append(x)

    PCT = [v for v in PCT if v not in remove]

    # Drop spouses that PCT plus the remaining spouses of the same y explain.
    for y, group in enumerate(spouse):
        if not group:
            continue
        for x in group.copy():
            testSet = list(
                set([i for i in range(kVar) if i in PCT or i in group]))
            if x in testSet:
                testSet.remove(x)

            ci_number += 1
            pval_spouse, _ = cond_indep_test(data, target, x, testSet,
                                             is_discrete)
            if pval_spouse > alaph:
                group.remove(x)

    # Drop PCT members that the rest of PCT plus all spouses explain.
    all_spouses = [s for group in spouse for s in group]
    for x in PCT.copy():
        cond = list(set(all_spouses).union(set(PCT)))
        if x in cond:
            cond.remove(x)

        ci_number += 1
        pval_final, _ = cond_indep_test(data, target, x, cond, is_discrete)
        if pval_final > alaph:
            PCT.remove(x)

    MB = list(set(PCT).union(set(all_spouses)))
    return MB, ci_number
예제 #15
0
def MB_by_MB(data, target, alaph, is_discrete=True):
    """Orient the edges around ``target`` by expanding Markov blankets.

    Starting from ``target``, the Markov blanket of each waiting node is
    found with ``IAMB``, split into a PC part and the rest, the rest is
    screened for spouses, v-structures are written into the graph matrices
    and ``Meek`` is applied.  The expansion stops as soon as every node
    adjacent to ``target`` has an oriented edge, or when no node is waiting.

    Three ``p x p`` matrices are maintained:

    * ``DAG``  - skeleton, ``1`` marks an (undirected) adjacency;
    * ``pdag`` - ``-1`` at ``[a, b]`` marks ``a -> b``, ``1`` an undirected
      edge;
    * ``G``    - ``1`` at ``[a, b]`` marks ``a -> b``.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        target: Column index of the target variable.
        alaph: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to the independence test helpers.

    Returns:
        ``(parents, children, undirected)``: lists of variable indices read
        from ``pdag`` relative to ``target``.
    """
    _, p = np.shape(data)
    Donelist = []  # nodes whose MB has been found
    Waitlist = [target]  # nodes whose MB still has to be found
    G = np.zeros((p, p))  # 1 denotes ->, 0 denotes no edge
    pdag = G.copy()  # -1 denotes ->, 1 denotes -, 0 denotes no edge
    DAG = G.copy()  # 1 denotes -, 0 denotes no edge
    MB = [[] for _ in range(p)]
    sepset = [[[] for _ in range(p)] for _ in range(p)]
    k = 3  # largest conditioning-set size tried when searching sepsets
    while Waitlist != []:
        stop = False
        for x in Waitlist.copy():
            spouse = [[] for _ in range(p)]
            Donelist.append(x)
            Waitlist.remove(x)
            MB[x], _ = IAMB(data, x, alaph, is_discrete)
            Waitlist.extend(MB[x])

            # Skip x when its MB is strictly contained in another known MB
            # or consists only of nodes that were already processed.
            mb_x = set(MB[x])
            if (any(mb_x < set(other) for other in MB)
                    or mb_x <= set(Donelist)):
                continue

            # Split MB[x] into PC (no separating set found) and the rest.
            pc = MB[x].copy()
            for c in MB[x]:
                CanPC = [i for i in MB[x] if i != c]
                cutsetsize = 0
                separated = False
                while len(CanPC) >= cutsetsize and cutsetsize <= k:
                    for s in subsets(CanPC, cutsetsize):
                        pval, _ = cond_indep_test(data, x, c, s, is_discrete)
                        if pval <= alaph:
                            continue
                        sepset[x][c] = s
                        pc.remove(c)
                        separated = True
                        break
                    if separated:
                        break
                    cutsetsize += 1

            # Spouse screening: a non-PC node r is a spouse through pc[j]
            # when pc[j] is not in sepset(x, r) and r becomes dependent on x
            # once pc[j] is added to that sepset.
            # Fix: the conditioning set previously added the characters of
            # str(r) (e.g. 12 -> {'1', '2'}), i.e. the tested variable
            # itself, split into digits, instead of the candidate child.
            rest = [i for i in MB[x] if i not in pc]
            for r in rest:
                for j, child in enumerate(pc):
                    if child in sepset[x][r]:
                        continue
                    condition = list(set(sepset[x][r]).union([child]))
                    pval, _ = cond_indep_test(data, r, x, condition,
                                              is_discrete)
                    if pval <= alaph or math.isnan(pval):
                        spouse[j].append(r)

            # Write the skeleton and the v-structures x -> b <- c.
            for b, partners in zip(pc, spouse):
                DAG[x, b] = 1
                DAG[b, x] = 1
                if pdag[x, b] == 0 and pdag[b, x] == 0:
                    pdag[x, b] = 1
                    pdag[b, x] = 1
                    G[x, b] = 1
                    G[b, x] = 1
                for c in partners:
                    DAG[c, b] = 1
                    DAG[b, c] = 1
                    DAG[x, c] = 0
                    DAG[c, x] = 0
                    pdag[x, b] = -1
                    pdag[c, b] = -1
                    pdag[b, x] = 0
                    pdag[b, c] = 0
                    pdag[x, c] = 0
                    pdag[c, x] = 0
                    G[x, b] = 1
                    G[c, b] = 1
                    G[b, x] = 0
                    G[b, c] = 0
                    G[c, x] = 0
                    G[x, c] = 0

            # Orient further edges with the Meek rules.
            # Fix: the original read an undefined local `pDAG` here, which
            # raised UnboundLocalError.
            # NOTE(review): assumes Meek returns the oriented matrix; confirm
            # against its definition.
            pdag = Meek(DAG, pdag, data)

            # Stop once every edge connected to target is oriented.
            connect = [i for i in range(p) if DAG[target, i] == 1]
            stop = all(pdag[target, i] == -1 or pdag[i, target] == -1
                       for i in connect)
            if stop:
                break
        if stop:
            break

        # De-duplicate the wait list and drop nodes already processed.
        Waitlist = [i for i in set(Waitlist) if i not in Donelist]

    parents = [i for i in range(p) if pdag[i, target] == -1]
    children = [i for i in range(p) if pdag[target, i] == -1]
    undirected = [i for i in range(p) if pdag[target, i] == 1]
    return parents, children, undirected


# # data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# data = pd.read_csv("F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.csv")
# # path = "F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.txt"
# # data = np.loadtxt(path, dtype=None, delimiter= ' ')
# target = 0
# Graph, p, c = MB_by_MB(data,target,0.01)
# print("\nin the last -------------------------------------")
# print(Graph)
# print("target " + str(target) + " parents are " + str(p))
# print("target " + str(target) + " children are " + str(c))
예제 #16
0
def MBGSL(data, alpha, is_discrete, selected):
    """Learn a partially directed graph from per-variable Markov blankets.

    The Markov blanket of every variable is computed with the algorithm
    chosen by ``selected``, the blankets are symmetrised (an edge is kept if
    either side reports it), links that a conditioning set separates are
    removed, the remaining edges are oriented and ``meek`` is applied.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        alpha: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to the helpers.
        selected: ``1`` -> ``MMMB``, ``2`` -> ``HITON_MB``,
            ``3`` -> ``semi_HITON_MB``, anything else -> ``PCMB``.

    Returns:
        ``(pdag, num_CI)``: the matrix returned by ``meek`` (``-1`` at
        ``[a, b]`` is written below for an edge oriented ``a -> b``) and the
        number of conditional-independence tests counted here.
    """
    _, kvar = np.shape(data)
    max_k = 3
    all_MB = [[] for i in range(kvar)]
    all_neighbor = [[] for i in range(kvar)]
    PP = np.zeros((kvar, kvar))
    num_CI = 0

    for i in range(kvar):
        if selected == 1:
            MB, n_c = MMMB(data, i, alpha, is_discrete)
        elif selected == 2:
            MB, n_c = HITON_MB(data, i, alpha, is_discrete)
        elif selected == 3:
            MB, n_c = semi_HITON_MB(data, i, alpha, is_discrete)
        else:
            MB, n_c, _ = PCMB(data, i, alpha, is_discrete)
        num_CI += n_c
        for j in MB:
            PP[i, j] = 1

    # OR rule: keep a link when either endpoint lists the other.
    for i in range(kvar):
        for j in range(0, i):
            if PP[i, j] != PP[j, i]:
                PP[i, j] = 1
                PP[j, i] = 1

    for i in range(kvar):
        for j in range(kvar):
            if PP[i, j] == 1:
                all_MB[i].append(j)

    # Remove the possible spouse links between linked variables x and y.
    for x in range(kvar):
        for y in all_MB[x]:
            varis = list((set(all_MB[x]).difference([y])).union(
                set(all_MB[y]).difference([x])))
            k = 0
            break_flag = False
            while len(varis) > k and k <= max_k:
                for s in subsets(varis, k):
                    num_CI += 1
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Clear both directions of the link.  The original
                        # assigned PP[x, y] twice and left PP[y, x] set.
                        PP[x, y] = 0
                        PP[y, x] = 0
                        break_flag = True
                        break
                if break_flag:
                    break
                k += 1

    for i in range(kvar):
        for j in range(kvar):
            if PP[i, j] == 1:
                all_neighbor[i].append(j)

    DAG = PP.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # Orient edges.
    # NOTE(review): the orientation check sits after the z loop, so only the
    # verdict of the last z is used, and the trailing `break` skips the
    # remaining neighbours y of x once one edge is oriented - confirm that
    # both are intended.
    for x in range(kvar):
        for y in all_neighbor[x]:
            sz = list((set(all_neighbor[x]).difference(
                all_neighbor[y])).difference([y]))
            for z in sz:
                PP[y, x] = -1  # tentatively y -> x
                B = list((set(all_MB[y]).difference([z])).union(
                    set(all_MB[z]).difference([y])))
                break_flag = False
                cutSetSize = 0
                # Only conditioning sets of size 0 (plus x) are tried.
                while len(B) >= cutSetSize and cutSetSize == 0:
                    for s in subsets(B, cutSetSize):
                        cond_s = list(set(s).union([x]))

                        num_CI += 1
                        pval, _ = cond_indep_test(data, y, z, cond_s,
                                                  is_discrete)
                        if pval > alpha:
                            PP[y, x] = 1  # withdraw the tentative direction
                            break_flag = True
                            break
                    if break_flag:
                        break
                    cutSetSize += 1
            if PP[y, x] == -1:
                pdag[y, x] = -1
                pdag[x, y] = 0
                G[y, x] = 1
                G[x, y] = 0
                break

    DAG, pdag, G = meek(DAG, pdag, G, kvar)

    return pdag, num_CI
예제 #17
0
def MMPC(data, target, alpha, is_discrete):
    """Build the candidate parents/children (CPC) set of ``target``.

    Forward phase: repeatedly call ``getMinDep`` for every variable that is
    neither in CPC nor already found independent, and add the variable with
    the largest minimum dependence.  Variables whose minimum dependence is 0
    are set aside together with their separating set.  Backward phase:
    remove every CPC member that some subset (size <= 3) of the other
    members separates from ``target``.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        target: Column index of the target variable.
        alpha: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to the helpers.

    Returns:
        ``(CPC, sepset, ci_number)``: the de-duplicated CPC list, the
        per-variable separating sets recorded in the forward phase, and the
        number of conditional-independence tests performed.
    """
    number, kVar = np.shape(data)
    ci_number = 0
    CPC = []
    deoZeroSet = []  # variables found independent of target
    sepset = [[] for _ in range(kVar)]

    # Phase I: forward
    while True:
        best_dep = -float("inf")
        best_var = 0

        for x in range(kVar):
            if x == target or x in CPC or x in deoZeroSet:
                continue

            x_dep_min, separator, n_tests = getMinDep(
                data, target, x, CPC, alpha, is_discrete)
            ci_number += n_tests

            if x_dep_min == 0:
                # Never a CPC member; exclude it from all later rounds.
                deoZeroSet.append(x)
                sepset[x] = [j for j in separator]
            elif x_dep_min > best_dep:
                best_var, best_dep = x, x_dep_min

        if not (best_dep >= 0):
            # Nothing could be added, so CPC is final for this phase.
            break
        CPC.append(best_var)

    # Phase II: backward
    max_k = 3
    for a in CPC.copy():
        others = [i for i in CPC if i != a]
        separated = False
        for length in range(min(len(others), max_k) + 1):
            for S in subsets(others, length):
                ci_number += 1
                pval, _ = cond_indep_test(data, target, a, S, is_discrete)
                if pval > alpha:
                    CPC.remove(a)
                    separated = True
                    break
            if separated:
                break

    return list(set(CPC)), sepset, ci_number
예제 #18
0
파일: GSBN.py 프로젝트: wt-hu/pyCausalFS
def GSBN(data, alpha, is_discrete):
    """Learn a partially directed graph from ``GSMB`` Markov blankets.

    The Markov blanket of every variable is computed with ``GSMB`` and
    symmetrised (an edge is kept if either side reports it).  Links that a
    conditioning set separates are removed, the remaining neighbour edges
    are oriented, and ``meek`` is applied.

    Args:
        data: 2-D data set; ``np.shape(data)`` must unpack to
            ``(samples, variables)``.
        alpha: Significance level; a p-value above it counts as independence.
        is_discrete: Forwarded unchanged to the helpers.

    Returns:
        The ``pdag`` matrix returned by ``meek`` (``-1`` at ``[a, b]`` is
        written below for an edge oriented ``a -> b``).
    """
    _, kvar = np.shape(data)
    max_k = 3
    all_MB = [[] for i in range(kvar)]
    all_neighbor = [[] for i in range(kvar)]
    DAG = np.zeros((kvar, kvar))

    # Compute Markov blankets.
    for i in range(kvar):
        MB, _ = GSMB(data, i, alpha, is_discrete)
        for j in MB:
            DAG[i, j] = 1

    # OR rule: keep a link when either endpoint lists the other.
    for i in range(kvar):
        for j in range(0, i):
            if DAG[i, j] != DAG[j, i]:
                DAG[i, j] = 1
                DAG[j, i] = 1

    for i in range(kvar):
        for j in range(kvar):
            if DAG[i, j] == 1:
                all_MB[i].append(j)

    # Compute graph structure: drop links that some conditioning set explains.
    for x in range(kvar):
        for y in all_MB[x]:
            vs = set(all_MB[x]).union(set(all_MB[y]))
            varis = [i for i in vs if i != x and i != y]
            k = 0
            break_flag = False
            while len(varis) > k and k <= max_k:
                for s in subsets(varis, k):
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Clear both directions of the link.  The original
                        # assigned DAG[x, y] twice and left DAG[y, x] set.
                        DAG[x, y] = 0
                        DAG[y, x] = 0
                        break_flag = True
                        break
                if break_flag:
                    break
                k += 1

    # Neighbours are the links that survived.  The original tested
    # DAG[x, y] with the stale x, y of the loop above, so every pair (or no
    # pair) was recorded as adjacent.
    for i in range(kvar):
        for j in range(kvar):
            if DAG[i, j] == 1:
                all_neighbor[i].append(j)

    PP = DAG.copy()
    pdag = DAG.copy()
    G = DAG.copy()
    print("2")
    # Orient edges.
    for x in range(kvar):
        for y in all_neighbor[x]:

            PP[y, x] = -1  # tentatively y -> x
            nz_vars = [
                i for i in all_neighbor[x]
                if i != y and i not in all_neighbor[y]
            ]
            for z in nz_vars:

                vs_vars = set(all_neighbor[y]).union(all_neighbor[z])
                vs = [i for i in vs_vars if i != z and i != y]
                k = 0
                break_flag = False
                while len(vs) > k and k <= max_k:
                    for s in subsets(vs, k):
                        # Always condition on x in addition to s.
                        con_set = [i for i in s]
                        con_set.append(x)
                        con_set = list(set(con_set))
                        pval, _ = cond_indep_test(data, y, z, con_set,
                                                  is_discrete)
                        if pval > alpha:
                            PP[y, x] = 1  # withdraw the tentative direction
                            break_flag = True
                            break
                    if break_flag:
                        break

                    k += 1

                if PP[y, x] == -1:
                    pdag[y, x] = -1
                    pdag[x, y] = 0

                    G[y, x] = 1
                    G[x, y] = 0

    print("3")
    # Remove cycles.
    [DAG, pdag, G] = meek(DAG, pdag, G, kvar)

    return pdag


# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/Alarm_data/Alarm1_s5000_v7.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alpha = 0.01
#
# pdag, dic = GSBN(data, alpha, True)
# print(pdag)
# for i in range(kvar):
#     for j in range(kvar):
#         if pdag[i, j] == -1:
#             print("i: ", i, " ,j: ", j)
# print(dic["cache"][0]/(dic["cache"][0]+dic["cache"][1]))
예제 #19
0
def semi_HITON_PC(data, target, alaph, is_disrete=True):
    """Estimate the parents/children (PC) set of ``target`` (semi-HITON-PC).

    Variables marginally dependent on ``target`` are ranked by decreasing
    association and admitted one at a time; a variable is dropped as soon as
    some subset (at most three variables) of the currently admitted set makes
    it independent of ``target``.  A final backward pass re-checks every
    admitted variable except the one admitted last.

    Args:
        data: 2-D data set; only ``np.shape(data)`` is read here, everything
            else is delegated to ``cond_indep_test``.
        target: column index of the target variable.
        alaph: significance level; ``pval > alaph`` is read as independence.
        is_disrete: forwarded unchanged to ``cond_indep_test``.

    Returns:
        A tuple ``(pc, sep, ci_number)``: the retained variable indices, a
        per-variable list holding the separating set found for each rejected
        variable (empty list otherwise), and the number of conditional
        independence tests performed.
    """
    _, n_vars = np.shape(data)
    max_cond_size = 3
    ci_number = 0
    sep = [[] for _ in range(n_vars)]

    def search_separator(var, pool):
        """Return the first subset of ``pool`` separating ``var``, else None."""
        nonlocal ci_number
        for size in range(min(len(pool), max_cond_size) + 1):
            for cond in subsets(pool, size):
                ci_number += 1
                pval, _ = cond_indep_test(data, var, target, cond, is_disrete)
                if pval > alaph:
                    return list(cond)
        return None

    # Marginal screening: keep variables dependent on the target, with the
    # strength of association used for ranking.
    ranked = []
    for var in range(n_vars):
        if var == target:
            continue
        ci_number += 1
        pval, dep = cond_indep_test(data, target, var, [], is_disrete)
        if pval <= alaph:
            ranked.append([var, dep])
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    candidate_pc = [pair[0] for pair in ranked]

    # Forward phase: tentatively admit each candidate, then try to separate it
    # from the target using the variables admitted before it.
    current_pc = []
    for var in candidate_pc:
        current_pc.append(var)
        found = search_separator(var, [v for v in current_pc if v != var])
        if found is not None:
            sep[var] = found
            current_pc.remove(var)

    # Backward phase: the variable admitted last was already checked against
    # the full set, so it is skipped.
    last_added = current_pc[-1] if len(current_pc) > 0 else None
    for var in current_pc.copy():
        if var == last_added:
            continue
        found = search_separator(var, [v for v in current_pc if v != var])
        if found is not None:
            current_pc.remove(var)
            sep[var] = found

    return list(set(current_pc)), sep, ci_number
예제 #20
0
파일: MBOR.py 프로젝트: jhj111/pyCausalFS
def MBOR(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket (MB) of ``target`` following the MBOR scheme.

    Steps, as implemented below:

    1. ``PCSuperSet`` / ``SPSuperSet`` produce candidate supersets ``PCS`` and
       ``SPS``; their union ``MBS`` bounds the rest of the search.
    2. ``MBtoPC`` is run on the data restricted to ``MBS`` and ``target`` to
       obtain ``PC``.  Every ``x`` in ``PCS`` that was left out is re-admitted
       when ``target`` appears in the ``MBtoPC`` result computed for ``x``.
    3. For each ``x`` in ``PC``, every ``y`` in ``MBtoPC(x)`` outside ``PC`` is
       a spouse candidate: the first subset ``Z`` of ``MBS`` (size <= 3) that
       makes ``target`` and ``y`` independent is located, and ``y`` is kept as
       a spouse when adding ``x`` to ``Z`` makes them dependent again.

    Args:
        data: data set; it must support ``np.shape`` and
            ``.drop(labels, axis=1)`` with string labels ``"0".."kVar-1"``
            (presumably a pandas DataFrame with stringified integer column
            names -- confirm against the caller).
        target: column index of the target variable.
        alaph: significance level; ``pval > alaph`` is read as independence.
        is_discrete: forwarded unchanged to the helper routines.

    Returns:
        A tuple ``(MB, ci_number)``: the estimated Markov blanket as a list of
        variable indices, and the number of conditional independence tests
        performed (including those reported by the helpers).
    """
    _, kVar = np.shape(data)
    max_k = 3
    ci_number = 0

    PCS, d_sep, ci_num = PCSuperSet(data, target, alaph, is_discrete)
    ci_number += ci_num
    SPS, ci_num = SPSuperSet(data, target, PCS, d_sep, alaph, is_discrete)
    ci_number += ci_num
    MBS = list(set(PCS).union(set(SPS)))

    # Restrict the data to the target plus the MB superset.
    drop_data_attribute = [
        str(i) for i in range(kVar) if i != target and i not in MBS
    ]
    data_new = data.drop(drop_data_attribute, axis=1)
    data_attribute = [i for i in range(kVar) if i == target or i in MBS]

    PC, ci_num = MBtoPC(data_new, target, alaph, data_attribute, is_discrete)
    ci_number += ci_num

    # Re-admit superset members whose own PC estimate contains the target.
    PCS_rmPC = [i for i in PCS if i not in PC]
    for x in PCS_rmPC:
        x_pcset, ci_num = MBtoPC(data_new, x, alaph, data_attribute,
                                 is_discrete)
        ci_number += ci_num
        if target in x_pcset:
            PC.append(x)

    SP = []
    for x in PC:
        # Rebuilt on every iteration in case MBtoPC modifies the list it gets.
        data_attribute = [i for i in range(kVar) if i != target]
        x_pcset, ci_num = MBtoPC(data, x, alaph, data_attribute, is_discrete)
        ci_number += ci_num
        vari_set = [i for i in x_pcset if i != target and i not in PC]
        for y in vari_set:
            condition_all_set = [i for i in MBS if i != target and i != y]
            clength = min(len(condition_all_set), max_k)
            separator_found = False
            for j in range(clength + 1):
                for Z in subsets(condition_all_set, j):
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, y, Z, is_discrete)
                    if pval <= alaph:
                        continue

                    # Z is the first (hence smallest) subset of
                    # MBS \ {target, y} with target independent of y given Z.
                    separator_found = True
                    condition_varis = list(set(list(Z) + [x]))
                    ci_number += 1
                    pval, _ = cond_indep_test(data, target, y,
                                              condition_varis, is_discrete)
                    if pval <= alaph:
                        SP.append(y)
                    # Stop right away: testing further subsets cannot change
                    # the outcome and would only inflate ci_number.
                    break
                if separator_found:
                    break

    MB = list(set(PC).union(set(SP)))
    return MB, ci_number
예제 #21
0
def interIAMBnPC(data, target, alaph):
    """Estimate the Markov blanket of ``target`` with interleaved IAMB + PC pruning.

    Phase 1 interleaves growing and shrinking: each round admits the variable
    with the largest association to ``target`` given the current blanket
    ``BT`` (if its p-value is significant), then re-tests every member of
    ``BT`` given the others and removes those found independent.  A variable
    removed more than 10 times is excluded from further growing rounds.

    Phase 2 prunes the result in PC style: for conditioning-set sizes
    0, 1, ... (up to 3) a member is discarded as soon as some subset of the
    surviving members makes it independent of ``target``.

    Args:
        data: 2-D data set; only ``np.shape(data)`` is read here, everything
            else is delegated to ``chi_square_test``.
        target: column index of the target variable.
        alaph: significance level; ``pval > alaph`` is read as independence.

    Returns:
        A tuple ``(MB, ntest)``: the retained variable indices and the number
        of independence tests performed.
    """
    n, p = np.shape(data)
    BT = []
    ntest = 0
    length = -1
    removeSet = []
    rmNumberSet = [0 for i in range(p)]

    while len(BT) != length:
        depmax = -float("inf")
        feature = -1
        pval_temp = 1.0
        length = len(BT)

        # Growing phase: pick the candidate most associated with the target.
        S = [
            i for i in range(p)
            if i != target and i not in BT and i not in removeSet
        ]
        for X in S:
            ntest += 1
            pval, dep = chi_square_test(data, X, target, BT)
            if dep > depmax:
                depmax = dep
                feature = X
                pval_temp = pval
        # feature stays -1 when no candidate was selected; never admit that
        # placeholder as a variable.
        if feature != -1 and pval_temp <= alaph:
            BT.append(feature)

        # Shrinking phase: walk the blanket in reverse order.  A snapshot is
        # iterated so removals cannot shift the walk, and the walk stops at
        # the first element instead of wrapping around to BT[-1] (which
        # raised IndexError on an empty blanket and re-tested the last
        # element otherwise).
        for x in BT[::-1]:
            ntest += 1
            conditionvars = [i for i in BT if i != x]
            pval_sp, dep_sp = chi_square_test(data, target, x, conditionvars)
            if pval_sp > alaph:
                BT.remove(x)
                # The variable admitted in this very round was rejected again:
                # stop here so the outer loop sees no change and terminates.
                if x == feature:
                    break

                rmNumberSet[x] += 1
                if rmNumberSet[x] > 10:
                    removeSet.append(x)

    # PC-style pruning of the blanket found above.
    TestMB = BT.copy()
    n_candidates = len(TestMB)
    # Single-row flag array: 1 = still in the blanket, 0 = discarded.
    DAG = np.ones((1, n_candidates))
    size = 0
    # Conditioning sets are limited to at most three variables.
    max_k = 3
    while True:
        for y in range(n_candidates):
            if DAG[0, y] == 0:
                continue
            conditionSet = [
                i for i in range(n_candidates) if i != y and DAG[0, i] == 1
            ]
            for S in subsets(conditionSet, size):
                condtionVari = [TestMB[i] for i in S]
                ntest += 1
                pval, _ = chi_square_test(data, target, TestMB[y],
                                          condtionVari)
                if pval > alaph:
                    DAG[0, y] = 0
                    break
        size += 1

        # Continue only while enough members survive to form a conditioning
        # set of the next size and the size cap has not been exceeded.
        if not (np.sum(DAG[0, :] == 1) >= size and size <= max_k):
            break

    MB = [TestMB[i] for i in range(n_candidates) if DAG[0, i] == 1]

    return MB, ntest
예제 #22
0
def TIE(data, target, alaph, is_discrete=True):
    """Search for additional Markov boundaries of ``target`` (TIE-style).

    A first boundary ``MB`` is obtained with ``IAMB`` on all variables.  The
    procedure then repeatedly removes a small subset of an already found
    boundary (together with whatever was removed to obtain that boundary)
    from the variable pool, re-runs ``IAMB`` on the remainder and accepts the
    result as a new boundary when every variable of ``MB`` it lacks is
    independent of ``target`` given the new boundary.

    Args:
        data: 2-D data set; only ``np.shape(data)`` is read here, everything
            else is delegated to ``IAMB`` and ``cond_indep_test``.
        target: column index of the target variable.
        alaph: significance level; ``pval <= alaph`` is read as dependence.
        is_discrete: forwarded unchanged to ``IAMB`` and ``cond_indep_test``.

    Returns:
        The list of accepted boundaries found after the initial one (the
        initial ``MB`` itself is not included).
    """
    _, kVar = np.shape(data)
    max_k = 3

    MB, _ = IAMB(data, target, alaph, list(range(kVar)), is_discrete)

    boundaries = [MB]      # every boundary accepted so far, initial one first
    removed_for = [[]]     # variables excluded to obtain boundaries[i]
    rejected = []          # exclusion sets that failed the check below
    pending = []           # exclusion sets generated so far, in visiting order
    cursor = 0             # position of the next entry of `pending` to try
    current = 0            # index of the boundary currently being expanded
    MB_new_set = []

    while True:
        # Generate exclusion sets from subsets (size 1..max_k) of the current
        # boundary, skipping subsets that contain a rejected exclusion set.
        base = boundaries[current]
        for size in range(1, min(len(base), max_k) + 1):
            for combo in subsets(base, size):
                if any(set(combo).issuperset(set(bad)) for bad in rejected):
                    continue
                merged = list(set(combo).union(set(removed_for[current])))
                if merged not in pending:
                    pending.append(merged)

        if cursor >= len(pending):
            break
        excluded = pending[cursor]
        cursor += 1

        remaining = [i for i in range(kVar) if i not in excluded]
        MB_new, _ = IAMB(data, target, alaph, remaining, is_discrete)
        lost = list(set(MB).difference(set(MB_new)))
        if not lost or MB_new == []:
            continue

        for var in lost:
            pval, _ = cond_indep_test(data, target, var, MB_new, is_discrete)
            if pval <= alaph:
                # A lost variable still carries information about the target:
                # reject this exclusion set and drop its strict supersets
                # from the pending list.
                rejected.append(excluded)
                pending = [
                    cand for cand in pending
                    if cand == excluded
                    or not set(cand).issuperset(excluded)
                ]
                break
        else:
            MB_new_set.append(MB_new)
            boundaries.append(MB_new)
            removed_for.append(excluded)
            current += 1

    return MB_new_set