예제 #1
0
파일: te12.py 프로젝트: ziipeen/pyCausalFS
        data1 = pd.concat([pd.DataFrame(train_data), pd.DataFrame(train_label)], axis=1, ignore_index=True)
        #  Best of a bad bunch
        data1.to_csv('D:/temp/data_labels.csv', index=False)
        data = pd.read_csv('D:/temp/data_labels.csv')

        number, nver = np.shape(data)
        target = nver - 1
        print(number)
        train_indices = []
        test_indices = []

        for index, method in enumerate(method_list):
            print(target)
            print(method)
            if method == "MMMB":
                MB, ci_num = MMMB(data, target, alaph, is_discrete)
            elif method == "IAMB":
                MB, ci_num = IAMB(data, target, alaph, is_discrete)
            elif method == "IAMBnPC":
                MB, ci_num = IAMBnPC(data, target, alaph, is_discrete)
            elif method == "inter_IAMB":
                MB, ci_num = inter_IAMB(data, target, alaph, is_discrete)
            elif method == "interIAMBnPC":
                MB, ci_num = interIAMBnPC(data, target, alaph, is_discrete)
            elif method == "fast_IAMB":
                MB, ci_num = fast_IAMB(data, target, alaph, is_discrete)
            elif method == "GSMB":
                MB, ci_num = GSMB(data, target, alaph, is_discrete)
            elif method == "HITON_MB":
                MB, ci_num = HITON_MB(data, target, alaph, is_discrete)
            elif method == "PCMB":
예제 #2
0
def example(method, data, list_target, alpha, is_discrete, k=0):
    """Run one Markov-blanket (MB) algorithm on each target and log the results.

    For every target in ``list_target`` the selected algorithm is called, and
    the line ``the MB of <target> is:<MB>`` is both printed and written to
    ``../output/mb.txt``. The CPU time spent in the loop (measured with
    ``time.process_time``) is printed and appended to the file at the end.

    Args:
        method: Name of the algorithm. One of "MMMB", "IAMB", "inter_IAMB",
            "fast_IAMB", "GSMB", "HITON_MB", "semi_HITON_MB", "PCMB",
            "IPCMB", "STMB", "IAMBnPC", "interIAMBnPC", "BAMB", "FBEDk",
            "MBOR", "LRH", "KIAMB", "TIE" or "TIE_p".
        data: Data set forwarded unchanged to the algorithm.
        list_target: Iterable of target variables to process.
        alpha: Significance level forwarded to the algorithm.
        is_discrete: Flag forwarded to the algorithm.
        k: Extra parameter; only used by "FBEDk" and "KIAMB".

    Raises:
        ValueError: If ``method`` is not a supported name. The check happens
            before the output file is opened, so an invalid call no longer
            truncates an existing ``mb.txt``. ``ValueError`` is a subclass
            of ``Exception``, so callers catching ``Exception`` still work.
    """
    # Lambdas keep name lookup lazy: only the selected algorithm has to be
    # importable, exactly like the branch-per-method layout this replaces.
    # Every algorithm except TIE / TIE_p returns (MB, number_of_CI_tests).
    finders = {
        "MMMB": lambda t: MMMB(data, t, alpha, is_discrete)[0],
        "IAMB": lambda t: IAMB(data, t, alpha, is_discrete)[0],
        "inter_IAMB": lambda t: inter_IAMB(data, t, alpha, is_discrete)[0],
        "fast_IAMB": lambda t: fast_IAMB(data, t, alpha, is_discrete)[0],
        "GSMB": lambda t: GSMB(data, t, alpha, is_discrete)[0],
        "HITON_MB": lambda t: HITON_MB(data, t, alpha, is_discrete)[0],
        "semi_HITON_MB":
            lambda t: semi_HITON_MB(data, t, alpha, is_discrete)[0],
        "PCMB": lambda t: PCMB(data, t, alpha, is_discrete)[0],
        "IPCMB": lambda t: IPC_MB(data, t, alpha, is_discrete)[0],
        "STMB": lambda t: STMB(data, t, alpha, is_discrete)[0],
        "IAMBnPC": lambda t: IAMBnPC(data, t, alpha, is_discrete)[0],
        "interIAMBnPC":
            lambda t: interIAMBnPC(data, t, alpha, is_discrete)[0],
        "BAMB": lambda t: BAMB(data, t, alpha, is_discrete)[0],
        # FBED and KIAMB take k, but at different positions.
        "FBEDk": lambda t: FBED(data, t, k, alpha, is_discrete)[0],
        "MBOR": lambda t: MBOR(data, t, alpha, is_discrete)[0],
        "LRH": lambda t: LRH(data, t, alpha, is_discrete)[0],
        "KIAMB": lambda t: KIAMB(data, t, alpha, k, is_discrete)[0],
        "TIE": lambda t: TIE(data, t, alpha, is_discrete),
        "TIE_p": lambda t: TIE_p(data, t, alpha, is_discrete),
    }

    find_mb = finders.get(method) if isinstance(method, str) else None
    if find_mb is None:
        raise ValueError("method input error!")

    # The context manager guarantees the file is closed even when an
    # algorithm raises part-way through the target list.
    with open("../output/mb.txt", "w+") as out:
        start_time = time.process_time()
        for target in list_target:
            MB = find_mb(target)
            message = "the MB of " + str(target) + " is:" + str(MB)
            out.write(message + "\n")
            print(message)
        end_time = time.process_time()

        summary = "the running time is: " + str(end_time - start_time)
        print(summary)
        out.write(summary + "\n")
예제 #3
0
def MBbyMB(data, target, alpha, is_discrete=True):
    """Orient the edges around ``target`` by expanding Markov blankets.

    Starting from ``target``, variables are processed breadth-first. For each
    processed variable ``A`` the function:

    1. obtains ``MB(A)`` with ``MMMB``;
    2. links ``A`` with every member ``B`` of that blanket, then searches
       subsets (size 0..3) of the remaining blanket members for a set that
       makes ``A`` and ``B`` conditionally independent; if one is found the
       link is removed and ``B`` is recorded as a candidate spouse of ``A``;
    3. marks v-structures ``A -> B <- C`` for candidate spouses ``C`` whose
       separating set does not contain ``B``;
    4. applies ``meek`` to the three working matrices.

    The search stops when the queue is empty, or once more variables than
    ``len(MB(target))`` have been processed and no entry equal to 1 (an
    undirected mark) remains in the target's row or column of ``pdag``.

    Args:
        data: Data set; ``np.shape(data)[1]`` is taken as the variable count.
        target: Index of the variable of interest.
        alpha: Threshold; a CI test with p-value above it is treated as
            independence.
        is_discrete: Flag forwarded to ``MMMB`` and ``cond_indep_test``.

    Returns:
        ``(parents, children, PC, undirected)``: lists of variable indices
        read from ``pdag``. ``pdag[i, target] == -1`` marks a parent,
        ``pdag[target, i] == -1`` a child and ``pdag[target, i] == 1`` an
        undirected neighbour. ``PC`` is the union of the three.
    """
    from collections import deque

    max_k = 3  # largest conditioning-set size that is tried
    _, kvar = np.shape(data)
    DAG = np.zeros((kvar, kvar))
    pdag = DAG.copy()
    G = DAG.copy()
    all_pc = [[] for _ in range(kvar)]
    all_mb = [[] for _ in range(kvar)]
    all_can_spouse = [[] for _ in range(kvar)]
    # Independent inner lists: no aliasing between cells of the same row.
    all_sepset = [[[] for _ in range(kvar)] for _ in range(kvar)]

    queue = deque([target])  # O(1) pops from the left, unlike a list
    visited = set()          # O(1) membership, unlike a list
    num_calculated = 0

    while len(visited) <= kvar and queue:
        A = queue.popleft()
        if A in visited:
            continue
        visited.add(A)

        # Every variable is processed once, so MB(A) is always fresh here.
        all_mb[A], _ = MMMB(data, A, alpha, is_discrete)
        all_pc[A] = all_mb[A].copy()

        for B in all_mb[A]:
            queue.append(B)
            DAG[A, B] = 1
            DAG[B, A] = 1
            if pdag[A, B] == 0 and pdag[B, A] == 0:
                pdag[A, B] = 1
                pdag[B, A] = 1
                G[A, B] = 1
                G[B, A] = 1

            # Look for a separating set among the other members of MB(A).
            others = [i for i in all_mb[A] if i != B]
            separated = False
            for cut_size in range(min(len(others), max_k) + 1):
                for z in subsets(others, cut_size):
                    pval, _ = cond_indep_test(data, B, A, z, is_discrete)
                    if pval > alpha:
                        all_sepset[A][B] = list(z)
                        all_sepset[B][A] = list(z)

                        # A and B are not adjacent: drop the link everywhere.
                        DAG[A, B] = 0
                        DAG[B, A] = 0
                        pdag[A, B] = 0
                        pdag[B, A] = 0
                        G[A, B] = 0
                        G[B, A] = 0

                        all_pc[A] = [i for i in all_pc[A] if i != B]
                        all_can_spouse[A].append(B)

                        separated = True
                        break
                if separated:
                    break

        # Find v-structures A -> B <- C.
        for C in all_can_spouse[A]:
            for B in all_pc[A]:
                if B not in all_sepset[A][C]:
                    DAG[A, B] = 1
                    DAG[B, A] = 1

                    pdag[A, B] = -1
                    pdag[B, A] = 0

                    pdag[C, B] = -1
                    pdag[B, C] = 0

                    G[A, B] = 1
                    G[B, A] = 0

                    G[C, B] = 1
                    G[B, C] = 0

        [DAG, pdag, G] = meek(DAG, pdag, G, kvar)

        num_calculated += 1
        if (num_calculated > len(all_mb[target])
                and 1 not in pdag[target, :]
                and 1 not in pdag[:, target]):
            # No undirected mark is left around the target.
            break

    parents = [i for i in range(kvar) if pdag[i, target] == -1]
    children = [i for i in range(kvar) if pdag[target, i] == -1]
    undirected = [i for i in range(kvar) if pdag[target, i] == 1]
    PC = list(set(parents).union(set(children)).union(set(undirected)))

    return parents, children, PC, undirected


# import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# data = pd.read_csv("D:/data/alarm_data/Alarm1_s5000_v6.csv")
# print("the file read")
# import numpy as np
# num1, kvar = np.shape(data)
# alaph = 0.01
#
# for target in range(kvar):
#     P, C, PC, und = MBbyMB(data, target, alaph, True)
#     print(target," -P: ", P, " ,C: ", C, " ,PC: ", PC, " ,undire: ",und)
예제 #4
0
def _score_markov_blanket(real_mb, found_mb):
    """Return ``(precision, recall, f1)`` of ``found_mb`` against ``real_mb``.

    Two empty blankets count as a perfect match (1, 1, 1); exactly one empty
    blanket counts as a complete miss (0, 0, 0).
    """
    n_real = len(real_mb)
    n_found = len(found_mb)
    if n_real == 0 and n_found == 0:
        return 1, 1, 1
    if n_real == 0 or n_found == 0:
        return 0, 0, 0

    n_hit = len(set(real_mb).intersection(set(found_mb)))
    precision = n_hit / n_found
    recall = n_hit / n_real
    if precision + recall == 0:
        return precision, recall, 0
    return precision, recall, 2 * precision * recall / (precision + recall)


def evaluation(method,
               path,
               all_number_Para,
               target_list,
               real_graph_path,
               is_discrete,
               filenumber=10,
               alaph=0.01,
               k=1):
    """Benchmark one Markov-blanket algorithm over a series of CSV data sets.

    The files ``<path>1.csv`` .. ``<path><filenumber>.csv`` are read in turn.
    For each file and each target the selected algorithm is timed with
    ``time.process_time`` and its result is scored against the reference
    blanket returned by ``realMB(all_number_Para, real_graph_path)``.

    Args:
        method: Algorithm name. One of "MMMB", "IAMB", "KIAMB", "IAMBnPC",
            "inter_IAMB", "interIAMBnPC", "fast_IAMB", "GSMB", "HITON_MB",
            "PCMB", "IPCMB", "STMB", "BAMB", "FBEDk", "MBOR" or "LRH".
        path: Common prefix of the data files.
        all_number_Para: Forwarded to ``realMB``.
        target_list: Targets to evaluate; also used to index the reference
            blankets.
        real_graph_path: Forwarded to ``realMB``.
        is_discrete: Flag forwarded to the algorithm.
        filenumber: Number of data files to process.
        alaph: Significance level forwarded to the algorithm.
        k: Extra parameter; only used by "KIAMB" and "FBEDk".

    Returns:
        A 6-tuple of averages over ``len(target_list) * filenumber`` runs:
        ``(F1, precision, recall, distance, ci_tests, cpu_time)``, where
        distance is ``sqrt((1 - precision)**2 + (1 - recall)**2)``.

    Raises:
        ValueError: If ``method`` is not supported. This is checked before
            any file is read. ``ValueError`` is a subclass of ``Exception``,
            so callers catching ``Exception`` still work.
        ZeroDivisionError: If ``target_list`` is empty or ``filenumber`` is 0.
    """
    # Lambdas keep name lookup lazy: only the chosen algorithm must be in
    # scope. Each runner returns (MB, number_of_CI_tests).
    runners = {
        "MMMB": lambda d, t: MMMB(d, t, alaph, is_discrete),
        "IAMB": lambda d, t: IAMB(d, t, alaph, is_discrete),
        "KIAMB": lambda d, t: KIAMB(d, t, alaph, k, is_discrete),
        "IAMBnPC": lambda d, t: IAMBnPC(d, t, alaph, is_discrete),
        "inter_IAMB": lambda d, t: inter_IAMB(d, t, alaph, is_discrete),
        "interIAMBnPC": lambda d, t: interIAMBnPC(d, t, alaph, is_discrete),
        "fast_IAMB": lambda d, t: fast_IAMB(d, t, alaph, is_discrete),
        "GSMB": lambda d, t: GSMB(d, t, alaph, is_discrete),
        "HITON_MB": lambda d, t: HITON_MB(d, t, alaph, is_discrete),
        "PCMB": lambda d, t: PCMB(d, t, alaph, is_discrete),
        "IPCMB": lambda d, t: IPC_MB(d, t, alaph, is_discrete),
        "STMB": lambda d, t: STMB(d, t, alaph, is_discrete),
        "BAMB": lambda d, t: BAMB(d, t, alaph, is_discrete),
        "FBEDk": lambda d, t: FBED(d, t, k, alaph, is_discrete),
        "MBOR": lambda d, t: MBOR(d, t, alaph, is_discrete),
        "LRH": lambda d, t: LRH(d, t, alaph, is_discrete),
    }
    run = runners.get(method) if isinstance(method, str) else None
    if run is None:
        # Fail fast, before loading the ground truth or any data set.
        raise ValueError("method input error!")

    # Running totals over all (file, target) pairs.
    Precision = 0
    Recall = 0
    F1 = 0
    Distance = 0
    use_time = 0
    ci_number = 0

    realmb, _ = realMB(all_number_Para, real_graph_path)
    length_targets = len(target_list)

    for m in range(filenumber):
        data = pd.read_csv(path + str(m + 1) + ".csv")
        for target in target_list:
            # Only the algorithm call itself is timed.
            start_time = time.process_time()
            MB, ci_num = run(data, target)
            end_time = time.process_time()
            use_time += end_time - start_time
            ci_number += ci_num

            precision, recall, f1 = _score_markov_blanket(realmb[target], MB)
            F1 += f1
            Precision += precision
            Recall += recall
            Distance += ((1 - precision)**2 + (1 - recall)**2)**0.5

    commonDivisor = length_targets * filenumber

    # NOTE: only means are reported; no standard deviation is computed.
    return (F1 / commonDivisor, Precision / commonDivisor,
            Recall / commonDivisor, Distance / commonDivisor,
            ci_number / commonDivisor, use_time / commonDivisor)
예제 #5
0
def MBGSL(data, alpha, is_discrete, selected):
    """Build a partially directed graph from per-variable Markov blankets.

    Steps performed:

    1. Compute the Markov blanket of every variable with the algorithm
       chosen by ``selected`` and record it in a 0/1 matrix.
    2. Symmetrise that matrix with an OR rule (a link reported from either
       side is kept).
    3. For every linked pair, search conditioning sets of size 0..3 drawn
       from the pair's blankets; when a CI test yields a p-value above
       ``alpha`` the link is removed in both directions.
    4. Orient some of the remaining edges with additional CI tests, then
       hand the matrices to ``meek``.

    Args:
        data: Data set; ``np.shape(data)[1]`` is taken as the variable count.
        alpha: Threshold; a CI test with p-value above it is treated as
            independence.
        is_discrete: Flag forwarded to the MB algorithms and the CI test.
        selected: 1 -> ``MMMB``, 2 -> ``HITON_MB``, 3 -> ``semi_HITON_MB``,
            any other value -> ``PCMB``.

    Returns:
        ``(pdag, num_CI)``: the matrix returned by ``meek`` and the number
        of CI tests counted (those reported by the MB algorithm plus those
        run here).
    """
    _, kvar = np.shape(data)
    max_k = 3  # largest conditioning-set size tried when pruning links
    PP = np.zeros((kvar, kvar))
    num_CI = 0

    for i in range(kvar):
        if selected == 1:
            result = MMMB(data, i, alpha, is_discrete)
        elif selected == 2:
            result = HITON_MB(data, i, alpha, is_discrete)
        elif selected == 3:
            result = semi_HITON_MB(data, i, alpha, is_discrete)
        else:
            result = PCMB(data, i, alpha, is_discrete)
        # Only the first two items (MB, number of CI tests) are needed.
        # Slicing also tolerates a variant that returns a third value
        # (the original PCMB branch unpacked an extra cache object).
        MB, n_c = result[:2]
        num_CI += n_c
        for j in MB:
            PP[i, j] = 1

    # OR rule: keep a link when either endpoint reported the other.
    for i in range(kvar):
        for j in range(0, i):
            if PP[i, j] != PP[j, i]:
                PP[i, j] = 1
                PP[j, i] = 1

    all_MB = [[j for j in range(kvar) if PP[i, j] == 1] for i in range(kvar)]

    # Remove the possible spouse links between linked variables x and y.
    for x in range(kvar):
        for y in all_MB[x]:
            varis = list((set(all_MB[x]).difference([y])).union(
                set(all_MB[y]).difference([x])))
            removed = False
            # Sizes 0 .. min(len(varis) - 1, max_k), same bounds as before.
            # NOTE(review): nothing is tested when varis is empty - confirm
            # that skipping the unconditional test is intended.
            for k in range(min(len(varis), max_k + 1)):
                for s in subsets(varis, k):
                    num_CI += 1
                    pval, _ = cond_indep_test(data, x, y, s, is_discrete)
                    if pval > alpha:
                        # Fix: the original cleared PP[x, y] twice and left
                        # PP[y, x] set; clear both directions of the link.
                        PP[x, y] = 0
                        PP[y, x] = 0
                        removed = True
                        break
                if removed:
                    break

    all_neighbor = [[j for j in range(kvar) if PP[i, j] == 1]
                    for i in range(kvar)]

    DAG = PP.copy()
    pdag = DAG.copy()
    G = DAG.copy()

    # Orient edges.
    for x in range(kvar):
        for y in all_neighbor[x]:
            # Neighbours of x that are neither y nor neighbours of y.
            sz = list((set(all_neighbor[x]).difference(
                all_neighbor[y])).difference([y]))
            for z in sz:
                PP[y, x] = -1
                B = list((set(all_MB[y]).difference([z])).union(
                    set(all_MB[z]).difference([y])))
                # The original loop condition (cutSetSize == 0) allowed a
                # single pass, so only size-0 subsets of B are ever used.
                for s in subsets(B, 0):
                    cond_s = list(set(s).union([x]))

                    num_CI += 1
                    pval, _ = cond_indep_test(data, y, z, cond_s,
                                              is_discrete)
                    if pval > alpha:
                        PP[y, x] = 1
                        break
            # NOTE(review): PP[y, x] reflects only the last z examined,
            # because every z resets it to -1 first - confirm intent.
            if PP[y, x] == -1:
                pdag[y, x] = -1
                pdag[x, y] = 0
                G[y, x] = 1
                G[x, y] = 0
                break

    DAG, pdag, G = meek(DAG, pdag, G, kvar)

    return pdag, num_CI