Esempio n. 1
0
        data1.to_csv('D:/temp/data_labels.csv', index=False)
        data = pd.read_csv('D:/temp/data_labels.csv')

        number, nver = np.shape(data)
        target = nver - 1
        print(number)
        train_indices = []
        test_indices = []

        for index, method in enumerate(method_list):
            print(target)
            print(method)
            if method == "MMMB":
                MB, ci_num = MMMB(data, target, alaph, is_discrete)
            elif method == "IAMB":
                MB, ci_num = IAMB(data, target, alaph, is_discrete)
            elif method == "IAMBnPC":
                MB, ci_num = IAMBnPC(data, target, alaph, is_discrete)
            elif method == "inter_IAMB":
                MB, ci_num = inter_IAMB(data, target, alaph, is_discrete)
            elif method == "interIAMBnPC":
                MB, ci_num = interIAMBnPC(data, target, alaph, is_discrete)
            elif method == "fast_IAMB":
                MB, ci_num = fast_IAMB(data, target, alaph, is_discrete)
            elif method == "GSMB":
                MB, ci_num = GSMB(data, target, alaph, is_discrete)
            elif method == "HITON_MB":
                MB, ci_num = HITON_MB(data, target, alaph, is_discrete)
            elif method == "PCMB":
                MB, ci_num = PCMB(data, target, alaph, is_discrete)
            elif method == "IPCMB":
def evaluation(method,
               path,
               all_number_Para,
               target_list,
               real_graph_path,
               is_discrete,
               filenumber=10,
               alaph=0.01,
               k=1):
    """Average the quality and cost of one MB algorithm over several data sets.

    For every file ``path + "<m>.csv"`` with ``m`` in ``1..filenumber`` the
    selected algorithm is run once per entry of ``target_list``.  Each result
    is compared with the reference set ``realmb[target]`` obtained from
    ``realMB(all_number_Para, real_graph_path)``.

    Args:
        method: Name of the algorithm to run (one of the keys of the dispatch
            table below, e.g. ``"MMMB"``, ``"IAMB"``, ``"FBEDk"``).
        path: Prefix of the data files; the 1-based file index and ``".csv"``
            are appended to it.
        all_number_Para: Forwarded unchanged to ``realMB``.
        target_list: Targets to evaluate; each one is passed to the algorithm
            and also used to index the reference result.
        real_graph_path: Forwarded unchanged to ``realMB``.
        is_discrete: Forwarded unchanged to the algorithm.
        filenumber: Number of data files to process.
        alaph: Forwarded unchanged to the algorithm (presumably the
            significance level of its independence tests -- confirm).
        k: Extra argument forwarded only to ``KIAMB`` and ``FBED``.

    Returns:
        A 6-tuple ``(F1, precision, recall, distance, ci_tests, cpu_time)``,
        each divided by ``len(target_list) * filenumber``.  ``distance`` is
        the Euclidean distance of (precision, recall) from (1, 1) and
        ``cpu_time`` is measured with ``time.process_time``.

    Raises:
        Exception: ``method`` is not a supported name.  This is checked
            before any file or graph is loaded.
        ZeroDivisionError: ``target_list`` is empty or ``filenumber`` is 0.
    """
    # The lambdas keep name resolution lazy: only the selected algorithm has
    # to exist in the module namespace, exactly as with an if/elif chain.
    runners = {
        "MMMB": lambda d, t: MMMB(d, t, alaph, is_discrete),
        "IAMB": lambda d, t: IAMB(d, t, alaph, is_discrete),
        "KIAMB": lambda d, t: KIAMB(d, t, alaph, k, is_discrete),
        "IAMBnPC": lambda d, t: IAMBnPC(d, t, alaph, is_discrete),
        "inter_IAMB": lambda d, t: inter_IAMB(d, t, alaph, is_discrete),
        "interIAMBnPC": lambda d, t: interIAMBnPC(d, t, alaph, is_discrete),
        "fast_IAMB": lambda d, t: fast_IAMB(d, t, alaph, is_discrete),
        "GSMB": lambda d, t: GSMB(d, t, alaph, is_discrete),
        "HITON_MB": lambda d, t: HITON_MB(d, t, alaph, is_discrete),
        "PCMB": lambda d, t: PCMB(d, t, alaph, is_discrete),
        "IPCMB": lambda d, t: IPC_MB(d, t, alaph, is_discrete),
        "STMB": lambda d, t: STMB(d, t, alaph, is_discrete),
        "BAMB": lambda d, t: BAMB(d, t, alaph, is_discrete),
        "FBEDk": lambda d, t: FBED(d, t, k, alaph, is_discrete),
        "MBOR": lambda d, t: MBOR(d, t, alaph, is_discrete),
        "LRH": lambda d, t: LRH(d, t, alaph, is_discrete),
    }
    run = runners.get(method)
    if run is None:
        # Fail fast instead of loading data first and failing on the first
        # target.
        raise Exception("method input error!")

    # Running totals over all (file, target) pairs.
    Precision = 0
    Recall = 0
    F1 = 0
    Distance = 0
    use_time = 0
    ci_number = 0

    realmb, _ = realMB(all_number_Para, real_graph_path)
    length_targets = len(target_list)

    for m in range(filenumber):
        data = pd.read_csv(path + str(m + 1) + ".csv")

        # Run the algorithm for every target; only the call itself is timed.
        found_mbs = []
        for target in target_list:
            start_time = time.process_time()
            MB, ci_num = run(data, target)
            end_time = time.process_time()

            use_time += end_time - start_time
            ci_number += ci_num
            found_mbs.append(MB)

        # Score every result against the reference set of its target.
        for target, found in zip(target_list, found_mbs):
            reference = realmb[target]
            hits = len(set(reference).intersection(set(found)))
            n_reference = len(reference)
            n_found = len(found)

            if n_reference == 0:
                # Nothing to find: an empty answer is perfect, anything else
                # scores zero.
                precision = recall = 1 if n_found == 0 else 0
            elif n_found == 0:
                precision = recall = 0
            else:
                precision = hits / n_found
                recall = hits / n_reference

            # Harmonic mean; contributes nothing when both terms are zero.
            if precision + recall != 0:
                F1 += 2 * precision * recall / (precision + recall)

            Distance += ((1 - precision)**2 + (1 - recall)**2)**0.5
            Precision += precision
            Recall += recall

    commonDivisor = length_targets * filenumber

    return (F1 / commonDivisor, Precision / commonDivisor,
            Recall / commonDivisor, Distance / commonDivisor,
            ci_number / commonDivisor, use_time / commonDivisor)
Esempio n. 3
0
def example(method, data, list_target, alpha, is_discrete, k=0):
    """Run one MB algorithm for every target and report the results.

    Each result is printed and also written to ``../output/mb.txt`` (the file
    is opened in ``"w+"`` mode, so previous content is discarded).  The CPU
    time of the whole loop, measured with ``time.process_time`` and therefore
    including the reporting itself, is printed and written last.

    Args:
        method: Name of the algorithm to run (one of the keys of the dispatch
            table below, e.g. ``"MMMB"``, ``"KIAMB"``, ``"TIE"``).
        data: Data set forwarded unchanged to the algorithm.
        list_target: Targets to process, in order.
        alpha: Forwarded unchanged to the algorithm (presumably the
            significance level of its independence tests -- confirm).
        is_discrete: Forwarded unchanged to the algorithm.
        k: Extra argument forwarded only to ``FBED`` and ``KIAMB``.

    Raises:
        Exception: ``method`` is not a supported name.
    """

    def mb_of(result):
        # Most algorithms return ``(MB, ci_num)``; only the MB is reported.
        mb, _ci_num = result
        return mb

    # The lambdas keep name resolution lazy: only the selected algorithm has
    # to exist in the module namespace.  TIE and TIE_p return the MB directly.
    finders = {
        "MMMB": lambda t: mb_of(MMMB(data, t, alpha, is_discrete)),
        "IAMB": lambda t: mb_of(IAMB(data, t, alpha, is_discrete)),
        "inter_IAMB": lambda t: mb_of(inter_IAMB(data, t, alpha, is_discrete)),
        "fast_IAMB": lambda t: mb_of(fast_IAMB(data, t, alpha, is_discrete)),
        "GSMB": lambda t: mb_of(GSMB(data, t, alpha, is_discrete)),
        "HITON_MB": lambda t: mb_of(HITON_MB(data, t, alpha, is_discrete)),
        "semi_HITON_MB": lambda t: mb_of(
            semi_HITON_MB(data, t, alpha, is_discrete)),
        "PCMB": lambda t: mb_of(PCMB(data, t, alpha, is_discrete)),
        "IPCMB": lambda t: mb_of(IPC_MB(data, t, alpha, is_discrete)),
        "STMB": lambda t: mb_of(STMB(data, t, alpha, is_discrete)),
        "IAMBnPC": lambda t: mb_of(IAMBnPC(data, t, alpha, is_discrete)),
        "interIAMBnPC": lambda t: mb_of(
            interIAMBnPC(data, t, alpha, is_discrete)),
        "BAMB": lambda t: mb_of(BAMB(data, t, alpha, is_discrete)),
        "FBEDk": lambda t: mb_of(FBED(data, t, k, alpha, is_discrete)),
        "MBOR": lambda t: mb_of(MBOR(data, t, alpha, is_discrete)),
        "LRH": lambda t: mb_of(LRH(data, t, alpha, is_discrete)),
        "KIAMB": lambda t: mb_of(KIAMB(data, t, alpha, k, is_discrete)),
        "TIE": lambda t: TIE(data, t, alpha, is_discrete),
        "TIE_p": lambda t: TIE_p(data, t, alpha, is_discrete),
    }

    # ``with`` guarantees the file is closed even when the method is unknown
    # or the algorithm raises part-way through.
    with open("../output/mb.txt", "w+") as out:
        find_mb = finders.get(method)
        if find_mb is None:
            raise Exception("method input error!")

        start_time = time.process_time()
        for target in list_target:
            MB = find_mb(target)
            report = "the MB of " + str(target) + " is:" + str(MB)
            out.write(report + "\n")
            print(report)
        end_time = time.process_time()

        print("the running time is: " + str(end_time - start_time))
        out.write("the running time is: " + str(end_time - start_time) + "\n")
Esempio n. 4
0
def MB_by_MB(data, target, alaph, is_discrete=True):
    """Learn the edges around ``target`` by expanding Markov blankets.

    Starting from ``target``, the MB of each waiting variable is found with
    ``IAMB`` and split by conditional-independence tests into a "pc" part
    (members that cannot be separated from the variable) and the rest.
    Edges and v-structures are recorded, and ``Meek`` is then called to
    orient further edges.  The search stops as soon as every edge
    touching ``target`` is oriented, or when no variable is left waiting.

    Args:
        data: Two-dimensional data set; ``np.shape(data)`` must yield
            ``(rows, variables)``.  Forwarded to ``IAMB``,
            ``cond_indep_test`` and ``Meek``.
        target: Index of the variable of interest.
        alaph: Threshold for the independence tests; a p-value above it is
            treated as independence.  Also forwarded to ``IAMB``.
        is_discrete: Forwarded unchanged to ``IAMB`` and
            ``cond_indep_test``.

    Returns:
        ``(parents, children, undirected)``: lists of variable indices ``i``
        with ``pdag[i, target] == -1``, ``pdag[target, i] == -1`` and
        ``pdag[target, i] == 1`` respectively.
    """
    _, p = np.shape(data)
    Donelist = []  # whose MBs have been found
    Waitlist = [target]  # whose MBs will be found
    G = np.zeros((p, p))  # 1 denotes ->, 0 denotes no edge
    pdag = G.copy()  # -1 denotes ->, 1 denotes -, 0 denotes no edge
    DAG = G.copy()  # 1 denotes an adjacency, 0 denotes no edge
    MB = [[] for _ in range(p)]
    # sepset[a][b] is only ever re-assigned, never mutated in place.
    sepset = [[[] for _ in range(p)] for _ in range(p)]
    k = 3  # largest conditioning-set size tried when separating x from c

    while Waitlist:
        stop = False
        for x in Waitlist.copy():
            # spouse[j] collects the variables recorded against pc[j].
            spouse = [[] for _ in range(p)]
            Donelist.append(x)
            Waitlist.remove(x)
            MB[x], _ = IAMB(data, x, alaph, is_discrete)
            Waitlist.extend(MB[x])

            # Skip x when its MB is a proper subset of another stored MB or
            # consists only of already processed variables.
            mb_x = set(MB[x])
            if any(mb_x < set(other) for other in MB) \
                    or mb_x <= set(Donelist):
                continue

            # Find pc: drop every MB member that some subset (size 0..k) of
            # the remaining members renders independent of x.
            pc = MB[x].copy()
            for c in MB[x]:
                CanPC = [v for v in MB[x] if v != c]
                cutsetsize = 0
                separated = False
                while len(CanPC) >= cutsetsize and cutsetsize <= k:
                    for s in subsets(CanPC, cutsetsize):
                        pval, _ = cond_indep_test(data, x, c, s, is_discrete)
                        if pval > alaph:
                            sepset[x][c] = s
                            pc.remove(c)
                            separated = True
                            break
                    if separated:
                        break
                    cutsetsize += 1

            # Members that were separated from x are spouse candidates.
            rest = [v for v in MB[x] if v not in pc]
            for y in rest:
                for j, z in enumerate(pc):
                    if z in sepset[x][y]:
                        continue
                    condition = [str(m) for m in sepset[x][y]]
                    # Add the index as ONE string; set(str(y)) would split a
                    # multi-digit index such as 12 into '1' and '2'.
                    # NOTE(review): y is also the tested variable here; it
                    # looks like pc[j] (z) was meant -- confirm before
                    # changing.
                    condition = list(set(condition).union({str(y)}))
                    pval, _ = cond_indep_test(data, y, x, condition,
                                              is_discrete)
                    if pval <= alaph or math.isnan(pval):
                        spouse[j].append(y)

            # Record adjacencies and construct v-structures x -> b <- c.
            for j, b in enumerate(pc):
                DAG[x, b] = 1
                DAG[b, x] = 1
                if pdag[x, b] == 0 and pdag[b, x] == 0:
                    pdag[x, b] = 1
                    pdag[b, x] = 1
                    G[x, b] = 1
                    G[b, x] = 1
                for c in spouse[j]:
                    DAG[c, b] = 1
                    DAG[b, c] = 1
                    DAG[x, c] = 0
                    DAG[c, x] = 0
                    pdag[x, b] = -1
                    pdag[c, b] = -1
                    pdag[b, x] = 0
                    pdag[b, c] = 0
                    pdag[x, c] = 0
                    pdag[c, x] = 0
                    G[x, b] = 1
                    G[c, b] = 1
                    G[b, x] = 0
                    G[b, c] = 0
                    G[c, x] = 0
                    G[x, c] = 0

            # Orient further edges.  The previous code passed an unassigned
            # local ``pDAG`` here, which raised UnboundLocalError.
            # NOTE(review): assumes Meek either returns the updated matrix or
            # updates ``pdag`` in place and returns None -- confirm.
            oriented = Meek(DAG, pdag, data)
            if oriented is not None:
                pdag = oriented

            # Stop once every edge connected to target is oriented.
            connect = [i for i in range(p) if DAG[target, i] == 1]
            stop = all(pdag[target, i] == -1 or pdag[i, target] == -1
                       for i in connect)
            if stop:
                break
        if stop:
            break

        # De-duplicate the queue and drop variables already processed.
        Waitlist = [v for v in set(Waitlist) if v not in Donelist]

    # The earlier bare np.transpose(G) / np.transpose(pdag) calls discarded
    # their results, so the matrices are read here exactly as built.
    parents = [i for i in range(p) if pdag[i, target] == -1]
    children = [i for i in range(p) if pdag[target, i] == -1]
    undirected = [i for i in range(p) if pdag[target, i] == 1]
    return parents, children, undirected


# # data = pd.read_csv("F:\cai_algorithm\data\Child_s500_v1.csv")
# data = pd.read_csv("F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.csv")
# # path = "F:\cai_algorithm\Alarm_data\Alarm1_s500_v1.txt"
# # data = np.loadtxt(path, dtype=None, delimiter= ' ')
# target = 0
# parents, children, undirected = MB_by_MB(data, target, 0.01)
# print("\nin the last -------------------------------------")
# print("target " + str(target) + " parents are " + str(parents))
# print("target " + str(target) + " children are " + str(children))
# print("target " + str(target) + " undirected neighbours are " + str(undirected))