예제 #1
0
파일: MMMB.py 프로젝트: jhj111/pyCausalFS
def MMMB(data, target, alaph, is_discrete=True):
    """Estimate the Markov blanket of ``target`` (PC set plus spouses).

    The parents/children set of ``target`` is taken from ``MMPC``. Then, for
    every member ``x`` of that set, each member ``y`` of ``MMPC(x)`` that is
    neither ``target`` nor already in the blanket is treated as a candidate
    spouse: ``y`` is admitted when ``target`` and ``y`` test as dependent
    given the separating set recorded for ``y`` together with ``x``.

    Args:
        data: Data set, passed unchanged to ``MMPC`` and ``cond_indep_test``.
        target: Identifier of the target variable.
        alaph: Significance level; a p-value ``<= alaph`` counts as
            dependence. (The spelling is kept for caller compatibility.)
        is_discrete: Forwarded to ``MMPC`` and ``cond_indep_test``.

    Returns:
        tuple: ``(MB, ci_number)``. ``MB`` lists the PC members first,
        followed by the admitted spouses in discovery order, without
        duplicates. ``ci_number`` is the sum of the test counts reported by
        every ``MMPC`` call plus one for each spouse test run here.
    """
    PC, sepset, ci_number = MMPC(data, target, alaph, is_discrete)
    MB = PC.copy()

    for x in PC:
        PCofPC, _, ci_num3 = MMPC(data, x, alaph, is_discrete)
        ci_number += ci_num3

        for y in PCofPC:
            # MB already contains every PC member, so this single membership
            # test skips direct neighbours as well as spouses admitted
            # earlier through another common child (previously such a spouse
            # was re-tested and appended a second time).
            if y == target or y in MB:
                continue

            # Condition on the separating set of y plus the common child x;
            # the set removes a duplicate when x is already part of it.
            conditions_Set = {str(i) for i in sepset[y]}
            conditions_Set.add(str(x))

            ci_number += 1
            pval, _ = cond_indep_test(data, target, y, list(conditions_Set),
                                      is_discrete)
            if pval <= alaph:
                # Deliberately no ``break`` here: x may have several other
                # parents, and leaving the loop after the first hit would
                # drop the remaining candidate spouses of x untested.
                MB.append(y)

    return MB, ci_number
예제 #2
0
def MMHC(data, alpha=0.01, score_function='bdeu'):
    """Learn a graph over all variables of ``data`` with MMPC + hill-climbing.

    Phase one runs ``MMPC`` once per variable to collect candidate
    parent/children sets; phase two hands the symmetrised candidate sets to
    ``hc`` and writes the edges it returns into the adjacency matrix.

    Args:
        data: Two-dimensional training data (rows x variables). The original
            notes state that the data must be discrete; ``MMPC`` is always
            invoked with ``True`` as its fourth argument.
        alpha: Threshold handed to ``MMPC`` for its independence tests.
        score_function: Name of the score passed to ``hc``; the original
            notes list ``'bdeu'`` and ``'bic'`` as supported.

    Returns:
        tuple: ``(DAG, num_CI)``. ``DAG`` is a square float matrix in which an
        edge ``p -> c`` reported by ``hc`` is stored as ``DAG[p, c] == -1``
        with ``DAG[c, p] == 0``; pairs found by ``MMPC`` that ``hc`` does not
        report keep the value ``1`` in both directions. ``num_CI`` is the sum
        of the test counts returned by the ``MMPC`` calls.
    """
    _n_rows, n_vars = np.shape(data)
    DAG = np.zeros((n_vars, n_vars))
    num_CI = 0
    pc = {}

    # Phase 1: candidate neighbours of every variable.
    for tar in range(n_vars):
        neighbours, _, tests_used = MMPC(data, tar, alpha, True)
        num_CI += tests_used
        for other in neighbours:
            DAG[tar, other] = 1
            DAG[other, tar] = 1
        pc[str(tar)] = list(map(str, neighbours))

    # Enforce symmetry of the candidate sets. This can be costly when there
    # are many variables; it could instead be folded into the forward and
    # backward phases of MMPC by passing the whole pc dictionary along.
    pc = symmetry(pc)

    # Phase 2: score-based search restricted to the candidate sets.
    dag_dict = hc(data, pc, score_function)

    # Write the directions found by hill-climbing into the matrix.
    for node_key, relatives in dag_dict.items():
        node = int(node_key)
        for parent_key in relatives['parents']:
            parent = int(parent_key)
            DAG[parent, node] = -1
            DAG[node, parent] = 0
        for child_key in relatives['children']:
            child = int(child_key)
            DAG[node, child] = -1
            DAG[child, node] = 0

    return DAG, num_CI


# import pandas as pd
#
# from CBD.MBs.common.realMB import realMB
# from LSL.MBs.common.real_P_C_S import real_p_c_s
# if __name__ == '__main__':
#
#     data = pd.read_csv('D:/data/child_data/Child_s5000_v2.csv')
#     real_graph_path = "D:/data/child_data/Child_graph.txt"
#     _, all_number = np.shape(data)
#     real_p, real_c, real_s = real_p_c_s(all_number, real_graph_path)
#     print("real_p is:", real_p)
#     print("real_c is:", real_c)
#     DAG = MMHC(data)
#     print(DAG)
#     with open(r".\result.txt", "a+") as file:
#         file.write(str(DAG))
예제 #3
0
def example(method, data, list_target, alpha=0.01, is_discrete=True):
    """Run one PC-discovery algorithm on each target and log the results.

    For every target the discovered parents/children set is printed and
    written to ``../output/pc.txt`` (the file is created or truncated). The
    CPU time spent on the whole loop, measured with ``time.process_time``,
    is reported last.

    Args:
        method: Algorithm name; one of ``"MBtoPC"``, ``"pc_simple"``,
            ``"HITON_PC"``, ``"MMPC"``, ``"getPC"`` or ``"semi_HITON_PC"``.
        data: Data set forwarded to the algorithm. For ``"MBtoPC"`` it must
            be two-dimensional because its column count is read.
        list_target: Iterable of target variables to process.
        alpha: Significance level forwarded to the algorithm.
        is_discrete: Forwarded to the algorithm.

    Raises:
        ValueError: If ``method`` is not a supported name. ``ValueError`` is
            a subclass of ``Exception``, so handlers written for the former
            plain ``Exception`` keep working; the message is unchanged.
    """
    # The context manager guarantees the file is closed even when an
    # algorithm raises or the method name is rejected.
    with open("../output/pc.txt", "w+") as file:
        # Only MBtoPC needs the number of variables.
        if method == "MBtoPC":
            _, kVar = numpy.shape(data)
        else:
            kVar = None

        # Every entry returns just the PC set for one target. Lambdas defer
        # the lookup of each algorithm name until it is actually called, so
        # only the selected algorithm has to be importable.
        finders = {
            "MBtoPC": lambda t: MBtoPC(data, t, alpha, list(range(kVar)),
                                       is_discrete)[0],
            "pc_simple": lambda t: pc_simple(data, t, alpha, is_discrete)[0],
            "HITON_PC": lambda t: HITON_PC(data, t, alpha, is_discrete)[0],
            "MMPC": lambda t: MMPC(data, t, alpha, is_discrete)[0],
            "getPC": lambda t: getPC(data, t, alpha, is_discrete)[0],
            "semi_HITON_PC": lambda t: semi_HITON_PC(data, t, alpha,
                                                     is_discrete)[0],
        }
        if method not in finders:
            raise ValueError("method input error!")
        find_pc = finders[method]

        start_time = time.process_time()
        for target in list_target:
            pc = find_pc(target)
            line = "the pc of " + str(target) + " is:" + str(pc)
            file.write(line + "\n")
            print(line)
        end_time = time.process_time()

        summary = "the running time is: " + str(end_time - start_time)
        print(summary)
        file.write(summary + "\n")
예제 #4
0
def PCDbyPCD(data, target, alaph, is_discrete=True):
    """Find parents, children and undirected neighbours of ``target``.

    Variables are processed in first-in-first-out order starting from
    ``target``. For each processed variable ``A`` the function obtains a
    PCD set and separating sets from ``MMPC``, records edges in ``DAG``,
    marks v-structures in ``pDAG`` and then replaces ``pDAG`` with the result
    of ``Meek(DAG, pDAG, data)``.

    Matrix encoding used below: in ``DAG`` an edge between ``a`` and ``b`` is
    ``DAG[a, b] == DAG[b, a] == 1``; in ``pDAG`` a directed edge ``a -> b`` is
    ``pDAG[a, b] == -1`` with ``pDAG[b, a] == 0``, and an unoriented edge is
    ``1`` in both positions.

    Args:
        data: Two-dimensional data set; its second dimension gives the number
            of variables. Passed unchanged to ``MMPC`` and ``Meek``.
        target: Index of the variable whose local structure is wanted.
        alaph: Threshold forwarded to ``MMPC``.
        is_discrete: Forwarded to ``MMPC``.

    Returns:
        tuple: ``(parents, children, undirected)`` - lists of variable
        indices ``i`` with ``pDAG[i, target] == -1``,
        ``pDAG[target, i] == -1`` and ``pDAG[target, i] == 1`` respectively.
    """
    _, kVar = np.shape(data)
    DAG = np.zeros((kVar, kVar))
    pDAG = DAG.copy()
    # NOTE(review): G is written below but never read or returned here.
    G = DAG.copy()
    # ``[x] * kVar`` makes every slot refer to the same inner list. That is
    # harmless in this function because slots are only rebound (see the MMPC
    # assignment below), never mutated in place.
    sepset_all = [[[]]] * kVar
    PCD_set_all = [[]] * kVar
    # tmp holds the variables that have already been processed.
    tmp = []
    # Q is the FIFO queue of variables waiting to be processed.
    Q = [target]
    parents = []
    children = []
    undirected = []
    # lnum counts the loop iterations that reach the break check.
    lnum = 0

    while len(tmp) <= kVar and Q != []:
        A = Q[0]
        # print("A is: " + str(A))
        del Q[0]
        # A variable can be queued several times; process it only once.
        if A in tmp:
            continue
        else:
            tmp.append(A)

        # Get PCD(A) and its separating sets unless already stored.
        if PCD_set_all[A] == []:
            PCD_set_all[A], sepset_all[A], _ = MMPC(data, A, alaph,
                                                    is_discrete)
        for B in PCD_set_all[A]:
            # print("B is: " + str(B))
            Q.append(B)
            # if PCD_set_all[B] == []:
            #      PCD_set_all[B], sepset_all[B], _ = MMPC(data, B, alaph)
            # Keep the edge only if the relation is symmetric. The stored set
            # of B stays empty until B itself has been processed, so an edge
            # A - B is recorded only once both endpoints have been processed.
            if A not in PCD_set_all[B]:
                continue

            DAG[A, B] = 1
            DAG[B, A] = 1

            # Add the edge as unoriented unless pDAG already holds an entry
            # for this pair (e.g. an orientation set earlier).
            if pDAG[A, B] == 0 and pDAG[B, A] == 0:
                pDAG[A, B] = 1
                pDAG[B, A] = 1
                G[A, B] = 1
                G[B, A] = 1

            for C in PCD_set_all[B]:
                # if PCD_set_all[C] == []:
                #     PCD_set_all[C], sepset_all[C], _ = MMPC(data, C, alaph)

                # if B not in PCD_set_all[C]:
                #      continue
                #
                # DAG[C, B] = 1
                # DAG[B, C] = 1
                #
                # if pDAG[C, B] == 0 and pDAG[B, C] == 0:
                #     pDAG[C, B] = 1
                #     pDAG[B, C] = 1
                #     G[C, B] = 1
                #     G[B, C] = 1

                # Only a C that is neither A itself nor in PCD(A) can form
                # an unshielded triple A - B - C.
                if C in PCD_set_all[A] or C == A:
                    continue

                # v-structure: the edge C - B is already confirmed and B is
                # not in the separating set stored for C in A's run, so
                # orient A -> B <- C.
                if DAG[C, B] == 1 and DAG[B, C] == 1:
                    if B not in sepset_all[A][C]:
                        pDAG[A, B] = -1
                        pDAG[B, A] = 0
                        pDAG[C, B] = -1
                        pDAG[B, C] = 0

                        G[A, B] = 1
                        G[B, A] = 0
                        G[C, B] = 1
                        G[B, C] = 0

        # Meek is defined elsewhere; presumably it applies Meek's orientation
        # rules to propagate directions - confirm against its definition.
        pDAG = Meek(DAG, pDAG, data)

        lnum += 1
        # Count the variables whose stored PCD set is non-empty.
        length_PCD_set = 0
        for i in range(kVar):
            if PCD_set_all[i] != []:
                length_PCD_set += 1
        # break condition: more iterations than non-empty PCD sets, and no
        # entry equal to 1 (unoriented edge) left in target's row or column.
        if lnum > length_PCD_set:
            if np.all(pDAG[:, target] != 1) and np.all(pDAG[target, :] != 1):
                # print("break")
                break
    # print(pDAG)
    # Read the local structure of target from the final pDAG.
    for i in range(kVar):
        if pDAG[i, target] == -1:
            parents.append(i)
        if pDAG[target, i] == -1:
            children.append(i)
        if pDAG[target, i] == 1:
            undirected.append(i)

    return parents, children, undirected