Example #1
    def load_data(self):
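        # Load the multiplex graph data (cached in baselines.pkl after the first
        # run), shift node ids to start at 0 when necessary, and build the
        # training and test edge lists used by the downstream baselines.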
        train_edges = []
        path_pk = "baselines.pkl"
        if os.path.exists(path_pk):
            print("The pkl file has existed!")
            with open(path_pk, 'rb') as f:
                (nx_graph, merge_graph, pos_edge_list, neg_edge_list,
                 nodes_attr) = pk.load(f)
        else:
            path = "Sampling_graph/Datasets_With_Attributes/" + os.path.basename(
                self.path) + ".graph"
            nx_graph, merge_graph, pos_edge_list, neg_edge_list, nodes_attr = Reader.data_load(
                path)
            with open(path_pk, 'wb') as f:
                pk.dump((nx_graph, merge_graph, pos_edge_list, neg_edge_list,
                         nodes_attr), f)
        # Relabel the network's nodes; the node list has to be sorted first
        test_edges, test_labels = get_selected_edges(pos_edge_list,
                                                     neg_edge_list)
        nodes = sorted(list(merge_graph.nodes()))
        if nodes[0] > 0:
            train_edges.extend([[i, e[0] - 1, e[1] - 1, 1]
                                for i in range(len(nx_graph))
                                for e in nx_graph[i].edges()])
            train_merge = nx.relabel_nodes(merge_graph, lambda x: int(x) - 1)
            train_nxgraph = [
                nx.relabel_nodes(g, lambda x: int(x) - 1) for g in nx_graph
            ]
            test_edges = [[e[0] - 1, e[1] - 1] for e in test_edges]
            nodes = list(train_merge.nodes())
        else:
            train_edges.extend([[i, e[0], e[1], 1]
                                for i in range(len(nx_graph))
                                for e in nx_graph[i].edges()])
            train_nxgraph = copy.deepcopy(nx_graph)
            train_merge = copy.deepcopy(merge_graph)

        # Some node IDs are not contiguous; the statements below remap them to contiguous indices
        if isinstance(test_edges[0], list):
            restru_test_edges = [(e[0], e[1]) for e in test_edges]
        else:
            restru_test_edges = [(nodes.index(e[0]), nodes.index(e[1]))
                                 for e in test_edges]
        str_graph = nx.relabel_nodes(train_merge, lambda x: str(x))

        # The following builds the graph type defined by opennet so that existing single-layer network algorithms can be used for comparison
        G = opgraph.Graph()
        DG = str_graph.to_directed()
        G.read_g(DG)
        nx_para_graph = []
        for g in train_nxgraph:
            str_graph = nx.relabel_nodes(g, lambda x: str(x))
            G = opgraph.Graph()
            DG = str_graph.to_directed()
            G.read_g(DG)
            nx_para_graph.append(G)

        return train_nxgraph, restru_test_edges, train_merge, test_edges, train_edges, test_labels
Example #2
def get_individual_emb(train_lncRNA_miRNA_matrix, positive_num):
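    # Build the lncRNA-miRNA network, write it to an edge list, and embed it with
    # DeepWalk, HOPE, a Laplacian embedding, GraRep, and a graph autoencoder; each
    # embedding matrix is then split into its lncRNA rows and its miRNA rows.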
    lncRNA_miRNA_matrix_net = construct_net(train_lncRNA_miRNA_matrix, positive_num)
    net2edgelist(np.mat(lncRNA_miRNA_matrix_net))
    graph1 = graph.Graph()
    graph1.read_edgelist("L_lncRNA_miRNA_edgelist_L.txt")

    # The first shape[0] rows of every embedding matrix correspond to lncRNAs,
    # the remaining rows to miRNAs.
    lncRNA_len = train_lncRNA_miRNA_matrix.shape[0]

    dw_lncRNA_miRNA_emb = get_dw_embedding_matrix(np.mat(lncRNA_miRNA_matrix_net), graph1)
    lncRNA_emb_dw = np.array(dw_lncRNA_miRNA_emb[:lncRNA_len, :])
    miRNA_emb_dw = np.array(dw_lncRNA_miRNA_emb[lncRNA_len:, :])

    hope_lncRNA_miRNA_emb = get_hope_embedding_matrix(np.mat(lncRNA_miRNA_matrix_net), graph1)
    lncRNA_emb_hope = np.array(hope_lncRNA_miRNA_emb[:lncRNA_len, :])
    miRNA_emb_hope = np.array(hope_lncRNA_miRNA_emb[lncRNA_len:, :])

    lap_lncRNA_miRNA_emb = get_lap_embedding_matrix(np.mat(lncRNA_miRNA_matrix_net), graph1)
    lncRNA_emb_lap = np.array(lap_lncRNA_miRNA_emb[:lncRNA_len, :])
    miRNA_emb_lap = np.array(lap_lncRNA_miRNA_emb[lncRNA_len:, :])

    GraRep_lncRNA_miRNA_emb = get_GraRep_embedding_matrix(np.mat(lncRNA_miRNA_matrix_net), graph1)
    lncRNA_emb_GraRep = np.array(GraRep_lncRNA_miRNA_emb[:lncRNA_len, :])
    miRNA_emb_GraRep = np.array(GraRep_lncRNA_miRNA_emb[lncRNA_len:, :])

    GAE_lncRNA_miRNA_emb = get_GAE_embedding_matrix(np.mat(lncRNA_miRNA_matrix_net), hidden_unit=512)
    GAE_lncRNA_emb = np.array(GAE_lncRNA_miRNA_emb[:lncRNA_len, :])
    GAE_miRNA_emb = np.array(GAE_lncRNA_miRNA_emb[lncRNA_len:, :])

    return [lncRNA_emb_dw, miRNA_emb_dw, lncRNA_emb_hope, miRNA_emb_hope,
            lncRNA_emb_lap, miRNA_emb_lap, lncRNA_emb_GraRep, miRNA_emb_GraRep,
            GAE_lncRNA_emb, GAE_miRNA_emb]
Example #3
def Get_embedding_Matrix(gene_disease_matrix_net):
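    # Write the gene-disease network to an edge list, load it as a graph, and
    # compute several embeddings (SDNE, node2vec, DeepWalk, GF, Laplacian, HOPE);
    # note that only the DeepWalk embedding is actually returned.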
    Net2edgelist(gene_disease_matrix_net)

    graph1 = graph.Graph()
    graph1.read_edgelist("gene_disease.txt")
    _sdne = Get_sdne(graph1)
    _n2v = Get_n2v(graph1)
    _dw = Get_dw(graph1)
    _gf = Get_gf(graph1)
    _lap = Get_lap(graph1)
    _hope = Get_hope(graph1)
    return _dw
Example #4
def Main_function():
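    # For every multiplex graph dataset: relabel nodes, rebuild the per-layer test
    # edge sets, and report link-prediction AUC for a series of baselines
    # (merged-network AA, OhmNet, MNE, PMNE, MELL, CN/JC/AA, single-layer node2vec)
    # as well as for the Network + Embedding(N2V) + SNF approach over a grid of
    # k and mu values.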

    # Data loading stage
    graphs_path = './Code/'
    graph_datasets = RMN.read_graph_pickle(graphs_path)

    # Representation-learning parameter setup stage
    p = Parameter["p"]
    q = Parameter["q"]
    num_walks = Parameter["num_walks"]
    walk_length = Parameter["walk_length"]
    dimensions = Parameter["dimensions"]
    knei = [10, 15, 20, 25]
    mu = [0.4, 0.5, 0.6]
    for name, dets in graph_datasets.items():
        print("---------------%s---------------" % name)
        wvecs = []

        # Load the training graphs (per-layer networks and the merged network)
        nx_graph = dets['train_ng']
        merge_graph = dets['train_mg']

        # Load the test/validation edges and their labels
        train_edges = []
        ground_truth = []
        test_edges = dets["test_edges"]
        test_labels = dets["test_labels"]

        # Relabel the network's nodes; the node list has to be sorted first
        nodes = sorted(list(merge_graph.nodes()))
        if nodes[0] > 0:
            train_edges.extend([[i, e[0] - 1, e[1] - 1, 1]
                                for i in range(len(nx_graph))
                                for e in nx_graph[i].edges()])
            train_merge = nx.relabel_nodes(merge_graph, lambda x: int(x) - 1)
            train_nxgraph = [
                nx.relabel_nodes(g, lambda x: int(x) - 1) for g in nx_graph
            ]
            # shift node ids to be 0-based, keeping the per-layer structure
            test_edges = [[[e[0] - 1, e[1] - 1] for e in layer]
                          for layer in test_edges]
            nodes = list(train_merge.nodes())
        else:
            train_edges.extend([[i, e[0], e[1], 1]
                                for i in range(len(nx_graph))
                                for e in nx_graph[i].edges()])
            train_nxgraph = copy.deepcopy(nx_graph)
            train_merge = copy.deepcopy(merge_graph)

        # Some node IDs are not contiguous; the statements below remap them to contiguous indices
        restru_test_edges = []
        for i in test_edges:
            restru_test_edges.append([[nodes.index(e[0]),
                                       nodes.index(e[1])] for e in i])
        str_graph = nx.relabel_nodes(train_merge, lambda x: str(x))

        # The following builds the graph type defined by opennet so that existing single-layer network algorithms can be used for comparison
        G = opgraph.Graph()
        DG = str_graph.to_directed()
        G.read_g(DG)
        nx_para_graph = []
        for g in train_nxgraph:
            str_graph = nx.relabel_nodes(g, lambda x: str(x))
            G = opgraph.Graph()
            DG = str_graph.to_directed()
            G.read_g(DG)
            nx_para_graph.append(G)

        ############################ Comparison experiments ############################
        #1# merge_network
        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for e in layer:
                if e[0] in train_merge.nodes() and e[1] in train_merge.nodes():
                    y_pred.append(
                        list(nx.adamic_adar_index(train_merge, [e]))[0][2])
                else:
                    y_pred.append(0)  # if either node is missing from the graph, score the edge as 0
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("merge-network:%f" % (sum(auc) / len(auc)))

        #2# OhmNet: multi-layer network embedding, Bioinformatics 2017
        ohmnet_walks = []
        original_walks = []
        LG = copy.deepcopy(train_nxgraph)
        on = ohmnet.OhmNet(LG,
                           p=p,
                           q=q,
                           num_walks=num_walks,
                           walk_length=walk_length,
                           dimension=dimensions,
                           window_size=10,
                           n_workers=8,
                           n_iter=5,
                           out_dir='.')
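        # OhmNet walks visit layer-qualified node names; the split below appears
        # to keep only the node id so that a single embedding is learned per node
        # across layers before the walks are fed to word2vec-style training.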
        for ns in on.embed_multilayer():
            original_walks.append(ns)
            on_walks = [n.split("_")[2] for n in ns]
            ohmnet_walks.append([str(step) for step in on_walks])
        Ohmnet_model = Node2vec.N2V.learn_embeddings(ohmnet_walks,
                                                     dimensions,
                                                     workers=5,
                                                     window_size=10,
                                                     niter=5)
        Ohmnet_wvecs = np.array(
            [Ohmnet_model.get_vector(str(i)) for i in nodes])
        y_pred = []
        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for e in layer:
                # score 0 if either node is missing from the model vocabulary
                if str(e[0]) in Ohmnet_model.index2entity and \
                        str(e[1]) in Ohmnet_model.index2entity:
                    y_pred.append(
                        cosine_similarity([
                            Ohmnet_model.get_vector(str(e[0])),
                            Ohmnet_model.get_vector(str(e[1]))
                        ])[0][1])
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("ohmnet-network:%f" % (sum(auc) / len(auc)))
        #3# MNE: scalable multiplex network embedding, IJCAI 2018
        edge_data_by_type = {}
        all_edges = list()
        all_nodes = list()
        for e in train_edges:
            if e[0] not in edge_data_by_type:
                edge_data_by_type[e[0]] = list()
            edge_data_by_type[e[0]].append((e[1], e[2]))
            all_edges.append((e[1], e[2]))
            all_nodes.append(e[1])
            all_nodes.append(e[2])
        all_nodes = list(set(all_nodes))
        all_edges = list(set(all_edges))
        edge_data_by_type['Base'] = all_edges
        MNE_model = MNE.train_model(edge_data_by_type)
        local_model = dict()
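        # For each layer, the MNE node vector is the shared 'base' embedding plus
        # 0.5 * (the layer-specific 'addition' embedding projected through that
        # layer's 'tran' matrix); edges are then scored by cosine similarity.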

        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for pos in range(len(MNE_model['index2word'])):
                local_model[MNE_model['index2word']
                            [pos]] = MNE_model['base'][pos] + 0.5 * np.dot(
                                MNE_model['addition'][index][pos],
                                MNE_model['tran'][index])
            for e in layer:
                # score 0 if either node is missing from the model vocabulary
                if str(e[0]) in MNE_model['index2word'] and \
                        str(e[1]) in MNE_model['index2word']:
                    y_pred.append(
                        cosine_similarity(
                            [local_model[str(e[0])],
                             local_model[str(e[1])]])[0][1])
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("MNE:%f" % (sum(auc) / len(auc)))

        #4# The three PMNE variants
        merged_networks = dict()
        merged_networks['training'] = dict()
        merged_networks['test_true'] = dict()
        merged_networks['test_false'] = dict()
        for index, g in enumerate(train_nxgraph):
            merged_networks['training'][index] = set(g.edges())
            merged_networks['test_true'][index] = restru_test_edges[index]
            merged_networks['test_false'][index] = test_edges[index][
                len(test_edges):]

        performance_1, performance_2, performance_3 = main.Evaluate_PMNE_methods(
            merged_networks)
        print("PMNE(n):%f" % (performance_1))
        print("PMNE(r):%f" % (performance_2))
        print("MNE(c):%f" % (performance_3))

        #5# MELL: node representation learning for multi-layer networks, WWW 2018
        L = len(nx_graph)
        N = max([int(n) for n in train_merge.nodes()]) + 1
        N = max(N, train_merge.number_of_nodes())  # the adjacency matrices need a consistent number of rows
        directed = True
        d = 128
        k = 3
        lamm = 10
        beta = 1
        gamma = 1
        MELL_wvecs = MELL_model(L, N, directed, train_edges, d, k, lamm, beta,
                                gamma)
        MELL_wvecs.train(30)  # originally 500, but 500 iterations fail on some datasets, so 30 is used
        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for e in layer:
                # score 0 if either node is missing from the training node set
                if e[0] in all_nodes and e[1] in all_nodes:
                    y_pred.append(MELL_wvecs.predict((index, e[0], e[1])))
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("MELL:%f" % (sum(auc) / len(auc)))

        #6# Basic similarity baselines: CN, JC, and AA
        auc1 = []
        auc2 = []
        auc3 = []
        for index, layer in enumerate(restru_test_edges):
            y_pred_cn = []
            y_pred_jc = []
            y_pred_AA = []
            for e in layer:
                if e[0] in train_nxgraph[index].nodes(
                ) and e[1] in train_nxgraph[index].nodes():
                    y_pred_cn.append(
                        len(
                            list(
                                nx.common_neighbors(train_nxgraph[index], e[0],
                                                    e[1]))))
                    y_pred_jc.append(
                        list(nx.jaccard_coefficient(train_nxgraph[index],
                                                    [e]))[0][2])
                    y_pred_AA.append(
                        list(nx.adamic_adar_index(train_nxgraph[index],
                                                  [e]))[0][2])
                else:
                    y_pred_cn.append(0)  # if a node is missing, its common-neighbor count is 0
                    y_pred_jc.append(0)
                    y_pred_AA.append(0)

            auc1.append(roc_auc_score(test_labels[index], y_pred_cn))  # compute the AUC
            auc2.append(roc_auc_score(test_labels[index], y_pred_jc))
            auc3.append(roc_auc_score(test_labels[index], y_pred_AA))
        print("CN-network:%f" % (sum(auc1) / len(auc1)))
        print("JC-network:%f" % (sum(auc2) / len(auc2)))
        print("AA-network:%f" % (sum(auc3) / len(auc3)))

        #7# Single-layer Node2vec
        auc = []
        for index, G in enumerate(nx_para_graph):
            model_nf = node2vec.Node2vec(G,
                                         walk_length,
                                         num_walks,
                                         dimensions,
                                         p=p,
                                         q=q,
                                         dw=True)
            index_num = sorted([int(i) for i in model_nf.vectors.keys()])
            g_embedding = [model_nf.vectors[str(i)] for i in index_num]
            y_pred = []
            for e in restru_test_edges[index]:
                # score 0 if either node is missing from the graph
                if str(e[0]) in G.G.nodes() and str(e[1]) in G.G.nodes():
                    y_pred.append(
                        cosine_similarity([
                            model_nf.vectors[str(e[0])],
                            model_nf.vectors[str(e[1])]
                        ])[0][1])
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("Node2vec: %f" % (sum(auc) / len(auc)))

        #8# Network + Embedding(N2V) + SNF4st network representation learning
        for k in knei:
            for m in mu:
                auc_final = []
                for i in range(2, 10):  # repeat several runs to average the results
                    # arguments: the KNN K value, the mu value, the K used in the
                    # remaining steps, and the number of iterations; roughly 20
                    # iterations are usually enough to converge
                    network, groundtruth, best, second = NFC.cluster_E(
                        nx_para_graph, ground_truth, Parameter, nodes, k, m, k,
                        30)  # CKM\V(20, 0.5, 20, 20)
                    Network_Adj = _find_dominate_set(
                        check_symmetric(network, raise_warning=False),
                        K=k)  # build the adjacency matrix from the network similarity matrix; CKM(20), Vickers(15)
                    g = nx.from_numpy_matrix(Network_Adj)  # build a graph from the adjacency matrix
                    auc = []
                    for index, layer in enumerate(restru_test_edges):
                        y_pred = []
                        for e in layer:
                            if e[0] in train_nxgraph[index].nodes(
                            ) and e[1] in train_nxgraph[index].nodes():
                                y_pred.append(
                                    list(
                                        nx.adamic_adar_index(
                                            g, [
                                                (nodes.index(
                                                    e[0]), nodes.index(e[1]))
                                            ]))[0][2])  # score the test pair with Adamic-Adar similarity on the fused graph
                            else:
                                y_pred.append(0)
                        auc.append(roc_auc_score(test_labels[index],
                                                 y_pred))  # compute the AUC
                    auc_final.append(sum(auc) / len(auc))
                value = max(auc_final)
                average = sum(auc_final) / len(auc_final)
                print("K=%d Mu=%.2f Max:index({%d})->%f" %
                      (k, m, auc_final.index(value), value))
                print("K=%d Mu=%f Ave:->%f" % (k, m, average))