예제 #1
0
def get_dw_embedding_matrix(lncRNA_miRNA_matrix_net, graph1):
    """Embed graph1 with DeepWalk-mode node2vec and return a dense matrix
    whose row i holds the vector of the node whose id string is i."""
    model = node2vec.Node2vec(graph=graph1, path_length=80, num_paths=30, dim=30, dw=True)
    embeddings = model.vectors
    n_rows = len(lncRNA_miRNA_matrix_net)
    n_cols = len(list(embeddings.values())[0])  # embedding dimension from any vector
    matrix = np.zeros((n_rows, n_cols))
    for node_id, vector in embeddings.items():
        matrix[int(node_id), :] = vector
    return matrix
예제 #2
0
def Get_n2v(graph1):
    """Train node2vec on graph1 and return the post-processed embeddings."""
    n2v_model = node2vec.Node2vec(graph=graph1,
                                  path_length=80,
                                  num_paths=10,
                                  dim=10)
    return get_embedding(n2v_model.vectors)
예제 #3
0
def cluster(nx_graph, ground_truth, Parameter, K1, mu, K2, t, tsne=False):
    """Embed every layer graph with node2vec (DeepWalk mode, p=q=1) and
    fuse the per-layer embeddings with the SNF wrapper.

    Returns the fused network, the ground truth, and the two best results
    exactly as produced by snf_wrapper.fusion.
    """
    wvecs = []
    for layer_graph in nx_graph:
        embedder = node2vec.Node2vec(layer_graph,
                                     Parameter["walk_length"],
                                     Parameter["num_walks"],
                                     Parameter["dimensions"],
                                     p=1,
                                     q=1,
                                     dw=True)
        # Vector keys are node-id strings; order rows by numeric node id.
        ordered_ids = sorted(int(node) for node in embedder.vectors.keys())
        wvecs.append(np.array([embedder.vectors[str(i)] for i in ordered_ids]))
    wrapper = sw.snf_wrapper(wvecs, ground_truth)
    network, ground_truth, best, second = wrapper.fusion(K1, mu, K2, t)
    return network, ground_truth, best, second
예제 #4
0
def deepwalk_embedding(time=None,
                       path_length=10,
                       num_paths=5,
                       dim=10,
                       walkers=4,
                       window=5,
                       aggrateNum=3):
    """Aggregate up to `aggrateNum` interaction snapshots ending at `time`,
    dump the summed graph as a weighted edge list, and train a DeepWalk
    (node2vec with dw=True) embedding on it.

    Parameters
    ----------
    time : int or None
        Timestamp keying the snapshot files under `graphPath`; when None,
        a zero vector of length `dim` is returned instead of a model.
    path_length, num_paths, dim, walkers, window : int
        node2vec hyper-parameters: walk length, walks per node, embedding
        dimension, worker count, and skip-gram window size.
    aggrateNum : int
        Number of consecutive snapshots (600000 time units apart) to sum.

    Returns
    -------
    A trained node2vec.Node2vec model, or np.zeros(dim) when time is None.
    """
    if time is None:  # identity check instead of `== None`
        return np.zeros(dim)

    # Load and sum the snapshot adjacency matrices that exist on disk.
    interaction = sp.coo_matrix((10001, 10001))
    for i in range(aggrateNum):
        t = time - i * 600000  # step back one snapshot interval at a time
        fp_ = '{}/{}.npz'.format(graphPath, t)
        if os.path.exists(fp_):
            adj = load_npz(fp_).tocoo()
            print(interaction.shape)
            print(adj.shape)
            interaction += adj
    interaction = interaction.tocoo()

    # Write the aggregated graph as "row col weight" lines; the context
    # manager guarantees the file is flushed and closed before reading.
    tmp = 'tmp.txt'
    with open(tmp, "w+") as f:
        for r, c, v in zip(interaction.row, interaction.col, interaction.data):
            f.write('{} {} {}\n'.format(r, c, v))

    # Build the graph object expected by node2vec.
    g = Graph()
    g.read_edgelist(filename=tmp, weighted=True, directed=True)

    # Train the embedding. BUG FIX: the original hard-coded every
    # hyper-parameter (10, 5, 10, 4, 5), silently ignoring this function's
    # own arguments; they are now passed through (`walkers` -> `workers`).
    model = node2vec.Node2vec(graph=g,
                              path_length=path_length,
                              num_paths=num_paths,
                              dim=dim,
                              workers=walkers,
                              window=window,
                              dw=True)
    print(type(model.vectors))
    return model
예제 #5
0
def cluster_E(nx_graph,
              ground_truth,
              Parameter,
              nodes,
              K1,
              mu,
              K2,
              t,
              tsne=False):
    """Per-layer node2vec representation learning followed by SNF fusion
    (fusion_E variant); returns the fused network, the ground truth, and
    the two best fusion results."""
    # node2vec-based representation-learning stage.
    walk_length = Parameter["walk_length"]
    num_walks = Parameter["num_walks"]
    dimensions = Parameter["dimensions"]
    p = Parameter["p"]
    q = Parameter["q"]
    arr_size = len(nodes)
    # Kept for parity with the original (consumes the same RNG draws).
    each_data = np.random.random((arr_size, dimensions))
    wvecs = []
    for layer_graph in nx_graph:
        embedder = node2vec.Node2vec(layer_graph,
                                     walk_length,
                                     num_walks,
                                     dimensions,
                                     p=p,
                                     q=q,
                                     dw=True)
        # Vector keys are node-id strings; order rows by numeric node id.
        ordered_ids = sorted(int(key) for key in embedder.vectors.keys())
        wvecs.append(np.array([embedder.vectors[str(i)] for i in ordered_ids]))

    # SNF fusion stage.
    wrapper = sw.snf_wrapper(wvecs, ground_truth)
    network, ground_truth, best, second = wrapper.fusion_E(K1, mu, K2, t)

    # Return the learned (fused) network and companion results.
    return network, ground_truth, best, second
예제 #6
0
def Main_function():
    """Run a link-prediction benchmark over pickled multiplex-graph
    datasets, comparing merged-network Adamic-Adar, Ohmnet, MNE, PMNE,
    MELL, classic similarity indices (CN/JC/AA), single-layer node2vec,
    and an SNF-fused embedding approach; prints AUC for each method."""

    # Data-loading stage.
    graphs_path = './Code/'
    graph_datasets = RMN.read_graph_pickle(graphs_path)

    # Representation-learning parameter setup.
    p = Parameter["p"]
    q = Parameter["q"]
    num_walks = Parameter["num_walks"]
    walk_length = Parameter["walk_length"]
    dimensions = Parameter["dimensions"]
    knei = [10, 15, 20, 25]
    mu = [0.4, 0.5, 0.6]
    for name, dets in graph_datasets.items():
        print("---------------%s---------------" % name)
        wvecs = []

        # Load the training networks: per-layer graphs and merged graph.
        nx_graph = dets['train_ng']
        merge_graph = dets['train_mg']

        # Load the held-out test edges and their labels.
        train_edges = []
        ground_truth = []
        test_edges = dets["test_edges"]
        test_labels = dets["test_labels"]

        # Relabel the network's node ids; sorting is required first.
        nodes = sorted(list(merge_graph.nodes()))
        if nodes[0] > 0:
            # Node ids start at 1 (or higher): shift everything to 0-based.
            train_edges.extend([[i, e[0] - 1, e[1] - 1, 1]
                                for i in range(len(nx_graph))
                                for e in nx_graph[i].edges()])
            train_merge = nx.relabel_nodes(merge_graph, lambda x: int(x) - 1)
            train_nxgraph = [
                nx.relabel_nodes(g, lambda x: int(x) - 1) for g in nx_graph
            ]
            # NOTE(review): this comprehension flattens the per-layer edge
            # lists into one flat list of pairs, while later loops iterate
            # test_edges as a list of layers — confirm the intended shape.
            test_edges = [[e[0] - 1, e[1] - 1] for i in test_edges for e in i]
            nodes = list(train_merge.nodes())
        else:
            train_edges.extend([[i, e[0], e[1], 1]
                                for i in range(len(nx_graph))
                                for e in nx_graph[i].edges()])
            train_nxgraph = copy.deepcopy(nx_graph)
            train_merge = copy.deepcopy(merge_graph)

        # Some node ids are not contiguous; the statements below remap the
        # test edges onto contiguous positional indices.
        restru_test_edges = []
        for i in test_edges:
            restru_test_edges.append([[nodes.index(e[0]),
                                       nodes.index(e[1])] for e in i])
        str_graph = nx.relabel_nodes(train_merge, lambda x: str(x))

        # Wrap the graphs in opennet-style Graph objects so existing
        # single-layer algorithms can be used for comparison.
        G = opgraph.Graph()
        DG = str_graph.to_directed()
        G.read_g(DG)
        nx_para_graph = []
        for g in train_nxgraph:
            str_graph = nx.relabel_nodes(g, lambda x: str(x))
            G = opgraph.Graph()
            DG = str_graph.to_directed()
            G.read_g(DG)
            nx_para_graph.append(G)

        ######################## Baseline experiments ########################
        #1# merge_network
        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for e in layer:
                if e[0] in train_merge.nodes() and e[1] in train_merge.nodes():
                    y_pred.append(
                        list(nx.adamic_adar_index(train_merge, [e]))[0][2])
                else:
                    y_pred.append(0)  # when a node is absent, score it 0
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("merge-network:%f" % (sum(auc) / len(auc)))

        #2# Ohmnet: multilayer network embedding, Bioinformatics'2017
        ohmnet_walks = []
        orignal_walks = []
        LG = copy.deepcopy(train_nxgraph)
        on = ohmnet.OhmNet(LG,
                           p=p,
                           q=q,
                           num_walks=num_walks,
                           walk_length=walk_length,
                           dimension=dimensions,
                           window_size=10,
                           n_workers=8,
                           n_iter=5,
                           out_dir='.')
        for ns in on.embed_multilayer():
            orignal_walks.append(ns)
            # Walk tokens look like "<layer>_<?>_<node>"; keep the node part.
            on_walks = [n.split("_")[2] for n in ns]
            ohmnet_walks.append([str(step) for step in on_walks])
        Ohmnet_model = Node2vec.N2V.learn_embeddings(ohmnet_walks,
                                                     dimensions,
                                                     workers=5,
                                                     window_size=10,
                                                     niter=5)
        Ohmnet_wvecs = np.array(
            [Ohmnet_model.get_vector(str(i)) for i in nodes])
        y_pred = []
        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for e in layer:
                # NOTE(review): the original comment said missing keys get
                # 0.5, but the else-branch actually appends 0 — confirm.
                if str(e[0]) in Ohmnet_model.index2entity and str(
                        e[1]
                ) in Ohmnet_model.index2entity:
                    y_pred.append(
                        cosine_similarity([
                            Ohmnet_model.get_vector(str(e[0])),
                            Ohmnet_model.get_vector(str(e[1]))
                        ])[0][1])
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("ohmnet-network:%f" % (sum(auc) / len(auc)))
        #
        # #3# MNE: scalable multiplex network embedding, IJCAI'2018
        edge_data_by_type = {}
        all_edges = list()
        all_nodes = list()
        for e in train_edges:
            # train_edges rows are [layer, src, dst, 1]; group edges by layer.
            if e[0] not in edge_data_by_type:
                edge_data_by_type[e[0]] = list()
            edge_data_by_type[e[0]].append((e[1], e[2]))
            all_edges.append((e[1], e[2]))
            all_nodes.append(e[1])
            all_nodes.append(e[2])
        all_nodes = list(set(all_nodes))
        all_edges = list(set(all_edges))
        edge_data_by_type['Base'] = all_edges
        MNE_model = MNE.train_model(edge_data_by_type)
        local_model = dict()

        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            # Per-layer vector = base embedding + 0.5 * (addition @ tran).
            for pos in range(len(MNE_model['index2word'])):
                local_model[MNE_model['index2word']
                            [pos]] = MNE_model['base'][pos] + 0.5 * np.dot(
                                MNE_model['addition'][index][pos],
                                MNE_model['tran'][index])
            for e in layer:
                # Missing keys fall through to a 0 score (original comment
                # claimed 0.5 — see NOTE above).
                if str(e[0]) in MNE_model['index2word'] and str(
                        e[1]
                ) in MNE_model['index2word']:
                    y_pred.append(
                        cosine_similarity(
                            [local_model[str(e[0])],
                             local_model[str(e[1])]])[0][1])
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("MNE:%f" % (sum(auc) / len(auc)))

        #4# The three PMNE algorithms
        merged_networks = dict()
        merged_networks['training'] = dict()
        merged_networks['test_true'] = dict()
        merged_networks['test_false'] = dict()
        for index, g in enumerate(train_nxgraph):
            merged_networks['training'][index] = set(g.edges())
            merged_networks['test_true'][index] = restru_test_edges[index]
            # NOTE(review): the slice starts at len(test_edges) (the layer
            # count), not len(test_edges[index]) — confirm this is intended.
            merged_networks['test_false'][index] = test_edges[index][
                len(test_edges):]

        performance_1, performance_2, performance_3 = main.Evaluate_PMNE_methods(
            merged_networks)
        print("PMNE(n):%f" % (performance_1))
        print("PMNE(r):%f" % (performance_2))
        print("MNE(c):%f" % (performance_3))

        #5# MELL: multilayer node representation learning, WWW'2018
        L = len(nx_graph)
        N = max([int(n) for n in train_merge.nodes()]) + 1
        N = max(N, train_merge.number_of_nodes())  # row bound needed to build the adjacency matrix
        directed = True
        d = 128
        k = 3
        lamm = 10
        beta = 1
        gamma = 1
        MELL_wvecs = MELL_model(L, N, directed, train_edges, d, k, lamm, beta,
                                gamma)
        MELL_wvecs.train(30)  # was 500, but some datasets error at 500, so reduced to 30
        auc = []
        for index, layer in enumerate(restru_test_edges):
            y_pred = []
            for e in layer:
                # Missing nodes fall through to a 0 score.
                if e[0] in all_nodes and e[
                        1] in all_nodes:
                    y_pred.append(MELL_wvecs.predict((index, e[0], e[1])))
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("MELL:%f" % (sum(auc) / len(auc)))

        #6# Basic similarity baselines: CN, JC, AA
        auc1 = []
        auc2 = []
        auc3 = []
        for index, layer in enumerate(restru_test_edges):
            y_pred_cn = []
            y_pred_jc = []
            y_pred_AA = []
            for e in layer:
                if e[0] in train_nxgraph[index].nodes(
                ) and e[1] in train_nxgraph[index].nodes():
                    y_pred_cn.append(
                        len(
                            list(
                                nx.common_neighbors(train_nxgraph[index], e[0],
                                                    e[1]))))
                    y_pred_jc.append(
                        list(nx.jaccard_coefficient(train_nxgraph[index],
                                                    [e]))[0][2])
                    # y_pred_AA.append(list(nx.adamic_adar_index(train_nxgraph[index], [e]))[0][2])
                else:
                    y_pred_cn.append(0)  # zero common neighbours when a node is missing
                    y_pred_jc.append(0)
                    # y_pred_AA.append(0)

            auc1.append(roc_auc_score(test_labels[index], y_pred_cn))  # compute AUC
            auc2.append(roc_auc_score(test_labels[index], y_pred_jc))
            # NOTE(review): y_pred_AA is never populated (the AA lines above
            # are commented out), so this call receives an empty prediction
            # list and should raise — confirm before relying on this branch.
            auc3.append(roc_auc_score(test_labels[index], y_pred_AA))
        print("CN-network:%f" % (sum(auc1) / len(auc1)))
        print("JC-network:%f" % (sum(auc2) / len(auc2)))
        print("AA-network:%f" % (sum(auc3) / len(auc3)))

        #7# Single-layer Node2vec
        auc = []
        for index, G in enumerate(nx_para_graph):
            model_nf = node2vec.Node2vec(G,
                                         walk_length,
                                         num_walks,
                                         dimensions,
                                         p=p,
                                         q=q,
                                         dw=True)
            index_num = sorted([int(i) for i in model_nf.vectors.keys()])
            g_embedding = [model_nf.vectors[str(i)] for i in index_num]
            y_pred = []
            for e in restru_test_edges[index]:
                # Missing keys fall through to a 0 score.
                if str(e[0]) in G.G.nodes() and str(
                        e[1]) in G.G.nodes():
                    y_pred.append(
                        cosine_similarity([
                            model_nf.vectors[str(e[0])],
                            model_nf.vectors[str(e[1])]
                        ])[0][1])
                else:
                    y_pred.append(0)
            auc.append(roc_auc_score(test_labels[index], y_pred))
        print("Node2vec: %f" % (sum(auc) / len(auc)))

        #7# Network + Embedding(N2V) + SNF4st representation learning
        # (NOTE(review): section label "#7#" is duplicated above.)
        for k in knei:
            for m in mu:
                auc_final = []
                for i in range(2, 10):  # repeat runs to average the results
                    # Arguments: the KNN K value, mu, the K used by the other
                    # steps, and the iteration count — generally converges
                    # within about 20 iterations.
                    network, groundtruth, best, second = NFC.cluster_E(
                        nx_para_graph, ground_truth, Parameter, nodes, k, m, k,
                        30)  # CKM\V(20, 0.5, 20, 20)
                    Network_Adj = _find_dominate_set(
                        check_symmetric(network, raise_warning=False),
                        K=k)  # build the adjacency matrix from the similarity matrix; CKM(20) Vickers(15)
                    g = nx.from_numpy_matrix(Network_Adj)  # build a graph from the adjacency matrix
                    auc = []
                    for index, layer in enumerate(restru_test_edges):
                        y_pred = []
                        for e in layer:
                            if e[0] in train_nxgraph[index].nodes(
                            ) and e[1] in train_nxgraph[index].nodes():
                                y_pred.append(
                                    list(
                                        nx.adamic_adar_index(
                                            g, [
                                                (nodes.index(
                                                    e[0]), nodes.index(e[1]))
                                            ]))[0][2])  # AA similarity as the link probability for the test pair
                            else:
                                y_pred.append(0)
                        auc.append(roc_auc_score(test_labels[index],
                                                 y_pred))  # compute AUC
                    auc_final.append(sum(auc) / len(auc))
                value = max(auc_final)
                average = sum(auc_final) / len(auc_final)
                print("K=%d Mu=%.2f Max:index({%d})->%f" %
                      (k, m, auc_final.index(value), value))
                print("K=%d Mu=%f Ave:->%f" % (k, m, average))