Ejemplo n.º 1
0
def execute_walks_graph_embedding(graph: nx.Graph,
                                  length: int,
                                  iterations: int,
                                  p: float = 1.0,
                                  q: float = 1.0,
                                  **kwargs: Dict) -> np.ndarray:
    """Run first/second order random walks with the GraphEmbedding walker.

    Parameters
    --------------------------
    graph: nx.Graph,
        The graph on which to run the walks.
    length: int,
        Length of the walks (forwarded as ``walk_length``).
    iterations: int,
        Number of walks to start from each node (forwarded as ``num_walks``).
    p: float = 1.0,
        Inverse weight for making the walk local.
        By default, the walk will be uniform.
    q: float = 1.0,
        Inverse weight for making the walk depth-first.
        By default, the walk will be uniform.
    kwargs: Dict,
        Additional parameters used by other libraries; they are accepted
        and ignored by this function.

    Returns
    --------------------------
    The ``sentences`` attribute of the ``Node2Vec`` object built here.
    NOTE(review): the signature advertises ``np.ndarray``; confirm that
    ``sentences`` really is an array and not a plain list of walks.
    """
    # The walker is configured to use one worker per available CPU.
    walker = Node2Vec(
        graph,
        walk_length=length,
        num_walks=iterations,
        workers=cpu_count(),
        p=p,
        q=q,
    )
    return walker.sentences
Ejemplo n.º 2
0
    def get_embeddings(self, inst, th=1):
        """Embed the nodes of ``inst`` with the model named by ``self._embedding``.

        A weighted directed graph is parsed from the edge list returned by
        ``self._compose_edge_list(inst.dist_mat, th)``. Depending on
        ``self._embedding`` ('deepwalk', 'node2vec', 'line', 'sdne' or
        'struc2vec') the matching model is built and trained on that graph.

        :param inst: object providing ``dist_mat`` and ``n``.
        :param th: forwarded unchanged to ``self._compose_edge_list``.
        :return: a numpy array whose row ``i`` is the embedding stored under
            the key ``str(i)`` for ``i`` in ``range(inst.n)``. For any other
            value of ``self._embedding`` the result of
            ``self._normalise(inst)`` is returned instead.
        """
        edge_lines = self._compose_edge_list(inst.dist_mat, th)
        graph = nx.parse_edgelist(
            edge_lines,
            create_using=nx.DiGraph(),
            nodetype=None,
            data=[('weight', float)],
        )

        method = self._embedding
        if method == 'deepwalk':
            model = DeepWalk(graph, walk_length=10, num_walks=80, workers=1)
            model.train(window_size=5, iter=3)
        elif method == 'node2vec':
            model = Node2Vec(graph, walk_length=10, num_walks=80,
                             p=0.25, q=4, workers=1)
            model.train(window_size=5, iter=3)
        elif method == 'line':
            # order can be one of 'first', 'second', 'all'
            model = LINE(graph, embedding_size=128, order='second')
            model.train(batch_size=1024, epochs=50, verbose=2)
        elif method == 'sdne':
            model = SDNE(graph, hidden_size=[256, 128])
            model.train(batch_size=3000, epochs=40, verbose=2)
        elif method == 'struc2vec':
            model = Struc2Vec(graph, 10, 80, workers=4, verbose=40)
            model.train(window_size=5, iter=3)
        else:
            # No graph model selected: fall back to the normalised instance.
            return self._normalise(inst)

        # The embedding dictionary is keyed by node id as a string.
        vectors = model.get_embeddings()
        return np.array([vectors[str(node)] for node in range(inst.n)])
Ejemplo n.º 3
0
    # Stack the embedding vector of every entry of X, keeping X's order.
    emb_list = np.array([embeddings[k] for k in X])

    # Reduce the stacked vectors to two components for plotting.
    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    # Group row positions by Y[i][0]; each group becomes one scatter series.
    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], []).append(i)

    # dpi sets the resolution, figsize sets the canvas size.
    plt.figure(dpi=300, figsize=(24, 12))
    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Read the Wiki edge list into a directed graph; the data column is
    # parsed as an integer 'weight' attribute.
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    # Build the Node2Vec model with rejection sampling switched off,
    # then train it and fetch the embeddings.
    model = Node2Vec(
        G,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=4,
        workers=1,
        use_rejection_sampling=0,
    )
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    # The evaluation call is commented out; only the plot is produced.
    # evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Ejemplo n.º 4
0
    # Command-line options: the dataset name and the fraction handed to
    # evaluate_embeddings.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        type=str,
                        default="cora",
                        help='dataset for training')
    parser.add_argument('--tr_frac', type=float, default=0.2, help='tr_frac')

    args = parser.parse_args()
    # Load the dataset; only `adj` and `labels` are used below.
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, labels = load_data(
        args.dataset)
    idx = np.argmax(labels, axis=1)
    # Write one "<row index> <argmax column> " line per row of `labels`.
    # The context manager closes (and therefore flushes) the file before the
    # following steps run, so nothing is left sitting in the write buffer.
    with open('labels.txt', 'w') as f:
        for i in range(labels.shape[0]):
            f.write(f'{i} {idx[i]} \n')

    # Build a directed graph from the dense adjacency matrix, then round-trip
    # it through an edge-list file.
    # NOTE(review): presumably done so node ids are read back as strings
    # (nodetype=None) — confirm against the embedding code.
    adj = adj.toarray()
    G = nx.from_numpy_array(adj, create_using=nx.DiGraph())
    nx.write_edgelist(G, "test.edgelist", data=[('weight', int)])
    G = nx.read_edgelist('test.edgelist',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    # Train Node2Vec and fetch the embeddings.
    model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4, workers=1)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings, args.tr_frac)
    plot_embeddings(embeddings)
Ejemplo n.º 5
0
    # Transform the collected embedding vectors with the configured model.
    node_pos = model.fit_transform(emb_list)

    # Group row positions by Y[i][0]; each group becomes one scatter series.
    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], []).append(i)

    # Draw one labelled series per group, using the first two columns
    # of node_pos as coordinates.
    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)

    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Read the Brazil airports edge list into a directed graph; the data
    # column is parsed as an integer 'weight' attribute.
    G = nx.read_edgelist(
        '../data/flight/brazil-airports.edgelist',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    # Train Node2Vec (train() is called with its defaults) and fetch
    # the embeddings.
    model = Node2Vec(
        G,
        10,
        80,
        workers=1,
        p=0.25,
        q=2,
    )
    model.train()
    embeddings = model.get_embeddings()

    # Evaluate the embeddings, then plot them.
    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Ejemplo n.º 6
0
    #                      p=0.25, q=2, workers=1, use_rejection_sampling=0)
    #     model.train(window_size=5, iter=3)
    #
    #     embeddings = model.get_embeddings()
    #     dic = evaluate_embeddings(embeddings)
    #     sum_mic+=dic['micro']
    #     sum_mac+=dic['macro']
    #     sum_acc+=dic['acc']
    #
    # print('ave_micro:')
    # print(sum_mic/iter)
    # print('ave_macro:')
    # print(sum_mac/iter)
    # print('ave_acc:')
    # print(sum_acc/iter)


    # model = DeepWalk(G, embed_size=128,walk_length=10, num_walks=80, workers=1)
    # model.train(window_size=5, iter=3)

    # model = LINE(G, embedding_size=128, order='second')
    # model = LINE(G, embedding_size=128, order='first')
    # model = LINE(G, embedding_size=128, order='all')
    # model.train(batch_size=1024, epochs=50, verbose=2)
    #
    # Active configuration: Node2Vec with rejection sampling switched off.
    model = Node2Vec(
        G,
        embed_size=128,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=2,
        workers=1,
        use_rejection_sampling=0,
    )
    model.train(window_size=5, iter=3)

    # Fetch the trained embeddings and plot them.
    embeddings = model.get_embeddings()
    plot_embeddings(embeddings)
    # plot_embeddings_3D(embeddings)
Ejemplo n.º 7
0
    # Stack the embedding vector of every entry of X, keeping X's order.
    emb_list = np.array([embeddings[k] for k in X])

    # Reduce the stacked vectors to two components for plotting.
    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    # Group row positions by Y[i][0]; each group becomes one scatter series.
    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], []).append(i)

    # Draw one labelled series per group, using the first two columns
    # of node_pos as coordinates.
    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Read the Wiki edge list into a directed graph; the data column is
    # parsed as an integer 'weight' attribute.
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    # Train Node2Vec and fetch the embeddings.
    model = Node2Vec(
        G,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=4,
        workers=1,
    )
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    # Evaluate the embeddings, then plot them.
    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Ejemplo n.º 8
0
    # embeddings = model.get_embeddings()
    #
    # evaluate_embeddings(embeddings)
    # plot_embeddings(embeddings)

    # Toy example: embed a tiny hand-written graph and scatter-plot the result.
    import pandas as pd

    # Ten hard-coded edges; node ids are converted to strings.
    df = pd.DataFrame()
    df['source'] = [str(i) for i in [0, 1, 2, 3, 4, 4, 6, 7, 7, 9]]
    df['target'] = [str(i) for i in [1, 4, 4, 4, 6, 7, 5, 8, 9, 8]]

    # Build an undirected graph from the 'source'/'target' columns.
    G = nx.from_pandas_edgelist(df, create_using=nx.Graph())

    # Node2Vec with rejection sampling switched off.
    # NOTE(review): the positional 10 and 80 are presumably walk_length and
    # num_walks — confirm against the Node2Vec signature.
    model = Node2Vec(G,
                     10,
                     80,
                     workers=1,
                     p=0.25,
                     q=2,
                     use_rejection_sampling=0)
    # Two components per embedding, matching the two coordinates read below.
    model.train(embed_size=2)

    embeddings = model.get_embeddings()
    # print(embeddings)
    x, y = [], []
    # Print the (key, vector) pairs ordered by key; the lambda's own `x`
    # parameter shadows the list `x` only inside the lambda.
    print(sorted(embeddings.items(), key=lambda x: x[0]))
    # Split every vector into its first and second component; the key `k`
    # is not used.
    for k, i in embeddings.items():
        x.append(i[0])
        y.append(i[1])
    plt.scatter(x, y)
    plt.show()