예제 #1
0
    def test_random_walks_karate_graph(self):
        """Check the number and the length of walks on the karate graph.

        The test expects ``n_walks_per_node`` paths per entry of
        ``graph.edges``, each path holding ``walk_length + 1`` elements.
        """
        n_steps = 6
        walks_per_node = 2
        karate = data_utils.load_karate_graph()

        walks = karate.random_walk(
            walk_length=n_steps, n_walks_per_node=walks_per_node)

        # Compare [number of walks, length of the first walk] in one shot.
        n_expected_walks = len(karate.edges) * walks_per_node
        expected = [n_expected_walks, n_steps + 1]
        result = [len(walks), len(walks[0])]

        self.assertAllClose(result, expected)
    def setUp(self):
        """Seed the random generator and build the test fixtures.

        Stores the karate graph in ``self.karate_graph`` and a
        ``HyperbolicEmbedding`` configured for a short run in
        ``self.embedding``.
        """
        gs.random.seed(1234)
        self.karate_graph = load_karate_graph()

        # Small values keep the test fast; keyword order matches the
        # constructor call this replaces.
        hyperparameters = {
            'dim': 2,
            'max_epochs': 3,
            'lr': .05,
            'n_context': 1,
            'n_negative': 2,
        }
        self.embedding = HyperbolicEmbedding(**hyperparameters)
def main():
    """Embed the karate club graph in the Poincaré ball and cluster it.

    The embedding is computed by ``HyperbolicEmbedding.embed`` and shown
    on a Poincaré disk, coloured by the labels stored on the graph.
    ``RiemannianKMeans`` with two clusters is then fitted on the
    embeddings, and a second figure shows the predicted cluster of every
    node together with the cluster centroids.

    Both figures are displayed with ``plt.show()``; nothing is returned.
    """
    gs.random.seed(1234)

    karate_graph = load_karate_graph()

    hyperbolic_embedding = HyperbolicEmbedding()

    embeddings = hyperbolic_embedding.embed(karate_graph)

    # ---- Figure 1: embedding coloured by the labels of the graph ----
    label_colors = {1: 'b', 2: 'r'}
    group_1 = mpatches.Patch(color=label_colors[1], label='Group 1')
    group_2 = mpatches.Patch(color=label_colors[2], label='Group 2')

    circle = visualization.PoincareDisk(point_type='ball')

    _, ax = plt.subplots(figsize=(8, 8))
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    circle.set_ax(ax)
    circle.draw(ax=ax)
    for pt_id, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        plt.scatter(
            x, y,
            c=label_colors[karate_graph.labels[pt_id][0]],
            s=150
        )
        ax.annotate(pt_id, (x, y))

    plt.tick_params(
        which='both')
    plt.title('Poincare Ball Embedding of the Karate Club Network')
    plt.legend(handles=[group_1, group_2])
    plt.show()

    # ---- Clustering of the embedded nodes ----
    n_clusters = 2

    kmeans = RiemannianKMeans(
        riemannian_metric=hyperbolic_embedding.manifold.metric,
        n_clusters=n_clusters,
        init='random',
        mean_method='frechet-poincare-ball')

    centroids = kmeans.fit(X=embeddings, max_iter=100)
    labels = kmeans.predict(X=embeddings)

    # ---- Figure 2: predicted clusters and their centroids ----
    # A separate name avoids rebinding the label-colour dict to a list.
    cluster_colors = ['g', 'c', 'm']
    circle = visualization.PoincareDisk(point_type='ball')
    _, ax2 = plt.subplots(figsize=(8, 8))
    circle.set_ax(ax2)
    circle.draw(ax=ax2)
    ax2.axes.xaxis.set_visible(False)
    ax2.axes.yaxis.set_visible(False)
    group_1_predicted = mpatches.Patch(
        color=cluster_colors[0], label='Predicted Group 1')
    group_2_predicted = mpatches.Patch(
        color=cluster_colors[1], label='Predicted Group 2')
    group_centroids = mpatches.Patch(
        color=cluster_colors[2], label='Cluster centroids')

    # Each point is drawn exactly once. The previous version wrapped this
    # loop in ``for _ in range(n_clusters)`` without using the loop
    # variable, which stacked identical markers and annotations.
    for pt_id, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        # Label 0 gets the first colour, any other label the second.
        if labels[pt_id] == 0:
            color = cluster_colors[0]
        else:
            color = cluster_colors[1]
        plt.scatter(
            x, y,
            c=color,
            s=150
        )
        ax2.annotate(pt_id, (x, y))

    for centroid in centroids:
        plt.scatter(
            centroid[0], centroid[1],
            c=cluster_colors[2],
            marker='*',
            s=150,
        )

    plt.title('K-means applied to Karate club embedding')
    plt.legend(handles=[group_1_predicted, group_2_predicted, group_centroids])
    plt.show()
예제 #4
0
 def test_karate_graph(self):
     """Test the size of the karate graph.

     The number of entries in ``graph.edges`` plus the number of entries
     in ``graph.labels`` is expected to be 68.
     """
     graph = data_utils.load_karate_graph()
     result = len(graph.edges) + len(graph.labels)
     expected = 68
     # assertEqual reports both values on failure, whereas
     # assertTrue(result == expected) only reports "False is not true".
     self.assertEqual(result, expected)
def main():
    """Learn a Poincaré ball embedding of the karate club graph.

    Embeddings are initialised from a scaled normal distribution and then
    updated for ``max_epochs`` passes over the random walks of the graph.
    For every node of a walk, the ``loss`` function is evaluated against
    each node of its context window and a set of negative samples; the
    embedding of the node is then moved with the exponential map of the
    manifold metric along ``-lr`` times the second value returned by
    ``loss`` (presumably the gradient with respect to the example;
    confirm against ``loss``).

    The mean loss of every epoch is logged, and the final embeddings are
    plotted on a Poincaré disk coloured by the labels of the graph.
    Nothing is returned.
    """
    gs.random.seed(1234)
    dim = 2
    max_epochs = 100
    lr = .05
    n_negative = 2
    context_size = 1
    karate_graph = load_karate_graph()

    # Only the neighbour collections are needed, not the keys.
    nb_vertices_by_edges = [
        len(neighbors) for neighbors in karate_graph.edges.values()]
    logging.info('Number of edges: %s', len(karate_graph.edges))
    logging.info('Mean vertices by edges: %s',
                 (sum(nb_vertices_by_edges, 0) / len(karate_graph.edges)))

    # Index i is repeated proportionally to (its neighbour count) ** 3/4,
    # so that a uniform draw from the table favours well-connected entries.
    negative_table_parameter = 5
    negative_sampling_table = []

    for i, nb_v in enumerate(nb_vertices_by_edges):
        negative_sampling_table +=\
            ([i] * int((nb_v**(3. / 4.))) * negative_table_parameter)

    negative_sampling_table = gs.array(negative_sampling_table)
    random_walks = karate_graph.random_walk()
    embeddings = gs.random.normal(size=(karate_graph.n_nodes, dim))
    embeddings = embeddings * 0.2

    # Use ``dim`` rather than a literal so that the manifold always matches
    # the dimension of the embeddings created above.
    hyperbolic_manifold = PoincareBall(dim)

    colors = {1: 'b', 2: 'r'}
    for epoch in range(max_epochs):
        total_loss = []
        for path in random_walks:

            for example_index, one_path in enumerate(path):
                # NOTE(review): the end of the slice is exclusive, so the
                # window is path[i - context_size:i + context_size]. It
                # contains position i itself but not i + context_size.
                # Confirm that this is the intended context.
                window_start = max(0, example_index - context_size)
                window_stop = min(example_index + context_size, len(path))
                context_index = path[window_start:window_stop]

                # One row of ``n_negative`` table positions per context
                # node, mapped back to the indices stored in the table.
                negative_index =\
                    gs.random.randint(negative_sampling_table.shape[0],
                                      size=(len(context_index),
                                      n_negative))
                negative_index = negative_sampling_table[negative_index]

                example_embedding = embeddings[one_path]

                for one_context_i, one_negative_i in zip(
                        context_index, negative_index):
                    context_embedding = embeddings[one_context_i]
                    negative_embedding = embeddings[one_negative_i]
                    loss_value, g_ex = loss(
                        example_embedding, context_embedding,
                        negative_embedding, hyperbolic_manifold)
                    total_loss.append(loss_value)

                    # Step from the current embedding along -lr * g_ex
                    # with the exponential map of the metric.
                    example_to_update = embeddings[one_path]
                    embeddings[one_path] = hyperbolic_manifold.metric.exp(
                        -lr * g_ex, example_to_update)

        logging.info('iteration %d loss_value %f', epoch,
                     sum(total_loss, 0) / len(total_loss))

    circle = visualization.PoincareDisk(point_type='ball')
    plt.figure()
    ax = plt.subplot(111)
    circle.add_points(gs.array([[0, 0]]))
    circle.set_ax(ax)
    circle.draw(ax=ax)
    # Only the first two coordinates of each embedding are plotted.
    for i_embedding, embedding in enumerate(embeddings):
        plt.scatter(embedding[0],
                    embedding[1],
                    c=colors[karate_graph.labels[i_embedding][0]])
    plt.show()