def test_random_walks_karate_graph(self):
    """Check how many random walks are produced and how long they are.

    Walks are drawn on the karate graph with ``walk_length=6`` and
    ``n_walks_per_node=2``. The test expects
    ``len(graph.edges) * n_walks_per_node`` walks, the first of which
    holds ``walk_length + 1`` entries.
    """
    n_steps, walks_per_node = 6, 2
    karate = data_utils.load_karate_graph()
    walks = karate.random_walk(
        walk_length=n_steps, n_walks_per_node=walks_per_node)

    observed = [len(walks), len(walks[0])]
    wanted = [len(karate.edges) * walks_per_node, n_steps + 1]
    self.assertAllClose(observed, wanted)
def setUp(self):
    """Seed the random generator and create the test fixtures.

    Stores the karate graph in ``self.karate_graph`` and a
    ``HyperbolicEmbedding`` built with small hyper-parameters in
    ``self.embedding``.
    """
    gs.random.seed(1234)

    self.karate_graph = load_karate_graph()

    embedding_settings = {
        'dim': 2,
        'max_epochs': 3,
        'lr': .05,
        'n_context': 1,
        'n_negative': 2,
    }
    self.embedding = HyperbolicEmbedding(**embedding_settings)
def main():
    """Learning Poincaré graph embedding.

    Learns Poincaré Ball embedding by using Riemannian
    gradient descent algorithm. Then K-means is applied
    to learn labels of each data sample.

    Two figures are shown one after the other: the embedded nodes
    colored according to ``karate_graph.labels``, then the same nodes
    colored according to the label predicted by K-means, together with
    the centroids returned by ``kmeans.fit``.
    """
    gs.random.seed(1234)

    karate_graph = load_karate_graph()

    hyperbolic_embedding = HyperbolicEmbedding()
    embeddings = hyperbolic_embedding.embed(karate_graph)

    # Figure 1: embedding colored by the labels stored on the graph.
    colors = {1: 'b', 2: 'r'}
    group_1 = mpatches.Patch(color=colors[1], label='Group 1')
    group_2 = mpatches.Patch(color=colors[2], label='Group 2')

    circle = visualization.PoincareDisk(point_type='ball')

    _, ax = plt.subplots(figsize=(8, 8))
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    circle.set_ax(ax)
    circle.draw(ax=ax)
    for pt_id, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        plt.scatter(
            x, y,
            c=colors[karate_graph.labels[pt_id][0]],
            s=150)
        ax.annotate(pt_id, (x, y))

    plt.tick_params(which='both')
    plt.title('Poincare Ball Embedding of the Karate Club Network')
    plt.legend(handles=[group_1, group_2])
    plt.show()

    # Figure 2: embedding colored by the K-means prediction.
    n_clusters = 2

    kmeans = RiemannianKMeans(
        riemannian_metric=hyperbolic_embedding.manifold.metric,
        n_clusters=n_clusters,
        init='random',
        mean_method='frechet-poincare-ball')

    centroids = kmeans.fit(X=embeddings, max_iter=100)
    labels = kmeans.predict(X=embeddings)

    # Index 0 and 1 color the two predicted groups, index 2 the centroids.
    predicted_colors = ['g', 'c', 'm']
    circle = visualization.PoincareDisk(point_type='ball')
    _, ax2 = plt.subplots(figsize=(8, 8))
    circle.set_ax(ax2)
    circle.draw(ax=ax2)
    ax2.axes.xaxis.set_visible(False)
    ax2.axes.yaxis.set_visible(False)
    group_1_predicted = mpatches.Patch(
        color=predicted_colors[0], label='Predicted Group 1')
    group_2_predicted = mpatches.Patch(
        color=predicted_colors[1], label='Predicted Group 2')
    group_centroids = mpatches.Patch(
        color=predicted_colors[2], label='Cluster centroids')

    # Each point is drawn and annotated exactly once. The previous version
    # repeated this whole pass ``n_clusters`` times without using the loop
    # variable, stacking identical markers and annotations on the figure.
    for pt_id, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        if labels[pt_id] == 0:
            color = predicted_colors[0]
        else:
            color = predicted_colors[1]
        plt.scatter(x, y, c=color, s=150)
        ax2.annotate(pt_id, (x, y))

    for centroid in centroids:
        plt.scatter(
            centroid[0], centroid[1],
            c=predicted_colors[2],
            marker='*',
            s=150)

    plt.title('K-means applied to Karate club embedding')
    plt.legend(
        handles=[group_1_predicted, group_2_predicted, group_centroids])
    plt.show()
def test_karate_graph(self):
    """Test the correct number of edges and nodes for each graph.

    Loads the karate graph and checks that
    ``len(graph.edges) + len(graph.labels)`` equals 68.
    """
    graph = data_utils.load_karate_graph()
    result = len(graph.edges) + len(graph.labels)
    expected = 68
    # assertEqual reports both values when the check fails, whereas
    # assertTrue(result == expected) only reports "False is not true".
    self.assertEqual(result, expected)
def main():
    """Learning Poincaré graph embedding.

    Learns Poincaré Ball embedding by using Riemannian
    gradient descent algorithm.

    Random walks are drawn on the karate graph. For every node of every
    walk, the node embedding is updated from ``loss`` evaluated against
    neighbouring nodes of the walk and randomly drawn negative nodes.
    The mean loss is logged after each epoch and the final embeddings are
    plotted, colored according to ``karate_graph.labels``.
    """
    gs.random.seed(1234)
    dim = 2
    max_epochs = 100
    lr = .05
    n_negative = 2
    context_size = 1
    karate_graph = load_karate_graph()

    # One entry per key of ``edges``: the length of the associated value.
    nb_vertices_by_edges = [
        len(e_2) for e_2 in karate_graph.edges.values()]
    logging.info('Number of edges: %s', len(karate_graph.edges))
    logging.info(
        'Mean vertices by edges: %s',
        sum(nb_vertices_by_edges) / len(karate_graph.edges))

    # Index ``i`` is repeated int(nb_v ** 0.75) * negative_table_parameter
    # times, so a uniform draw from the table favours indices with a
    # larger ``nb_v``.
    negative_table_parameter = 5
    negative_sampling_table = []
    for i, nb_v in enumerate(nb_vertices_by_edges):
        negative_sampling_table += (
            [i] * int(nb_v ** (3. / 4.)) * negative_table_parameter)
    negative_sampling_table = gs.array(negative_sampling_table)

    random_walks = karate_graph.random_walk()

    embeddings = gs.random.normal(size=(karate_graph.n_nodes, dim))
    embeddings = embeddings * 0.2

    # Use ``dim`` rather than a literal 2 so that the manifold always
    # matches the number of columns of ``embeddings``.
    hyperbolic_manifold = PoincareBall(dim)

    colors = {1: 'b', 2: 'r'}
    for epoch in range(max_epochs):
        total_loss = []
        for path in random_walks:
            for example_index, one_path in enumerate(path):
                # NOTE(review): the slice end is exclusive, so the window
                # contains the example node itself and stops before the
                # node at example_index + context_size. Confirm this is
                # the intended context before changing it.
                window_start = max(0, example_index - context_size)
                window_stop = min(example_index + context_size, len(path))
                context_index = path[window_start:window_stop]

                # n_negative table positions per context node, mapped
                # back to the indices stored in the table.
                negative_index = gs.random.randint(
                    negative_sampling_table.shape[0],
                    size=(len(context_index), n_negative))
                negative_index = negative_sampling_table[negative_index]

                # Taken once per example, before the updates below.
                example_embedding = embeddings[one_path]

                for one_context_i, one_negative_i in zip(
                        context_index, negative_index):
                    context_embedding = embeddings[one_context_i]
                    negative_embedding = embeddings[one_negative_i]
                    # g_ex is presumably the gradient of the loss with
                    # respect to the example embedding; see ``loss``.
                    loss_value, g_ex = loss(
                        example_embedding,
                        context_embedding,
                        negative_embedding,
                        hyperbolic_manifold)
                    total_loss.append(loss_value)

                    # Update step through the metric's ``exp``, called
                    # with -lr * g_ex and the current embedding.
                    example_to_update = embeddings[one_path]
                    embeddings[one_path] = hyperbolic_manifold.metric.exp(
                        -lr * g_ex, example_to_update)
        logging.info(
            'iteration %d loss_value %f',
            epoch, sum(total_loss) / len(total_loss))

    circle = visualization.PoincareDisk(point_type='ball')
    plt.figure()
    ax = plt.subplot(111)
    circle.add_points(gs.array([[0, 0]]))
    circle.set_ax(ax)
    circle.draw(ax=ax)
    for i_embedding, embedding in enumerate(embeddings):
        plt.scatter(
            embedding[0], embedding[1],
            c=colors[karate_graph.labels[i_embedding][0]])
    plt.show()