def log_callback_em_conductance(embeddings, adjancy_matrix, n_centroid):
    """Cluster *embeddings* with a Poincare EM and report graph conductance.

    Parameters
    ----------
    embeddings : torch.Tensor
        (n_nodes, dim) hyperbolic node embeddings.
    adjancy_matrix : adjacency structure accepted by
        ``evaluation.mean_conductance`` (kept with its original spelling
        for backward compatibility).
    n_centroid : int
        Number of EM mixture components.

    Returns
    -------
    dict
        ``{"conductance": mean conductance of the hard EM clustering}``.
    """
    kmeans = pem.PoincareEM(n_centroid)
    kmeans.fit(embeddings)
    # Hard cluster assignment for every node.
    i = kmeans.predict(embeddings)
    r = torch.arange(0, i.size(0), device=i.device)
    # BUGFIX: allocate the one-hot matrix on the same device as the
    # predictions; previously it was created on the CPU, so the indexed
    # assignment below crashed when the embeddings lived on a GPU.
    prediction = torch.zeros(embeddings.size(0), n_centroid, device=i.device)
    prediction[r, i] = 1
    return {
        "conductance": evaluation.mean_conductance(prediction, adjancy_matrix)
    }
# Train the graph embedding for 50 epochs with negative sampling, then fit a
# 13-component Poincare EM on the learned weights and plot the result.
# NOTE(review): this chunk is truncated — the final call
# `corpora_tools.from_indexable(` has no closing parenthesis in view.
for i in range(50):
    tloss = 0.
    for x, y in dataloader:
        optimizer.zero_grad()
        # Embeddings of the positive pair (source, context).
        pe_x = model(x.long())
        pe_y = model(y.long())
        # 10 negative samples per positive pair, drawn uniformly over node
        # indices (presumably len(X) is the number of nodes — TODO confirm).
        ne = model((torch.rand(len(x), 10) * len(X)).long())
        loss = graph_embedding_criterion(pe_x, pe_y, z=ne, manifold=manifold).sum()
        tloss += loss.item()
        loss.backward()
        optimizer.step()
    print('Loss value for iteration ', i, ' is ', tloss)
# Fit a hyperbolic Gaussian mixture (13 components) on the learned embeddings.
em_alg = poincare_em.PoincareEM(13)
em_alg.fit(model.weight.data)
NF = em_alg.get_normalisation_coef()
pi, mu, sigma = em_alg.get_parameters()
# Posterior responsibilities of each point for each component.
pik = em_alg.get_pik(model.weight.data)
# Snapshot the embedding before the community-loss phase, with EM centroids.
plot_poincare_disc_embeddings(model.weight.data.numpy(), labels=dataset.Y,
                              save_folder="LOG/community_loss",
                              file_name="LFR_before_community_loss.png",
                              centroids=mu)
# Fresh Riemannian SGD optimizer (lr=2e-2) for the next training phase.
optimizer = rsgd.RSGD(model.parameters(), 2e-2, manifold=manifold)
dataset_o3 = corpora_tools.from_indexable(
# Estimate a Poincare Gaussian mixture on two synthetic clusters.
import torch

from rcome.clustering_tools import poincare_em
from rcome.visualisation_tools.plot_tools import plot_poincare_gmm

# Draw two Gaussian blobs inside the disc: a tight one around +0.3 and a
# tighter one around -0.5 (50 points each).
set_A = torch.randn(50, 2) * 2e-1 + 0.3
set_B = torch.randn(50, 2) * 1e-1 - 0.5
representation = torch.cat((set_A, set_B), 0)

# Any sample that fell outside the unit disc is radially pulled back just
# inside the boundary (divide by its norm plus a small margin).
norms = representation.norm(2, -1)
outside = norms > 1
shrink = 1 / (norms[outside] + 1e-2)
representation[outside] = representation[outside] * shrink.unsqueeze(-1)

# Fit a two-component hyperbolic GMM on the projected points.
em_alg = poincare_em.PoincareEM(2)
em_alg.fit(representation)

# Ground-truth labels: first 50 points belong to cluster 1, the rest to 0.
Y = torch.zeros(100, 1).long()
Y[:50] = 1

plot_poincare_gmm(representation, em_alg, labels=Y,
                  save_folder="LOG/gmm_estimation",
                  file_name="gmm_estimation.png")