def neg_embedding_loss(self, z, neg_edge_index):
    """Triplet loss over negative edges, using the squared hyperbolic
    distance on the hyperboloid manifold."""
    i, j, k = structured_negative_sampling(neg_edge_index, z.size(0))
    hdis1 = self.hyperboloid.sqdist(z[i], z[k], self.c)
    hdis2 = self.hyperboloid.sqdist(z[i], z[j], self.c)
    out = hdis1 - hdis2
    # Euclidean variant:
    # out = (z[i] - z[k]).pow(2).sum(dim=1) - (z[i] - z[j]).pow(2).sum(dim=1)
    return torch.clamp(out, min=0).mean()
def train(train_edges, model):
    r"""Run one training step of the link-prediction model.

    Assumes module-level ``optimizer``, ``features`` and ``device``, plus the
    helpers ``get_node_mapping`` and ``get_link_labels``.
    """
    model.train()
    optimizer.zero_grad()

    train_edges_np = train_edges.cpu().numpy()
    train_nodes = np.unique(train_edges_np)
    train_to_idx, idx_to_train = get_node_mapping(train_nodes)

    # Generate negatives from the re-indexed positive edges; mapping first
    # prevents sampling nodes that never appear in the training set.
    negatives = structured_negative_sampling(
        torch.tensor(train_to_idx(train_edges_np)),
        num_nodes=len(train_nodes))

    # Keep one of the two positive endpoints of each edge and pair it with
    # the sampled (corrupted) node.
    rand_node = random.randint(0, 1)
    u = negatives[rand_node]
    v = negatives[2]

    # Map the compact indices back to the original protein ids.
    negative_edge = torch.stack([u, v])
    negative_edge = idx_to_train(negative_edge.cpu().numpy())
    negative_edge = torch.tensor(negative_edge).to(device)

    total_edge = torch.cat([train_edges, negative_edge],
                           dim=-1).long().to(device)

    # Score all edges, build the labels, and update the parameters.
    link_logits = model(features, train_edges, total_edge)
    link_labels = get_link_labels(train_edges, negative_edge)
    loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
    loss.backward()
    optimizer.step()
    return loss.item()
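# Hedged sketch (not from the original repo): a plausible `get_link_labels`
# for the train() step above, following the usual PyG link-prediction recipe
# of ones for positive edges and zeros for sampled negatives, in the same
# order as `total_edge`.
import torch

def get_link_labels(pos_edge_index, neg_edge_index):
    num_links = pos_edge_index.size(1) + neg_edge_index.size(1)
    link_labels = torch.zeros(num_links, dtype=torch.float,
                              device=pos_edge_index.device)
    link_labels[:pos_edge_index.size(1)] = 1.0  # positives come first
    return link_labels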
def forward(self, x, edge_index, size=None):
    if size is None and torch.is_tensor(x):
        edge_index, _ = remove_self_loops(edge_index)
        edge_index, _ = add_self_loops(
            edge_index, num_nodes=x.size(self.node_dim))
    self.cache["edge_index"] = edge_index

    # Linearly transform the node features (tuple input for bipartite graphs).
    if torch.is_tensor(x):
        x = torch.matmul(x, self.weight)
    else:
        x = (None if x[0] is None else torch.matmul(x[0], self.weight),
             None if x[1] is None else torch.matmul(x[1], self.weight))

    propagated = self.propagate(edge_index, size=size, x=x)

    if self.training:
        # Cache the unnormalized attention on sampled non-edges, to be used
        # by a self-supervised attention loss.
        att_neg_list = []
        for _ in range(self.num_neg_samples_per_edge):
            edge_j, edge_i, edge_k = structured_negative_sampling(
                edge_index=edge_index,
                num_nodes=x.size(0),
            )
            x_j, x_k = x[edge_j], x[edge_k]
            att_neg = self.get_unnormalized_attention(x_j, x_k)
            att_neg_list.append(att_neg)
        self.cache["att_neg"] = torch.stack(att_neg_list, dim=-1)  # [E, heads, num_neg]

    return propagated
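# Hedged sketch (an assumption, not part of the original layer): one way the
# cached negative attention could feed a SuperGAT-style self-supervised loss.
# It assumes the layer also caches positive unnormalized attention under
# cache["att"] with shape [E, heads]; cache["att_neg"] is [E, heads, num_neg].
import torch
import torch.nn.functional as F

def attention_link_loss(cache):
    att_pos = cache["att"].mean(dim=-1)      # [E], averaged over heads
    att_neg = cache["att_neg"].mean(dim=1)   # [E, num_neg], averaged over heads
    logits = torch.cat([att_pos, att_neg.flatten()])
    labels = torch.cat([torch.ones_like(att_pos),
                        torch.zeros_like(att_neg.flatten())])
    # Edges that exist should receive higher attention than sampled non-edges.
    return F.binary_cross_entropy_with_logits(logits, labels)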
def sample(self, edge_index: torch.LongTensor, num_nodes: int) -> torch.LongTensor:
    # Draw `num_negative_samples` rounds of corrupted tail nodes, one row per
    # round; the result has shape [num_negative_samples, E].
    neg_edge_index = []
    for _ in range(self.num_negative_samples):
        tmp = structured_negative_sampling(edge_index, num_nodes)
        neg_edge_index.append(tmp[-1].tolist())
    return torch.LongTensor(neg_edge_index)
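# Hedged sketch: an equivalent of sample() above that stays on the tensor's
# device instead of round-tripping through Python lists (same semantics
# assumed: one row of corrupted tails per sampling round).
import torch
from torch_geometric.utils import structured_negative_sampling

def sample_stacked(edge_index, num_nodes, num_negative_samples):
    tails = [structured_negative_sampling(edge_index, num_nodes)[2]
             for _ in range(num_negative_samples)]
    return torch.stack(tails, dim=0)  # [num_negative_samples, E]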
import torch
from torch_geometric import utils as pyg_utils


def neg_sampling(edge_index, num_nodes):
    # Corrupt the tail of each positive edge: keep the source node i and
    # replace the target with the sampled non-neighbor k.
    i, j, k = pyg_utils.structured_negative_sampling(edge_index, num_nodes)
    neg_edge_index = torch.stack([i, k], dim=0)
    return neg_edge_index
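# Hedged usage sketch for neg_sampling above (toy 4-node graph).
import torch

edge_index = torch.tensor([[0, 0, 1, 2],
                           [1, 2, 2, 3]])
neg_edge_index = neg_sampling(edge_index, num_nodes=4)
print(neg_edge_index.shape)  # torch.Size([2, 4]): row 0 sources, row 1 sampled k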
def neg_embedding_loss(self, z, neg_edge_index):
    """Computes the triplet loss between negative node pairs and sampled
    non-node pairs.

    Args:
        z (Tensor): The node embeddings.
        neg_edge_index (LongTensor): The negative edge indices.
    """
    i, j, k = structured_negative_sampling(neg_edge_index, z.size(0))
    out = (z[i] - z[k]).pow(2).sum(dim=1) - (z[i] - z[j]).pow(2).sum(dim=1)
    return torch.clamp(out, min=0).mean()
import torch
from torch_geometric.utils import (contains_self_loops,
                                   structured_negative_sampling)


def test_structured_negative_sampling():
    edge_index = torch.as_tensor([[0, 0, 1, 2], [0, 1, 2, 3]])

    i, j, k = structured_negative_sampling(edge_index)
    assert i.size(0) == edge_index.size(1)
    assert j.size(0) == edge_index.size(1)
    assert k.size(0) == edge_index.size(1)

    adj = torch.zeros(4, 4, dtype=torch.bool)
    adj[i, j] = 1

    neg_adj = torch.zeros(4, 4, dtype=torch.bool)
    neg_adj[i, k] = 1
    assert (adj & neg_adj).sum() == 0

    # Test with no self-loops:
    edge_index = torch.LongTensor([[0, 0, 1, 1, 2], [1, 2, 0, 2, 1]])
    i, j, k = structured_negative_sampling(edge_index, num_nodes=4,
                                           contains_neg_self_loops=False)
    neg_edge_index = torch.vstack([i, k])
    assert not contains_self_loops(neg_edge_index)
def neg_embedding_loss(self, z, neg_edge_index):
    """Computes the triplet loss between negative node pairs and sampled
    non-node pairs.

    Args:
        z (Tensor): The node embeddings.
        neg_edge_index (LongTensor): The negative edge indices.
    """
    i, j, k = structured_negative_sampling(neg_edge_index, z.size(0))
    torch.cuda.empty_cache()
    out = self.manifolds.sqdist(z[i], z[k], 1) - self.manifolds.sqdist(
        z[i], z[j], 1)
    return torch.clamp(out, min=0).mean()
def pos_embedding_loss(self, z, pos_edge_index):
    """Computes the triplet loss between positive node pairs and sampled
    non-node pairs.

    Args:
        z (Tensor): The node embeddings.
        pos_edge_index (LongTensor): The positive edge indices.
    """
    i, j, k = structured_negative_sampling(pos_edge_index, z.size(0))
    torch.cuda.empty_cache()
    out = self.manifolds.sqdist(z[i], z[j], 1) - self.manifolds.sqdist(
        z[i], z[k], 1)
    if torch.isinf(out).any():
        print("pos_embedding_loss: sqdist produced inf values")
    return torch.clamp(out, min=0).mean()
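# Hedged sketch (an assumption, not from the original repo): how the two
# triplet terms above are typically combined in a SignedGCN-style objective,
# where `model` owns both pos_embedding_loss and neg_embedding_loss.
def embedding_loss(model, z, pos_edge_index, neg_edge_index):
    # Real positive pairs should be closer than corrupted ones, and real
    # negative pairs farther apart than corrupted ones.
    return (model.pos_embedding_loss(z, pos_edge_index)
            + model.neg_embedding_loss(z, neg_edge_index))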
def negative_sampling_line(z, edge_index, num_negative_samples=5):
    '''LINE-style objective with structured negative sampling.

    Parameters:
        z: the node embeddings (communities sampled with the Gumbel-softmax
            reparametrization trick)
        edge_index: edges in the graph
        num_negative_samples: number of negative samples to be used for the
            optimization

    The function has been partially inspired from this file:
    https://github.com/DMPierre/LINE/blob/master/utils/line.py
    '''
    # Completion assumption: the original snippet was truncated and referenced
    # undefined names (self, v_i, v_j); endpoint embeddings are taken from z
    # here, and the loss follows the standard LINE negative-sampling objective.
    v_i, v_j = z[edge_index[0]], z[edge_index[1]]

    # Positive term: log sigma(v_i . v_j) for every observed edge.
    positivebatch = F.logsigmoid((v_i * v_j).sum(dim=1))

    # structured_negative_sampling returns a 3-tuple; index 2 holds the nodes
    # from the negative edges. Accumulate log sigma(-v_i . v_k) over samples.
    negativebatch = 0
    for _ in range(num_negative_samples):
        negsamples = structured_negative_sampling(edge_index)[2]
        negativenodes = -z[negsamples]
        negativebatch = negativebatch + F.logsigmoid(
            (v_i * negativenodes).sum(dim=1))

    # Maximize both terms, i.e. minimize their negated sum.
    return -(positivebatch + negativebatch).mean()
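# Hedged usage sketch for negative_sampling_line above (toy inputs; the
# embedding dimension and graph are made up for illustration).
import torch
import torch.nn.functional as F
from torch_geometric.utils import structured_negative_sampling

z = torch.randn(4, 16, requires_grad=True)         # 4 nodes, 16-dim embeddings
edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])  # 3 positive edges
loss = negative_sampling_line(z, edge_index, num_negative_samples=5)
loss.backward()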