def forward(self, pos_g, neg_g, gpu_id=-1):
    """Do the forward.

    Parameters
    ----------
    pos_g : DGLGraph
        Graph holding positive edges.
    neg_g : DGLGraph
        Graph holding negative edges.
    gpu_id : int
        Which GPU to accelerate the calculation. If -1 is provided, CPU is used.

    Returns
    -------
    tensor
        loss value
    dict
        loss info
    """
    if self.train_mode == 'roberta':
        pos_g.ndata['emb'] = self.transform_net.embed_entity(
            self.entity_feat(pos_g.ndata['id'], gpu_id, False))
        pos_g.edata['emb'] = self.transform_net.embed_relation(
            self.relation_feat(pos_g.edata['id'], gpu_id, False))
    elif self.train_mode == 'shallow':
        pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, True)
        pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, True)
    elif self.train_mode == 'concat':
        pos_g.ndata['emb'] = self.transform_net.embed_entity(torch.cat(
            [self.entity_feat(pos_g.ndata['id'], gpu_id, False),
             self.entity_emb(pos_g.ndata['id'], gpu_id, True)], -1))
        pos_g.edata['emb'] = self.transform_net.embed_relation(torch.cat(
            [self.relation_feat(pos_g.edata['id'], gpu_id, False),
             self.relation_emb(pos_g.edata['id'], gpu_id, True)], -1))

    self.score_func.prepare(pos_g, gpu_id, True)
    pos_score = self.predict_score(pos_g)

    if gpu_id >= 0:
        neg_score = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
                                           gpu_id=gpu_id, trace=True,
                                           neg_deg_sample=self.args.neg_deg_sample)
    else:
        neg_score = self.predict_neg_score(pos_g, neg_g, trace=True,
                                           neg_deg_sample=self.args.neg_deg_sample)
    neg_score = reshape(neg_score, -1, neg_g.neg_sample_size)

    # subsampling weight
    # TODO: add subsampling to new sampler
    #if self.args.non_uni_weight:
    #    subsampling_weight = pos_g.edata['weight']
    #    pos_score = (pos_score * subsampling_weight).sum() / subsampling_weight.sum()
    #    neg_score = (neg_score * subsampling_weight).sum() / subsampling_weight.sum()
    #else:
    edge_weight = F.copy_to(pos_g.edata['impts'], get_dev(gpu_id)) \
        if self.has_edge_importance else None
    loss, log = self.loss_gen.get_total_loss(pos_score, neg_score, edge_weight)

    # regularization: TODO(zihao)
    # TODO: only regularize entity & relation embeddings; other params to be added.
    if self.args.regularization_coef > 0.0 and self.args.regularization_norm > 0 \
            and self.train_mode in ['concat', 'shallow']:
        coef, nm = self.args.regularization_coef, self.args.regularization_norm
        reg = coef * (norm(self.entity_emb.curr_emb(), nm)
                      + norm(self.relation_emb.curr_emb(), nm))
        log['regularization'] = get_scalar(reg)
        loss = loss + reg
    return loss, log
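# --- Hedged sketch (illustrative, not the library API) -----------------------
# The three `train_mode` branches above build embeddings three ways: 'roberta'
# projects frozen text features, 'shallow' uses a learnable embedding table
# directly, and 'concat' projects their concatenation. A standalone PyTorch
# sketch of the 'concat' case, with made-up dimensions and a plain Linear
# standing in for transform_net.embed_entity:

import torch
import torch.nn as nn

feat_dim, emb_dim, hidden_dim = 768, 200, 400
embed_entity = nn.Linear(feat_dim + emb_dim, hidden_dim)  # hypothetical projector

entity_feat = torch.randn(5, feat_dim)  # frozen features ('roberta' input)
entity_emb = torch.randn(5, emb_dim)    # learnable table ('shallow' input)
emb = embed_entity(torch.cat([entity_feat, entity_emb], dim=-1))  # 'concat'
assert emb.shape == (5, hidden_dim)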
def _load_node_feature(self, device):
    """Load node features onto the given device."""
    if len(self._features) == 1 and self._features[0].is_homo:
        features = self._features[0]
        ft = F.tensor(features.features)
        ft = F.copy_to(ft, device)
        self._g.ndata['homo_f'] = ft
    else:
        # TODO(xiangsx): support heterogeneous graphs
        assert False, 'heterogeneous graphs are not supported yet'
def knn_graphE(x, k, istrain=False):
    """Transform the given point set, whose coordinates are given as a matrix,
    into a directed graph. The predecessors of each point are its k-nearest
    neighbors.

    If a 3D tensor is given instead, each 2D slice is transformed into a
    separate graph and the graphs are unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor. If 2D, each row of ``x`` corresponds to a node.
        If 3D, a k-NN graph is constructed for each 2D slice and the graphs
        are unioned.
    k : int
        The number of neighbors.
    istrain : bool
        If True, with probability 0.5 the k neighbors are drawn at random
        from the 1.5k nearest neighbors (the nearest one is always kept).

    Returns
    -------
    DGLGraph
        The graph. The node IDs are in the same order as ``x``.
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
    if istrain and np.random.rand() > 0.5:
        # Pick the 1.5k nearest neighbors, then keep index 0 (the nearest)
        # plus k - 1 randomly permuted ones from the rest.
        k_indices = F.argtopk(dist, round(1.5 * k), 2, descending=False)
        rand_k = np.random.permutation(round(1.5 * k) - 1)[0:k - 1] + 1
        rand_k = np.append(rand_k, 0)
        k_indices = k_indices[:, :, rand_k]
    else:
        k_indices = F.argtopk(dist, k, 2, descending=False)
    dst = F.copy_to(k_indices, F.cpu())

    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    # Offset node IDs so each sample in the batch maps to a disjoint ID range.
    per_sample_offset = F.reshape(F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    g = DGLGraph(adj, readonly=True)
    return g
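# --- Hedged usage sketch ------------------------------------------------------
# Assuming a PyTorch-backed DGL build, build a 4-NN graph over 100 random 3-D
# points; every node gets k incoming edges (including a self-loop, since each
# point is its own nearest neighbor):

import torch

pts = torch.rand(100, 3)                    # 2D input: one point cloud
g = knn_graphE(pts, k=4)                    # deterministic k-NN edges
assert g.number_of_edges() == 100 * 4
g_aug = knn_graphE(pts, k=4, istrain=True)  # w.p. 0.5, neighbors come from
                                            # the 1.5k nearest instead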
def load_relation(self, device=None):
    """Sync the global relation embeddings into the local relation embeddings.

    Used in multi-process multi-GPU training mode.

    Parameters
    ----------
    device : th.device
        Which device (GPU) to put the relation embeddings on.
    """
    self.relation_emb = ExternalEmbedding(self.args, self.n_relations,
                                          self.rel_dim, device)
    self.relation_emb.emb = F.copy_to(self.global_relation_emb.emb, device)
    if self.model_name == 'TransR':
        local_projection_emb = ExternalEmbedding(self.args, self.n_relations,
                                                 self.entity_dim * self.rel_dim,
                                                 device)
        self.score_func.load_local_emb(local_projection_emb)
def writeback_relation(self, rank=0, rel_parts=None):
    """Write back the relation embeddings of a specific process to the
    global relation embedding.

    Used in multi-process multi-GPU training mode.

    Parameters
    ----------
    rank : int
        Process id.
    rel_parts : List of tensor
        List of tensors storing the edge types of each partition.
    """
    idx = rel_parts[rank]
    self.global_relation_emb.emb[idx] = F.copy_to(self.relation_emb.emb,
                                                  F.cpu())[idx]
    if self.model_name == 'TransR':
        self.score_func.writeback_local_emb(idx)
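# --- Hedged sketch of the load/writeback round trip ---------------------------
# load_relation and writeback_relation together implement a pull/push cycle
# between a shared global table and per-process local copies. A standalone
# sketch with plain tensors standing in for ExternalEmbedding (all names here
# are illustrative):

import torch

n_relations, rel_dim = 10, 4
global_emb = torch.zeros(n_relations, rel_dim)               # shared copy
rel_parts = [torch.tensor([0, 1, 2]), torch.tensor([3, 4])]  # per-rank edge types

rank = 0
local_emb = global_emb.clone()                            # load: global -> local
local_emb[rel_parts[rank]] += 1.0                         # rank trains its partition
global_emb[rel_parts[rank]] = local_emb[rel_parts[rank]]  # writeback: slice -> global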
def score(self, head, rel, tail, triplet_wise=False):
    """Score triplets in batches.

    If ``triplet_wise`` is True, the i-th head, relation and tail form one
    triplet; otherwise a score is computed for every (head, rel, tail)
    combination.
    """
    head_emb = self.entity_emb(head)
    rel_emb = self.relation_emb(rel)
    tail_emb = self.entity_emb(tail)

    num_head = F.shape(head)[0]
    num_rel = F.shape(rel)[0]
    num_tail = F.shape(tail)[0]

    batch_size = self.batch_size
    score = []
    if triplet_wise:
        # A minimal edge-like wrapper so score_func.edge_func, which expects
        # a DGL edge batch, can be reused on raw embedding tensors.
        class FakeEdge(object):
            def __init__(self, head_emb, rel_emb, tail_emb):
                self._hobj = {'emb': head_emb}
                self._robj = {'emb': rel_emb}
                self._tobj = {'emb': tail_emb}

            @property
            def src(self):
                return self._hobj

            @property
            def dst(self):
                return self._tobj

            @property
            def data(self):
                return self._robj

        for i in range((num_head + batch_size - 1) // batch_size):
            sh_emb = head_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            sr_emb = rel_emb[i * batch_size : (i + 1) * batch_size
                             if (i + 1) * batch_size < num_head
                             else num_head]
            st_emb = tail_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            edata = FakeEdge(sh_emb, sr_emb, st_emb)
            score.append(F.copy_to(self.score_func.edge_func(edata)['score'],
                                   F.cpu()))
        score = F.cat(score, dim=0)
        return score
    else:
        for i in range((num_head + batch_size - 1) // batch_size):
            sh_emb = head_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            s_score = []
            for j in range((num_tail + batch_size - 1) // batch_size):
                st_emb = tail_emb[j * batch_size : (j + 1) * batch_size
                                  if (j + 1) * batch_size < num_tail
                                  else num_tail]
                s_score.append(F.copy_to(
                    self.score_func.infer(sh_emb, rel_emb, st_emb), F.cpu()))
            score.append(F.cat(s_score, dim=2))
        score = F.cat(score, dim=0)
        return F.reshape(score, (num_head * num_rel * num_tail,))
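# --- Hedged sketch of the chunking idiom --------------------------------------
# The batched loops above walk ceil(num / batch_size) chunks, clamping the
# stop index of the last slice. A standalone illustration with toy sizes:

num_head, batch_size = 10, 4
bounds = []
for i in range((num_head + batch_size - 1) // batch_size):
    lo = i * batch_size
    hi = (i + 1) * batch_size if (i + 1) * batch_size < num_head else num_head
    bounds.append((lo, hi))
assert bounds == [(0, 4), (4, 8), (8, 10)]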
def topK(self, head=None, tail=None, bcast=False, pair_ws=False, k=10):
    """Return the top-k most similar (head, tail) pairs.

    If ``pair_ws`` is True, the i-th head is compared only with the i-th
    tail. If ``bcast`` is True, the top-k tails are returned for every head
    separately; otherwise the top-k is taken over all pairs.
    """
    if head is None:
        head = F.arange(0, self.emb.shape[0])
    else:
        head = F.tensor(head)
    if tail is None:
        tail = F.arange(0, self.emb.shape[0])
    else:
        tail = F.tensor(tail)

    head_emb = self.emb[head]
    tail_emb = self.emb[tail]
    if pair_ws is True:
        result = []
        batch_size = self.batch_size
        # Compute scores in chunks to bound peak device memory.
        score = []
        num_head = head.shape[0]
        num_tail = tail.shape[0]
        for i in range((num_head + batch_size - 1) // batch_size):
            sh_emb = head_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            sh_emb = F.copy_to(sh_emb, self.device)
            st_emb = tail_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            st_emb = F.copy_to(st_emb, self.device)
            score.append(F.copy_to(self.sim_func(sh_emb, st_emb, pw=True),
                                   F.cpu()))
        score = F.cat(score, dim=0)

        sidx = F.argsort(score, dim=0, descending=True)
        sidx = sidx[:k]
        score = score[sidx]
        result.append((F.asnumpy(head[sidx]),
                       F.asnumpy(tail[sidx]),
                       F.asnumpy(score)))
    else:
        num_head = head.shape[0]
        num_tail = tail.shape[0]
        batch_size = self.batch_size

        # Compute scores in chunks to bound peak device memory.
        score = []
        for i in range((num_head + batch_size - 1) // batch_size):
            sh_emb = head_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            sh_emb = F.copy_to(sh_emb, self.device)
            s_score = []
            for j in range((num_tail + batch_size - 1) // batch_size):
                st_emb = tail_emb[j * batch_size : (j + 1) * batch_size
                                  if (j + 1) * batch_size < num_tail
                                  else num_tail]
                st_emb = F.copy_to(st_emb, self.device)
                s_score.append(F.copy_to(self.sim_func(sh_emb, st_emb),
                                         F.cpu()))
            score.append(F.cat(s_score, dim=1))
        score = F.cat(score, dim=0)

        if bcast is False:
            result = []
            idx = F.arange(0, num_head * num_tail)
            score = F.reshape(score, (num_head * num_tail,))

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]
            # Decode flat indices back into (head, tail) positions.
            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
        else:
            # Broadcast over heads: take the top-k tails for each head.
            result = []
            for i in range(num_head):
                i_score = score[i]

                sidx = F.argsort(i_score, dim=0, descending=True)
                idx = F.arange(0, num_tail)
                i_idx = sidx[:k]
                i_score = i_score[i_idx]
                idx = idx[i_idx]

                result.append((np.full((k,), F.asnumpy(head[i])),
                               F.asnumpy(tail[idx]),
                               F.asnumpy(i_score)))
    return result
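# --- Hedged sketch of the flat-index decoding ---------------------------------
# In the non-broadcast branch, a flat position p in the row-major
# (num_head x num_tail) score matrix decodes as head p // num_tail and
# tail p % num_tail. A standalone check with toy sizes:

num_head, num_tail = 3, 5
p = 13                                  # e.g. the top-scoring flat index
tail_idx = p % num_tail                 # -> 3
head_idx = (p // num_tail) % num_head   # -> 2
assert (head_idx, tail_idx) == (2, 3)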
def forward(self, pos_g, neg_g, gpu_id=-1):
    """Do the forward.

    Parameters
    ----------
    pos_g : DGLGraph
        Graph holding positive edges.
    neg_g : DGLGraph
        Graph holding negative edges.
    gpu_id : int
        Which GPU to accelerate the calculation. If -1 is provided, CPU is used.

    Returns
    -------
    tensor
        loss value
    dict
        loss info
    """
    pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, True)
    pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, True)
    self.score_func.prepare(pos_g, gpu_id, True)

    pos_score = self.predict_score(pos_g)
    pos_score = logsigmoid(pos_score)

    if gpu_id >= 0:
        neg_score = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
                                           gpu_id=gpu_id, trace=True,
                                           neg_deg_sample=self.args.neg_deg_sample)
    else:
        neg_score = self.predict_neg_score(pos_g, neg_g, trace=True,
                                           neg_deg_sample=self.args.neg_deg_sample)
    neg_score = reshape(neg_score, -1, neg_g.neg_sample_size)

    # Adversarial sampling
    if self.args.neg_adversarial_sampling:
        neg_score = F.sum(F.softmax(neg_score * self.args.adversarial_temperature,
                                    dim=1).detach()
                          * logsigmoid(-neg_score), dim=1)
    else:
        neg_score = F.mean(logsigmoid(-neg_score), dim=1)

    # subsampling weight
    # TODO: add subsampling to new sampler
    #if self.args.non_uni_weight:
    #    subsampling_weight = pos_g.edata['weight']
    #    pos_score = (pos_score * subsampling_weight).sum() / subsampling_weight.sum()
    #    neg_score = (neg_score * subsampling_weight).sum() / subsampling_weight.sum()
    #else:
    if self.has_edge_importance:
        # Weight each positive/negative score by its edge importance.
        edge_weight = F.copy_to(pos_g.edata['impts'], get_dev(gpu_id))
        pos_score = (pos_score * edge_weight).mean()
        neg_score = (neg_score * edge_weight).mean()
    else:
        pos_score = pos_score.mean()
        neg_score = neg_score.mean()

    # compute loss
    loss = -(pos_score + neg_score) / 2

    log = {'pos_loss': -get_scalar(pos_score),
           'neg_loss': -get_scalar(neg_score),
           'loss': get_scalar(loss)}

    # regularization: TODO(zihao)
    # TODO: only regularize entity & relation embeddings; other params to be added.
    if self.args.regularization_coef > 0.0 and self.args.regularization_norm > 0:
        coef, nm = self.args.regularization_coef, self.args.regularization_norm
        reg = coef * (norm(self.entity_emb.curr_emb(), nm)
                      + norm(self.relation_emb.curr_emb(), nm))
        log['regularization'] = get_scalar(reg)
        loss = loss + reg
    return loss, log
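# --- Hedged sketch of self-adversarial negative sampling -----------------------
# The `neg_adversarial_sampling` branch above weights negatives by a detached
# softmax over their scores (the self-adversarial scheme from the RotatE paper)
# instead of a uniform mean. A standalone PyTorch illustration with made-up
# shapes:

import torch
import torch.nn.functional as TF

neg_score = torch.randn(8, 16)          # (batch, neg_sample_size)
temperature = 1.0
w = TF.softmax(neg_score * temperature, dim=1).detach()  # no gradient to weights
adv = torch.sum(w * TF.logsigmoid(-neg_score), dim=1)    # adversarial average
uni = torch.mean(TF.logsigmoid(-neg_score), dim=1)       # uniform average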