def task_pos_weights(self, indices):
    """Get weights for positive samples on each task

    This should only be used when all tasks are binary classification.

    It's quite common that the number of positive samples and the number of
    negative samples are significantly different for binary classification.
    To compensate for the class imbalance issue, we can weight each datapoint
    in loss computation.

    In particular, for each task we will set the weight of negative samples
    to be 1 and the weight of positive samples to be the number of negative
    samples divided by the number of positive samples.

    Parameters
    ----------
    indices : 1D LongTensor
        The function will compute the weights on the data subset specified by
        the indices, e.g. the indices for the training set.

    Returns
    -------
    Tensor of dtype float32 and shape (T)
        Weight of positive samples on all tasks
    """
    task_pos_weights = torch.ones(self.labels.shape[1])
    num_pos = F.sum(self.labels[indices], dim=0)
    num_indices = F.sum(self.mask[indices], dim=0)
    task_pos_weights[num_pos > 0] = \
        ((num_indices - num_pos) / num_pos)[num_pos > 0]
    return task_pos_weights
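
# Illustrative sketch (assumption: not part of the original module). It mirrors the
# pos-weight rule above with plain PyTorch tensors; `labels`, `mask` and
# `example_task_pos_weights` are hypothetical stand-ins for self.labels, self.mask
# and the method itself.
import torch

def example_task_pos_weights(labels, mask, indices):
    # labels, mask: (N, T) float tensors; mask marks datapoints with an existing label.
    weights = torch.ones(labels.shape[1])
    num_pos = labels[indices].sum(dim=0)
    num_labeled = mask[indices].sum(dim=0)
    # weight of positives = (#negatives) / (#positives), only where positives exist
    has_pos = num_pos > 0
    weights[has_pos] = ((num_labeled - num_pos) / num_pos)[has_pos]
    return weights

# Example: 4 datapoints, 2 tasks, all labeled.
labels = torch.tensor([[1., 0.], [0., 0.], [0., 1.], [0., 1.]])
mask = torch.ones_like(labels)
print(example_task_pos_weights(labels, mask, torch.arange(4)))  # tensor([3., 1.])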
def forward_test(self, pos_g, neg_g, logs, gpu_id=-1):
    """Do the forward and generate ranking results.

    Parameters
    ----------
    pos_g : DGLGraph
        Graph holding positive edges.
    neg_g : DGLGraph
        Graph holding negative edges.
    logs : List
        Where to put results in.
    gpu_id : int
        Which gpu to accelerate the calculation. if -1 is provided, cpu is used.
    """
    pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, False)
    pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, False)

    self.score_func.prepare(pos_g, gpu_id, False)

    batch_size = pos_g.number_of_edges()
    pos_scores = self.predict_score(pos_g)
    pos_scores = reshape(pos_scores, batch_size, -1)

    neg_scores = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
                                        gpu_id=gpu_id, trace=False,
                                        neg_deg_sample=self.args.neg_deg_sample_eval)
    neg_scores = reshape(neg_scores, batch_size, -1)

    # We need to filter the positive edges in the negative graph.
    if self.args.eval_filter:
        filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
        if gpu_id >= 0:
            filter_bias = cuda(filter_bias, gpu_id)
        # find all indices where it is not false negative sample
        mask = filter_bias != -1

    # To compute the rank of a positive edge among all negative edges,
    # we need to know how many negative edges have higher scores than
    # the positive edge.
    for i in range(batch_size):
        if self.args.eval_filter:
            # select all the true negative samples where its score >= positive sample
            ranking = F.asnumpy(F.sum(masked_select(neg_scores[i] >= pos_scores[i],
                                                    mask[i]), dim=0) + 1)
        else:
            ranking = F.asnumpy(F.sum(neg_scores[i] >= pos_scores[i], dim=0) + 1)
        logs.append({
            'MRR': 1.0 / ranking,
            'MR': float(ranking),
            'HITS@1': 1.0 if ranking <= 1 else 0.0,
            'HITS@3': 1.0 if ranking <= 3 else 0.0,
            'HITS@10': 1.0 if ranking <= 10 else 0.0
        })
def forward_test(self, pos_g, neg_g, logs, gpu_id=-1):
    """Do the forward for evaluation and append ranking results to ``logs``."""
    pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, False)
    pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, False)

    batch_size = pos_g.number_of_edges()
    pos_scores = self.predict_score(pos_g)
    pos_scores = reshape(logsigmoid(pos_scores), batch_size, -1)

    neg_scores = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
                                        gpu_id=gpu_id, trace=False)
    neg_scores = reshape(logsigmoid(neg_scores), batch_size, -1)

    # We need to filter the positive edges in the negative graph.
    filter_bias = reshape(neg_g.edata['bias'], batch_size, -1)
    if self.args.gpu >= 0:
        filter_bias = cuda(filter_bias, self.args.gpu)
    neg_scores += filter_bias

    # To compute the rank of a positive edge among all negative edges,
    # we need to know how many negative edges have higher scores than
    # the positive edge.
    rankings = F.sum(neg_scores > pos_scores, dim=1) + 1
    rankings = F.asnumpy(rankings)
    for i in range(batch_size):
        ranking = rankings[i]
        logs.append({
            'MRR': 1.0 / ranking,
            'MR': float(ranking),
            'HITS@1': 1.0 if ranking <= 1 else 0.0,
            'HITS@3': 1.0 if ranking <= 3 else 0.0,
            'HITS@10': 1.0 if ranking <= 10 else 0.0
        })
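
# Illustrative sketch (assumption: not part of the original module). It shows, with
# plain NumPy, one way the filtered rank and the MRR/MR/HITS@k entries logged above
# can be derived from a single positive score and its negative scores; the function
# name and arguments are hypothetical.
import numpy as np

def example_rank_metrics(pos_score, neg_scores, is_false_negative):
    # Ignore corrupted triples that are actually true (filtered setting).
    valid = ~is_false_negative
    ranking = int(np.sum(neg_scores[valid] >= pos_score) + 1)
    return {
        'MRR': 1.0 / ranking,
        'MR': float(ranking),
        'HITS@1': 1.0 if ranking <= 1 else 0.0,
        'HITS@3': 1.0 if ranking <= 3 else 0.0,
        'HITS@10': 1.0 if ranking <= 10 else 0.0,
    }

neg = np.array([0.9, 0.2, 0.7, 0.95])
fneg = np.array([False, False, False, True])  # the last negative is a true triple
print(example_rank_metrics(0.8, neg, fneg))   # ranking = 2 -> MRR 0.5, HITS@3 1.0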
def pairwise_squared_distance(x):
    """
    x : (n_samples, n_points, dims)
    return : (n_samples, n_points, n_points)
    """
    x2s = F.sum(x * x, -1, True)
    # assuming that __matmul__ is always implemented (true for PyTorch, MXNet and Chainer)
    return x2s + F.swapaxes(x2s, -1, -2) - 2 * x @ F.swapaxes(x, -1, -2)
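
# Illustrative sketch (assumption: not part of the original module). It checks the
# expansion ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 x_i . x_j used above against a
# brute-force computation in plain PyTorch.
import torch

x = torch.randn(2, 5, 3)                           # (n_samples, n_points, dims)
x2s = (x * x).sum(-1, keepdim=True)                # squared norms, (2, 5, 1)
fast = x2s + x2s.transpose(-1, -2) - 2 * x @ x.transpose(-1, -2)
brute = ((x.unsqueeze(2) - x.unsqueeze(1)) ** 2).sum(-1)
print(torch.allclose(fast, brute, atol=1e-5))      # True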
def forward(self, pos_g, neg_g, gpu_id=-1):
    """Do the forward and compute the training loss on a batch of positive and negative edges."""
    pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, True)
    pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, True)

    self.score_func.prepare(pos_g, gpu_id, True)

    pos_score = self.predict_score(pos_g)
    pos_score = logsigmoid(pos_score)
    if gpu_id >= 0:
        neg_score = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
                                           gpu_id=gpu_id, trace=True)
    else:
        neg_score = self.predict_neg_score(pos_g, neg_g, trace=True)

    neg_score = reshape(neg_score, -1, neg_g.neg_sample_size)
    # Adversarial sampling
    if self.args.neg_adversarial_sampling:
        neg_score = F.sum(F.softmax(neg_score * self.args.adversarial_temperature,
                                    dim=1).detach()
                          * logsigmoid(-neg_score), dim=1)
    else:
        neg_score = F.mean(logsigmoid(-neg_score), dim=1)

    # subsampling weight
    # TODO: add subsampling to new sampler
    if self.args.non_uni_weight:
        subsampling_weight = pos_g.edata['weight']
        pos_score = (pos_score * subsampling_weight).sum() / subsampling_weight.sum()
        neg_score = (neg_score * subsampling_weight).sum() / subsampling_weight.sum()
    else:
        pos_score = pos_score.mean()
        neg_score = neg_score.mean()

    # compute loss
    loss = -(pos_score + neg_score) / 2

    log = {'pos_loss': -get_scalar(pos_score),
           'neg_loss': -get_scalar(neg_score),
           'loss': get_scalar(loss)}

    # regularization: TODO(zihao)
    # TODO: only reg ent & rel embeddings. other params to be added.
    if self.args.regularization_coef > 0.0 and self.args.regularization_norm > 0:
        coef, nm = self.args.regularization_coef, self.args.regularization_norm
        reg = coef * (norm(self.entity_emb.curr_emb(), nm)
                      + norm(self.relation_emb.curr_emb(), nm))
        log['regularization'] = get_scalar(reg)
        loss = loss + reg

    return loss, log
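
# Illustrative sketch (assumption: not part of the original module). It shows the
# self-adversarial weighting of negative scores used above: harder negatives get larger
# softmax weights, and the weights are detached so they receive no gradients. The
# function name is hypothetical; plain PyTorch is used instead of the backend `F`.
import torch
import torch.nn.functional as torchF

def example_adversarial_neg_loss(neg_score, temperature=1.0):
    # neg_score: (batch, neg_sample_size) raw scores of negative triples
    weights = torchF.softmax(neg_score * temperature, dim=1).detach()
    return torch.sum(weights * torchF.logsigmoid(-neg_score), dim=1)

neg_score = torch.tensor([[2.0, -1.0, 0.5]])
print(example_adversarial_neg_loss(neg_score))           # adversarially weighted, shape (1,)
print(torch.mean(torchF.logsigmoid(-neg_score), dim=1))  # uniform alternative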
def _weight_balancing(self):
    """Perform re-balancing for each task.

    It's quite common that the number of positive samples and the number of
    negative samples are significantly different. To compensate for the class
    imbalance issue, we can weight each datapoint in loss computation.

    In particular, for each task we will set the weight of negative samples to
    be 1 and the weight of positive samples to be the number of negative
    samples divided by the number of positive samples.

    If weight balancing is performed, one attribute will be affected:

    * self._task_pos_weights is set, which is a list of positive sample
      weights for each task.
    """
    num_pos = F.sum(self.labels, dim=0)
    num_indices = F.sum(self.mask, dim=0)
    self._task_pos_weights = (num_indices - num_pos) / num_pos
def forward_test_wikikg(self, query, ans, candidate, mode, logs, gpu_id=-1):
    """Do the forward and generate ranking results.

    Parameters
    ----------
    query : Tensor
        Input head and relation for test or valid.
    ans : Tensor
        The correct tail entity index.
    candidate : Tensor
        Negative sampled tail entities.
    mode : str
        Evaluation mode. Ranking metrics are computed when it is 'Valid';
        otherwise the top-10 predictions are logged.
    logs : List
        Where to put results in.
    gpu_id : int
        Which gpu to accelerate the calculation. if -1 is provided, cpu is used.
    """
    scores = self.predict_score_wikikg(query, candidate, mode, to_device=cuda,
                                       gpu_id=gpu_id, trace=False)
    if mode == "Valid":
        batch_size = query.shape[0]
        neg_scores = reshape(scores, batch_size, -1)
        for i in range(batch_size):
            ranking = F.asnumpy(F.sum(neg_scores[i] >= neg_scores[i][ans[i]], dim=0) + 1)
            logs.append({
                'MRR': 1.0 / ranking,
                'MR': float(ranking),
                'HITS@1': 1.0 if ranking <= 1 else 0.0,
                'HITS@3': 1.0 if ranking <= 3 else 0.0,
                'HITS@10': 1.0 if ranking <= 10 else 0.0
            })
    else:
        argsort = F.argsort(scores, dim=1, descending=True)
        logs.append(argsort[:, :10])
def _weight_balancing(self):
    num_pos = F.sum(self.label_list, dim=0)
    num_indices = F.sum(self.mask_list, dim=0)
    self._task_pos_weights = (num_indices - num_pos) / num_pos
def extended_jaccard_func(x, y):
    score = F.sum(x * y, dim=0)
    x = F.sum(x * x, dim=0)
    y = F.sum(y * y, dim=0)
    return score / (x + y - score)
def dot_func(x, y):
    return F.sum(x * y, dim=0)
def l2_func(x, y):
    score = x - y
    return -F.sum(score * score, dim=0) ** (1/2)
def cosine_func(x, y):
    score = F.sum(x * y, dim=0)
    x_norm2 = F.sum(x * x, dim=0) ** (1/2)
    y_norm2 = F.sum(y * y, dim=0) ** (1/2)
    return score / (x_norm2 * y_norm2)
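
# Illustrative sketch (assumption: not part of the original module). It evaluates the
# four pairwise similarity measures defined above on small vectors with plain PyTorch,
# reducing over dim=0 just as the functions do.
import torch

x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([2.0, 2.0, 2.0])

dot = torch.sum(x * y, dim=0)
cos = dot / (torch.norm(x) * torch.norm(y))
l2 = -torch.norm(x - y)                                   # negative Euclidean distance
jac = dot / (torch.sum(x * x) + torch.sum(y * y) - dot)   # extended Jaccard
print(dot.item(), cos.item(), l2.item(), jac.item())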
def forward(self, pos_g, neg_g, gpu_id=-1):
    """Do the forward.

    Parameters
    ----------
    pos_g : DGLGraph
        Graph holding positive edges.
    neg_g : DGLGraph
        Graph holding negative edges.
    gpu_id : int
        Which gpu to accelerate the calculation. if -1 is provided, cpu is used.

    Returns
    -------
    tensor
        loss value
    dict
        loss info
    """
    pos_g.ndata['emb'] = self.entity_emb(pos_g.ndata['id'], gpu_id, True)
    pos_g.edata['emb'] = self.relation_emb(pos_g.edata['id'], gpu_id, True)

    self.score_func.prepare(pos_g, gpu_id, True)

    pos_score = self.predict_score(pos_g)
    pos_score = logsigmoid(pos_score)
    if gpu_id >= 0:
        neg_score = self.predict_neg_score(pos_g, neg_g, to_device=cuda,
                                           gpu_id=gpu_id, trace=True,
                                           neg_deg_sample=self.args.neg_deg_sample)
    else:
        neg_score = self.predict_neg_score(pos_g, neg_g, trace=True,
                                           neg_deg_sample=self.args.neg_deg_sample)

    neg_score = reshape(neg_score, -1, neg_g.neg_sample_size)
    # Adversarial sampling
    if self.args.neg_adversarial_sampling:
        neg_score = F.sum(F.softmax(neg_score * self.args.adversarial_temperature,
                                    dim=1).detach()
                          * logsigmoid(-neg_score), dim=1)
    else:
        neg_score = F.mean(logsigmoid(-neg_score), dim=1)

    # subsampling weight
    # TODO: add subsampling to new sampler
    #if self.args.non_uni_weight:
    #    subsampling_weight = pos_g.edata['weight']
    #    pos_score = (pos_score * subsampling_weight).sum() / subsampling_weight.sum()
    #    neg_score = (neg_score * subsampling_weight).sum() / subsampling_weight.sum()
    #else:
    pos_score = pos_score.mean()
    neg_score = neg_score.mean()

    # compute loss
    loss = -(pos_score + neg_score) / 2

    log = {'pos_loss': -get_scalar(pos_score),
           'neg_loss': -get_scalar(neg_score),
           'loss': get_scalar(loss)}

    # regularization: TODO(zihao)
    # TODO: only reg ent & rel embeddings. other params to be added.
    if self.args.regularization_coef > 0.0 and self.args.regularization_norm > 0:
        coef, nm = self.args.regularization_coef, self.args.regularization_norm
        reg = coef * (norm(self.entity_emb.curr_emb(), nm)
                      + norm(self.relation_emb.curr_emb(), nm))
        log['regularization'] = get_scalar(reg)
        loss = loss + reg

    return loss, log
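
# Illustrative sketch (assumption: not part of the original module). It mimics the
# norm-based regularization term added to the loss above on two small stand-in embedding
# tables; torch.norm is used here as an approximation of the original `norm` helper, and
# the base loss is a placeholder.
import torch

entity_emb = torch.randn(10, 4)     # stand-in for self.entity_emb.curr_emb()
relation_emb = torch.randn(3, 4)    # stand-in for self.relation_emb.curr_emb()
coef, nm = 1e-7, 3                  # regularization coefficient and norm order
reg = coef * (torch.norm(entity_emb, p=nm) + torch.norm(relation_emb, p=nm))
base_loss = torch.tensor(0.5)       # placeholder for -(pos_score + neg_score) / 2
loss = base_loss + reg
print(float(reg), float(loss))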