def test_average(self):
    """Check ``Stats.average()`` against a hand-written expectation.

    The expected object has both metrics of the input divided by the
    input's count of 3, while ``count`` itself stays at 3.
    """
    total = Stats(my_int_metric=9, my_float_metric=1.2, count=3)
    averaged = total.average()
    # The float is spelled out in full because the check goes through
    # ``assertEqual`` rather than an approximate comparison.
    expected = Stats(
        my_int_metric=3,
        my_float_metric=0.39999999999999997,
        count=3,
    )
    self.assertEqual(averaged, expected)
def test_sum(self):
    """Check that ``Stats.sum`` adds metrics and counts across a list."""
    parts = [
        Stats(my_int_metric=1, my_float_metric=0.1, count=1),
        Stats(my_int_metric=2, my_float_metric=0.0, count=2),
        Stats(my_int_metric=0, my_float_metric=0.2, count=2),
    ]
    combined = Stats.sum(parts)
    # 0.1 + 0.0 + 0.2 is 0.30000000000000004 in binary floating point; the
    # literal is kept exact because the check uses ``assertEqual``.
    expected = Stats(
        my_int_metric=3,
        my_float_metric=0.30000000000000004,
        count=5,
    )
    self.assertEqual(combined, expected)
def _process_one_batch(
    self, model: MultiRelationEmbedder, batch_edges: EdgeList
) -> Stats:
    """Run one training step on ``batch_edges`` and return its statistics.

    Args:
        model: Called as ``model(batch_edges)``; must return a
            ``(scores, reg)`` pair where ``reg`` may be ``None``.
        batch_edges: The edges of the batch; its ``len`` is reported as
            the stats' ``count``.

    Returns:
        A ``Stats`` holding the loss, the regularisation term (``0.0`` when
        the model returned none), the lhs/rhs violator counts and the batch
        size, all captured before the backward pass.
    """
    model.zero_grad()

    scores, reg = model(batch_edges)
    loss = self.calc_loss(scores, batch_edges)

    # A negative is a "violator" when it scores strictly above the positive
    # it is compared with. ``unsqueeze(1)`` adds an axis to the positive
    # scores for that comparison (presumably so they broadcast across the
    # negatives -- confirm against the Scores layout).
    lhs_violations = scores.lhs_neg > scores.lhs_pos.unsqueeze(1)
    rhs_violations = scores.rhs_neg > scores.rhs_pos.unsqueeze(1)

    batch_stats = Stats(
        loss=float(loss),
        reg=0.0 if reg is None else float(reg),
        violators_lhs=int(lhs_violations.sum()),
        violators_rhs=int(rhs_violations.sum()),
        count=len(batch_edges),
    )

    # The regulariser only takes part in the backward pass when present.
    objective = loss if reg is None else loss + reg
    objective.backward()

    # Step order: model-level optimizer, then unpartitioned, then
    # partitioned optimizers.
    self.model_optimizer.step(closure=None)
    for unpartitioned in self.unpartitioned_optimizers.values():
        unpartitioned.step(closure=None)
    for partitioned in self.partitioned_optimizers.values():
        partitioned.step(closure=None)

    return batch_stats
def process_one_batch(
    self,
    model: MultiRelationEmbedder,
    batch_edges: EdgeList,
) -> Stats:
    """Run one training step on ``batch_edges`` and return its statistics.

    The loss is the sum of ``self.loss_fn`` applied to the lhs and rhs
    scores, multiplied by the weight of the batch's relation.

    Args:
        model: Called as ``model(batch_edges)`` to obtain the scores.
        batch_edges: The edges of the batch; its ``len`` is reported as
            the stats' ``count``.

    Returns:
        A ``Stats`` with the weighted loss, the lhs/rhs violator counts and
        the batch size, captured before the backward pass.
    """
    model.zero_grad()

    scores = model(batch_edges)
    lhs_loss = self.loss_fn(scores.lhs_pos, scores.lhs_neg)
    rhs_loss = self.loss_fn(scores.rhs_pos, scores.rhs_neg)

    # Batches without a single scalar relation type fall back to relation 0.
    if batch_edges.has_scalar_relation_type():
        relation_idx = batch_edges.get_relation_type_as_scalar()
    else:
        relation_idx = 0
    relation = self.relations[relation_idx]
    loss = relation.weight * (lhs_loss + rhs_loss)

    # A negative is a "violator" when it scores strictly above the positive
    # it is compared with.
    lhs_violations = scores.lhs_neg > scores.lhs_pos.unsqueeze(1)
    rhs_violations = scores.rhs_neg > scores.rhs_pos.unsqueeze(1)
    batch_stats = Stats(
        loss=float(loss),
        violators_lhs=int(lhs_violations.sum()),
        violators_rhs=int(rhs_violations.sum()),
        count=len(batch_edges),
    )

    loss.backward()

    # Global optimizer first, then every per-entity optimizer.
    self.global_optimizer.step(closure=None)
    for entity_optimizer in self.entity_optimizers.values():
        entity_optimizer.step(closure=None)

    return batch_stats
def eval(
    self,
    scores: Scores,
    batch_edges: EdgeList,
) -> Stats:
    """Compute ranking metrics for one batch from its scores.

    Each side (lhs, rhs) contributes only when it has at least one negative
    score. The rank of a positive is one plus the number of negatives
    scoring greater than or equal to it.

    Args:
        scores: Provides ``lhs_pos``/``lhs_neg``/``rhs_pos``/``rhs_neg``.
        batch_edges: The edges of the batch; only its ``len`` is used.

    Returns:
        A ``Stats`` with ``pos_rank``, ``mrr``, ``r1``, ``r10``, ``r50``,
        ``auc`` and ``count``.
    """
    batch_size = len(batch_edges)
    ranks = []
    aucs = []

    # lhs is handled before rhs, so the AUC helper is invoked in the same
    # order as when the two sides are written out one after the other.
    sides = (
        (scores.lhs_pos, scores.lhs_neg),
        (scores.rhs_pos, scores.rhs_neg),
    )
    for pos, neg in sides:
        if neg.nelement() <= 0:
            continue
        ranks.append((neg >= pos.unsqueeze(1)).sum(1) + 1)
        aucs.append(compute_randomized_auc(pos, neg, batch_size))

    def hits_at(k):
        # Aggregates, over the contributing sides, the "rank <= k" masks.
        return average_of_sums(*[rank.le(k) for rank in ranks])

    # NOTE(review): when neither side has negatives, ``aucs`` is empty and
    # the AUC expression below divides by zero -- confirm callers never
    # reach this method in that state.
    return Stats(
        pos_rank=average_of_sums(*ranks),
        mrr=average_of_sums(*[rank.float().reciprocal() for rank in ranks]),
        r1=hits_at(1),
        r10=hits_at(10),
        r50=hits_at(50),
        # Pre-multiplied by the batch size: at the end the AUC will be
        # averaged over count.
        auc=batch_size * sum(aucs) / len(aucs),
        count=batch_size,
    )
def eval(
    self,
    scores: Scores,
    batch_edges: EdgeList,
) -> Stats:
    """Compute ranking metrics for one batch from its lhs and rhs scores.

    The rank of a positive is one plus the number of negatives scoring
    greater than or equal to it; both sides always contribute.

    Args:
        scores: Provides ``lhs_pos``/``lhs_neg``/``rhs_pos``/``rhs_neg``.
        batch_edges: The edges of the batch; only its ``len`` is used.

    Returns:
        A ``Stats`` with ``pos_rank``, ``mrr``, ``r1``, ``r10``, ``r50``,
        ``auc`` and ``count``.
    """
    num_edges = len(batch_edges)

    rank_lhs = (scores.lhs_neg >= scores.lhs_pos.unsqueeze(1)).sum(1) + 1
    rank_rhs = (scores.rhs_neg >= scores.rhs_pos.unsqueeze(1)).sum(1) + 1

    # lhs AUC is computed before rhs AUC; keep this order in case the
    # helper draws random numbers.
    auc_lhs = compute_randomized_auc(scores.lhs_pos, scores.lhs_neg, num_edges)
    auc_rhs = compute_randomized_auc(scores.rhs_pos, scores.rhs_neg, num_edges)

    def hits_at(k):
        # Aggregates the "rank <= k" masks of both sides.
        return average_of_sums(rank_lhs.le(k), rank_rhs.le(k))

    return Stats(
        pos_rank=average_of_sums(rank_lhs, rank_rhs),
        mrr=average_of_sums(
            rank_lhs.float().reciprocal(),
            rank_rhs.float().reciprocal(),
        ),
        r1=hits_at(1),
        r10=hits_at(10),
        r50=hits_at(50),
        # Pre-multiplied by the batch size: at the end the AUC will be
        # averaged over count.
        auc=num_edges * (auc_lhs + auc_rhs) / 2,
        count=num_edges,
    )
def _process_one_batch(
    self, model: MultiRelationEmbedder, batch_edges: EdgeList
) -> Stats:
    """Run one training step, with occasional global L2 regularisation.

    Args:
        model: Called as ``model(batch_edges)``; must return a
            ``(scores, reg)`` pair where ``reg`` may be ``None``. Its
            ``wd``, ``wd_interval`` and ``l2_norm()`` drive the weight
            decay term.
        batch_edges: The edges of the batch; its ``len`` is reported as
            the stats' ``count``.

    Returns:
        A ``Stats`` holding the loss, the regularisation term (``0.0`` when
        the model returned none), the lhs/rhs violator counts and the batch
        size. The reported loss excludes ``reg`` and the weight decay term.
    """
    # Tricky: this is basically like calling `model.zero_grad()` except
    # that `zero_grad` calls `p.grad.zero_()`. When we perform infrequent
    # global L2 regularization, it converts the embedding gradients to
    # dense, and then they can never convert back to sparse gradients
    # unless we set them to `None` again here.
    for param in model.parameters():
        param.grad = None

    scores, reg = model(batch_edges)
    loss = self.calc_loss(scores, batch_edges)

    # A negative is a "violator" when it scores strictly above the positive
    # it is compared with.
    lhs_violations = scores.lhs_neg > scores.lhs_pos.unsqueeze(1)
    rhs_violations = scores.rhs_neg > scores.rhs_pos.unsqueeze(1)
    batch_stats = Stats(
        loss=float(loss),
        reg=0.0 if reg is None else float(reg),
        violators_lhs=int(lhs_violations.sum()),
        violators_rhs=int(rhs_violations.sum()),
        count=len(batch_edges),
    )

    objective = loss
    if reg is not None:
        objective = objective + reg
    # The random draw only happens when weight decay is enabled. The term is
    # added with probability 1 / wd_interval and scaled up by wd_interval.
    if model.wd > 0:
        if random.random() < 1. / model.wd_interval:
            objective = objective + model.wd * model.wd_interval * model.l2_norm()
    objective.backward()

    # Step order: model-level optimizer, then unpartitioned, then
    # partitioned optimizers.
    self.model_optimizer.step(closure=None)
    for unpartitioned in self.unpartitioned_optimizers.values():
        unpartitioned.step(closure=None)
    for partitioned in self.partitioned_optimizers.values():
        partitioned.step(closure=None)

    return batch_stats
def _process_one_batch(
    self, model: MultiRelationEmbedder, batch_edges: EdgeList
) -> Stats:
    """Evaluate one batch: loss plus ranking metrics, no gradient step.

    Each side (lhs, rhs) contributes to the ranking metrics only when it
    has at least one negative score. The rank of a positive is one plus the
    number of negatives scoring greater than or equal to it.

    Args:
        model: Called as ``model(batch_edges)`` to obtain the scores.
        batch_edges: The edges of the batch; its ``len`` is reported as
            the stats' ``count``.

    Returns:
        A ``Stats`` with ``loss``, ``pos_rank``, ``mrr``, ``r1``, ``r10``,
        ``r50``, ``auc`` and ``count``.
    """
    # NOTE(review): the forward pass is the only statement placed under
    # ``no_grad`` here -- confirm this matches the intended scope.
    with torch.no_grad():
        scores = model(batch_edges)

    self._adjust_scores(scores, batch_edges)

    batch_size = len(batch_edges)
    loss = self.calc_loss(scores, batch_edges)

    ranks = []
    aucs = []
    # lhs is handled before rhs, so the AUC helper is invoked in the same
    # order as when the two sides are written out one after the other.
    sides = (
        (scores.lhs_pos, scores.lhs_neg),
        (scores.rhs_pos, scores.rhs_neg),
    )
    for pos, neg in sides:
        if neg.nelement() <= 0:
            continue
        ranks.append((neg >= pos.unsqueeze(1)).sum(1) + 1)
        aucs.append(compute_randomized_auc(pos, neg, batch_size))

    def hits_at(k):
        # Aggregates, over the contributing sides, the "rank <= k" masks.
        return average_of_sums(*[rank.le(k) for rank in ranks])

    # NOTE(review): when neither side has negatives, ``aucs`` is empty and
    # the AUC expression below divides by zero -- confirm callers never
    # reach this method in that state.
    return Stats(
        loss=float(loss),
        pos_rank=average_of_sums(*ranks),
        mrr=average_of_sums(*[rank.float().reciprocal() for rank in ranks]),
        r1=hits_at(1),
        r10=hits_at(10),
        r50=hits_at(50),
        # Pre-multiplied by the batch size: at the end the AUC will be
        # averaged over count.
        auc=batch_size * sum(aucs) / len(aucs),
        count=batch_size,
    )
def test_str(self):
    """Check the string form of a ``Stats`` with three fields."""
    stats = Stats(my_int_metric=1, my_float_metric=0.2, count=3)
    rendered = str(stats)
    # Note the space on both sides of each comma separator in the expected
    # output.
    expected = "my_int_metric: 1 , my_float_metric: 0.2 , count: 3"
    self.assertEqual(rendered, expected)