def test_prob_space(self):
    """Check ``average_tensors`` when ``F.softmax`` is supplied as ``norm_fn``.

    Row 0 of each input holds equal values; rows 1 and 2 of the second
    input are the column-swapped rows of the first. The expected average
    is 0.5 in every cell.
    """
    first = torch.Tensor([[5.0, 5.0], [6.0, 2.0], [2.0, 6.0]])
    second = torch.Tensor([[0.0, 0.0], [2.0, 6.0], [6.0, 2.0]])
    uniform = torch.Tensor([[0.5, 0.5], [0.5, 0.5], [0.5, 0.5]])

    # presumably norm_fn is applied to each tensor before the mean is
    # taken -- confirm against pytorch_utils.average_tensors
    averaged = pytorch_utils.average_tensors([first, second], norm_fn=F.softmax)
    npt.assert_allclose(averaged, uniform)
def test_mean(self):
    """Check plain and weighted averaging of three tensors.

    Without weights the result is the element-wise mean. With explicit
    weights the assertions pin down that the weights are applied as given:
    three weights of 1/3 reproduce the mean, unit weights give three times
    the mean, and ``[1, 0, 0]`` returns the first tensor.
    """
    t0 = torch.Tensor([[0.0, 2.0, 5.0], [5.0, -5.0, 6.0]])
    t1 = torch.Tensor([[4.0, 2.0, -1.0], [5.0, 10.0, 6.0]])
    t2 = torch.Tensor([[-1.0, 2.0, 5.0], [2.0, 10.0, 6.0]])
    mean = torch.Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

    # Unweighted call.
    npt.assert_allclose(pytorch_utils.average_tensors([t0, t1, t2]), mean)

    # Weighted calls, checked in the same order as a hand-written sequence.
    one_third = 1.0 / 3.0
    weighted_cases = (
        ([one_third, one_third, one_third], mean),
        ([1, 1, 1], 3 * mean),
        ([1, 0, 0], t0),
    )
    for case_weights, case_expected in weighted_cases:
        npt.assert_allclose(
            pytorch_utils.average_tensors([t0, t1, t2], weights=case_weights),
            case_expected,
        )
def forward(
    self,
    unprojected_outs,
    src_tokens=None,
    input_tokens=None,
    possible_translation_tokens=None,
    select_single=None,
):
    """Weight-average the unprojected outputs, then project them once.

    Args:
        unprojected_outs: One unprojected output per system.
        src_tokens: Forwarded to the output projection.
        input_tokens: Forwarded to the output projection.
        possible_translation_tokens: Forwarded to the output projection.
        select_single: Forwarded to ``self.compute_weights``.

    Returns:
        Whatever ``self.output_projections[0]`` returns for the averaged
        input.
    """
    all_weights = self.compute_weights(unprojected_outs, select_single)

    # Use a width-1 slice instead of an index so each per-system weight
    # keeps its third axis (presumably for broadcasting against the
    # outputs -- confirm against average_tensors).
    per_system_weights = []
    for system_idx in range(self.n_systems):
        per_system_weights.append(all_weights[:, :, system_idx:system_idx + 1])

    combined = average_tensors(unprojected_outs, weights=per_system_weights)

    # Only the first projection is used by this strategy.
    shared_projection = self.output_projections[0]
    return shared_projection(
        combined, src_tokens, input_tokens, possible_translation_tokens
    )
def forward(
    self,
    unprojected_outs,
    src_tokens=None,
    input_tokens=None,
    possible_translation_tokens=None,
    select_single=None,
):
    """Project either the plain average or one selected unprojected output.

    Args:
        unprojected_outs: One unprojected output per system.
        src_tokens: Forwarded to ``self.output_projection``.
        input_tokens: Forwarded to ``self.output_projection``.
        possible_translation_tokens: Forwarded to ``self.output_projection``.
        select_single: When not ``None``, index of the single entry of
            ``unprojected_outs`` to project; averaging is skipped.

    Returns:
        Whatever ``self.output_projection`` returns.
    """
    project = self.output_projection
    if select_single is None:
        projection_input = average_tensors(unprojected_outs)
    else:
        projection_input = unprojected_outs[select_single]
    return project(
        projection_input, src_tokens, input_tokens, possible_translation_tokens
    )
def forward(
    self,
    input_tokens,
    encoder_out,
    incremental_state=None,
    possible_translation_tokens=None,
):
    """Run every decoder and merge the results with the combination strategy.

    Args:
        input_tokens: Passed to each decoder's ``forward_unprojected`` and,
            while ``self.training`` is true, to the combination strategy.
        encoder_out: Encoder output. ``self._get_contexts`` derives the
            per-decoder contexts from it, and element 4 is handed to the
            combination strategy as ``src_tokens``.
        incremental_state: Optional dict keyed by decoder index. Missing
            entries are created in place as empty dicts. When ``None``,
            every decoder is called with ``incremental_state=None``.
        possible_translation_tokens: Forwarded to the combination strategy.

    Returns:
        Tuple ``(logits, mean_attn_scores, possible_translation_tokens)``,
        where the first and last items come from the combination strategy
        and ``mean_attn_scores`` averages the second output of the decoders
        listed in ``self.attentive_decoder_ids``.
    """
    # True only when the schedule below has frozen the combination strategy
    # for this step. Initialised up front so the later check never reads an
    # unbound local when ``separate_training`` is set without
    # ``unfreeze_single``.
    combi_strat_frozen = False

    # NOTE(review): the nesting of this schedule block was reconstructed from
    # flattened source -- confirm the placement of ``freeze_decoders``.
    if self.unfreeze_single:
        # Advance the schedule on every forward call.
        self.unfreeze_idx = (self.unfreeze_idx + 1) % self.unfreeze_mod
        if self.separate_training:
            # The index one past the last decoder is the combination
            # strategy's own turn.
            unfreeze_combi_strat = len(self.decoders) == self.unfreeze_idx
            for p in self.combi_strat.parameters():
                p.requires_grad = unfreeze_combi_strat
            combi_strat_frozen = not unfreeze_combi_strat
        self.freeze_decoders(self.unfreeze_idx)

    if incremental_state is None:
        incremental_state = {
            decoder_id: None for decoder_id in range(len(self.decoders))
        }

    decoder_outs = []
    decoder_contexts = self._get_contexts(encoder_out)
    for decoder_id, decoder in enumerate(self.decoders):
        if decoder_id not in incremental_state:
            incremental_state[decoder_id] = {}
        decoder_outs.append(
            decoder.forward_unprojected(
                input_tokens,
                decoder_contexts[decoder_id],
                incremental_state=incremental_state[decoder_id],
            )
        )

    mean_attn_scores = average_tensors(
        [decoder_outs[decoder_id][1] for decoder_id in self.attentive_decoder_ids]
    )

    # While a single decoder is being trained separately, route only that
    # decoder's output through the combination strategy.
    select_single = None
    if self.separate_training and combi_strat_frozen:
        select_single = self.unfreeze_idx

    logits, possible_translation_tokens = self.combi_strat(
        [x for x, _ in decoder_outs],
        src_tokens=encoder_out[4],
        input_tokens=input_tokens if self.training else None,
        possible_translation_tokens=possible_translation_tokens,
        select_single=select_single,
    )
    return logits, mean_attn_scores, possible_translation_tokens
def forward(
    self,
    unprojected_outs,
    src_tokens=None,
    input_tokens=None,
    possible_translation_tokens=None,
    select_single=None,
):
    """Project each system separately, then weight-average the results.

    Args:
        unprojected_outs: One unprojected output per system, paired in
            order with ``self.output_projections``.
        src_tokens: Unused here.
        input_tokens: Unused here.
        possible_translation_tokens: Must be ``None``.
        select_single: Forwarded to ``self.compute_weights``.

    Returns:
        Tuple ``(avg, None)``. ``avg`` is the weighted average computed with
        ``self.norm_fn``, with the logarithm taken in place when
        ``self.to_log`` is truthy.
    """
    assert possible_translation_tokens is None

    system_weights = self.compute_weights(unprojected_outs, select_single)
    # Width-1 slices keep the third axis of every per-system weight.
    weight_slices = [
        system_weights[:, :, k:k + 1] for k in range(self.n_systems)
    ]

    # Each projection returns a sequence; only its first element is used.
    per_system_logits = []
    for projection, hidden in zip(self.output_projections, unprojected_outs):
        per_system_logits.append(projection(hidden)[0])

    combined = average_tensors(
        per_system_logits, weights=weight_slices, norm_fn=self.norm_fn
    )
    if self.to_log:
        combined.log_()
    return combined, None
def forward(
    self,
    unprojected_outs,
    src_tokens=None,
    input_tokens=None,
    possible_translation_tokens=None,
    select_single=None,
):
    """Project each system separately, then average without weights.

    Args:
        unprojected_outs: One unprojected output per system, paired in
            order with ``self.output_projections``.
        src_tokens: Unused here.
        input_tokens: Unused here.
        possible_translation_tokens: Must be ``None``.
        select_single: When not ``None``, index of the single system to
            use; its projection output is returned directly.

    Returns:
        With ``select_single`` set, whatever the selected projection
        returns; ``self.norm_fn`` and ``self.to_log`` are not applied on
        this path. Otherwise the tuple ``(avg, None)``, where ``avg`` is
        the average computed with ``self.norm_fn``, with the logarithm
        taken in place when ``self.to_log`` is truthy.
    """
    assert possible_translation_tokens is None

    if select_single is not None:
        chosen_projection = self.output_projections[select_single]
        return chosen_projection(unprojected_outs[select_single])

    # Each projection returns a sequence; only its first element is used.
    per_system_logits = []
    for projection, hidden in zip(self.output_projections, unprojected_outs):
        per_system_logits.append(projection(hidden)[0])

    combined = average_tensors(per_system_logits, norm_fn=self.norm_fn)
    if self.to_log:
        combined.log_()
    return combined, None