def __compute_spellchecker_loss(self, embeddings: torch.Tensor, targets: torch.LongTensor) -> torch.Tensor:
    """Spell-checker loss computed only over positions with a real label.

    A target value of 0 marks an ignored/padded position; the surviving
    labels are shifted down by one so they become 0-based class indices.
    """
    # Positions that actually carry a spelling label.
    keep = targets > 0
    # Shift labels from 1-based (0 = ignore) to 0-based class indices.
    labels = targets.masked_select(keep) - 1
    # Gather the embedding rows at the kept positions.
    # NOTE(review): assumes the trailing embedding dim is output_dim + 1 —
    # confirm against the model definition.
    rows = embeddings.masked_select(keep.unsqueeze(-1)).view(-1, self.output_dim + 1)
    return self.losses['spellchecker'](rows, labels)
def mask_at_indexes(
    tokenizer: PreTrainedTokenizer,
    ids: pt.LongTensor,
    tokens_mask: pt.BoolTensor,
    indexes: pt.LongTensor,
) -> pt.LongTensor:
    """Return a copy of ``ids`` with selected tokens replaced by the mask id.

    ``indexes`` addresses positions *within* the sub-sequence picked out by
    ``tokens_mask``, not positions in ``ids`` itself. The input ``ids``
    tensor is left untouched.
    """
    # Pull out the maskable token ids (masked_select copies, so writing
    # into `selected` does not mutate `ids`).
    selected = ids.masked_select(tokens_mask)
    selected[indexes] = tokenizer.mask_token_id
    # Scatter the partially masked sub-sequence back into a fresh tensor.
    result = ids.masked_scatter(tokens_mask, selected)
    return cast(pt.LongTensor, result)