from typing import Dict

import torch

from allennlp.common.util import pad_sequence_to_length
from allennlp.data.token_indexers.token_indexer import IndexedTokenList


def as_padded_tensor_dict(
    self, tokens: IndexedTokenList, padding_lengths: Dict[str, int]
) -> Dict[str, torch.Tensor]:
    # Different transformers use different padding values for tokens, but for
    # the mask and type ids, the padding value is always 0.
    tokens = tokens.copy()
    padding_lengths = padding_lengths.copy()

    # Pop "offsets" first: its elements are (start, end) pairs, so it cannot
    # go through the generic integer-padding path below.
    offsets_tokens = tokens.pop("offsets")
    offsets_padding_lengths = padding_lengths.pop("offsets")

    tensor_dict = {
        key: torch.LongTensor(
            pad_sequence_to_length(
                val,
                padding_lengths[key],
                default_value=lambda: 0
                if "mask" in key or "type_ids" in key
                else self._tokenizer.pad_token_id,
            )
        )
        for key, val in tokens.items()
    }
    tensor_dict["offsets"] = torch.LongTensor(
        pad_sequence_to_length(
            offsets_tokens, offsets_padding_lengths, default_value=lambda: (0, 0)
        )
    )
    return tensor_dict
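# A minimal usage sketch, not part of the original source: `indexer` is a
# hypothetical instance of the class this method belongs to, and the token
# values below are illustrative. It shows how each key is padded: "mask" and
# "type_ids" with 0, "token_ids" with the tokenizer's pad_token_id, and
# "offsets" with (0, 0) pairs.
indexed: IndexedTokenList = {
    "token_ids": [101, 7592, 2088, 102],
    "mask": [1, 1, 1, 1],
    "type_ids": [0, 0, 0, 0],
    "offsets": [(1, 1), (2, 2)],
}
lengths = {"token_ids": 6, "mask": 6, "type_ids": 6, "offsets": 4}
tensors = indexer.as_padded_tensor_dict(indexed, lengths)
# tensors["token_ids"] -> shape (6,); tensors["offsets"] -> shape (4, 2)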
from typing import Dict

import torch

from allennlp.common.util import pad_sequence_to_length
from allennlp.data.token_indexers.token_indexer import IndexedTokenList


def as_padded_tensor_dict(
    self, tokens: IndexedTokenList, padding_lengths: Dict[str, int]
) -> Dict[str, torch.Tensor]:
    tokens = tokens.copy()
    padding_lengths = padding_lengths.copy()

    # Pull out "offsets" so the wrapped matched indexer only sees the entries
    # it knows how to pad.
    offsets_tokens = tokens.pop("offsets")
    offsets_padding_lengths = padding_lengths.pop("offsets")

    # Delegate padding of the token ids, mask, and type ids to the matched
    # indexer instead of duplicating its padding rules here.
    tensor_dict = self._matched_indexer.as_padded_tensor_dict(tokens, padding_lengths)

    # Offsets are (start, end) pairs, so they are padded with (0, 0).
    tensor_dict["offsets"] = torch.LongTensor(
        pad_sequence_to_length(
            offsets_tokens, offsets_padding_lengths, default_value=lambda: (0, 0)
        )
    )
    return tensor_dict
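# Sketch of the padding primitive both versions rely on (values illustrative):
# allennlp's pad_sequence_to_length appends the result of default_value()
# until the sequence reaches the desired length, padding on the right.
padded_ids = pad_sequence_to_length([7592, 2088], 4, default_value=lambda: 0)
assert padded_ids == [7592, 2088, 0, 0]

padded_offsets = pad_sequence_to_length([(1, 1)], 3, default_value=lambda: (0, 0))
assert padded_offsets == [(1, 1), (0, 0), (0, 0)]

# Design note: the second version avoids duplicating the matched indexer's
# padding rules; only the "offsets" entry, which the matched indexer does not
# know about, is handled locally.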