Example no. 1
0
    def as_padded_tensor_dict(
            self, tokens: IndexedTokenList,
            padding_lengths: Dict[str, int]) -> Dict[str, torch.Tensor]:
        # Different transformers use different padding values for tokens, but for mask and type id, the padding
        # value is always 0.

        tokens = tokens.copy()
        padding_lengths = padding_lengths.copy()

        offsets_tokens = tokens.pop("offsets")
        offsets_padding_lengths = padding_lengths.pop("offsets")

        tensor_dict = {
            key: torch.LongTensor(
                pad_sequence_to_length(
                    val,
                    padding_lengths[key],
                    default_value=lambda: 0
                    if "mask" in key or "type_ids" in key
                    else self._tokenizer.pad_token_id,
                ))
            for key, val in tokens.items()
        }

        tensor_dict["offsets"] = torch.LongTensor(
            pad_sequence_to_length(offsets_tokens,
                                   offsets_padding_lengths,
                                   default_value=lambda: (0, 0)))

        return tensor_dict
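
For reference, a minimal sketch of what that per-key default_value does, assuming AllenNLP's pad_sequence_to_length (from allennlp.common.util) and a hypothetical tokenizer whose pad_token_id is 1 (as in RoBERTa); the keys and lengths below are illustrative only.

import torch
from allennlp.common.util import pad_sequence_to_length

pad_token_id = 1  # assumed tokenizer pad id, e.g. RoBERTa's <pad>

def pad(key, values, length):
    # Same rule as the comprehension above: mask/type id keys pad with 0,
    # everything else pads with the tokenizer's pad id.
    return torch.LongTensor(
        pad_sequence_to_length(
            values,
            length,
            default_value=lambda: 0
            if "mask" in key or "type_ids" in key
            else pad_token_id,
        ))

print(pad("token_ids", [0, 713, 2], 5))  # tensor([  0, 713,   2,   1,   1])
print(pad("mask", [1, 1, 1], 5))         # tensor([1, 1, 1, 0, 0])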

    def as_padded_tensor_dict(
            self, tokens: IndexedTokenList,
            padding_lengths: Dict[str, int]) -> Dict[str, torch.Tensor]:
        tokens = tokens.copy()
        padding_lengths = padding_lengths.copy()

        # "offsets" needs a different padding value ((0, 0) pairs), so pull it
        # out before handing the remaining keys to the matched indexer.
        offsets_tokens = tokens.pop("offsets")
        offsets_padding_lengths = padding_lengths.pop("offsets")

        # Delegate padding of the wordpiece-level keys (token ids, mask, type
        # ids) to the wrapped matched indexer; offsets are handled below.
        tensor_dict = self._matched_indexer.as_padded_tensor_dict(
            tokens, padding_lengths)
        tensor_dict["offsets"] = torch.LongTensor(
            pad_sequence_to_length(offsets_tokens,
                                   offsets_padding_lengths,
                                   default_value=lambda: (0, 0)))
        return tensor_dict
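
And a corresponding sketch for the offsets handling shared by both versions: offsets are (start, end) pairs, so they are padded with (0, 0) and come out as a two-column LongTensor. The values below are made up for illustration.

import torch
from allennlp.common.util import pad_sequence_to_length

offsets = [(1, 1), (2, 4)]  # one (start, end) wordpiece span per original token

padded_offsets = torch.LongTensor(
    pad_sequence_to_length(offsets, 4, default_value=lambda: (0, 0)))

print(padded_offsets)
# tensor([[1, 1],
#         [2, 4],
#         [0, 0],
#         [0, 0]])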