Example 1
    def pad_token_sequence(
            self,
            tokens: Dict[str, List[List[int]]],
            desired_num_tokens: Dict[str, int],
            padding_length: Dict[str, int]) -> Dict[str, List[List[int]]]:
        key = list(tokens.keys())[0]

        # First pad the number of tokens up to the desired sequence length.
        padded_ = pad_sequence_to_length(tokens[key],
                                         desired_num_tokens[key],
                                         default_value=self.get_padding_token)

        # Pad all characters within the tokens
        desired_token_length = padding_length['num_token_characters']
        longest_token: List[int] = max(tokens[key], key=len, default=[])
        padding_value = 0

        # If no real token reaches the desired length, append a dummy token of
        # exactly that length so the transpose below pads up to it.
        if desired_token_length > len(longest_token):
            padded_.append([padding_value] * desired_token_length)

        # Transpose, let zip_longest pad the shorter tokens with zeros, and
        # transpose back so every token has the same number of character ids.
        padded_ = list(
            zip(*itertools.zip_longest(*padded_, fillvalue=padding_value)))

        # Remove the dummy token again, if one was added.
        if desired_token_length > len(longest_token):
            padded_.pop()

        # Truncate each token to the desired length and rebuild the dictionary.
        return {key: [list(token[:desired_token_length]) for token in padded_]}
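The append/zip_longest/pop dance above is easier to follow on concrete data. Below is a minimal, self-contained sketch of just the character-padding step; the pad_character_sequences name and the sample inputs are made up for illustration, and the real method additionally pads the token list itself via pad_sequence_to_length and self.get_padding_token.

import itertools
from typing import List

def pad_character_sequences(tokens: List[List[int]],
                            desired_token_length: int,
                            padding_value: int = 0) -> List[List[int]]:
    # Append a dummy token of the desired length so zip_longest pads up to it
    # even when every real token is shorter.
    longest = max(tokens, key=len, default=[])
    added_dummy = desired_token_length > len(longest)
    padded = list(tokens)
    if added_dummy:
        padded.append([padding_value] * desired_token_length)
    # Transpose, pad the shorter rows with zeros, and transpose back.
    padded = list(zip(*itertools.zip_longest(*padded, fillvalue=padding_value)))
    # Drop the dummy token again and truncate every token to the desired length.
    if added_dummy:
        padded.pop()
    return [list(token[:desired_token_length]) for token in padded]

print(pad_character_sequences([[5, 6], [7]], 4))  # [[5, 6, 0, 0], [7, 0, 0, 0]]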
Example 2
 def pad_token_sequence(
     self, tokens: Dict[str, List[int]], desired_num_tokens: Dict[str, int],
     padding_lengths: Dict[str, int]
 ) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
     return {
         key: pad_sequence_to_length(val, desired_num_tokens[key])
         for key, val in tokens.items()
     }
Example 3
 def pad_token_sequence(
         self, tokens: Dict[str, List[int]], desired_num_tokens: Dict[str, int],
         padding_length: Dict[str, int]) -> Dict[str, List[int]]:
     return {
         key: pad_sequence_to_length(val, desired_num_tokens[key])
         for key, val in tokens.items()
     }
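Both variants simply forward each key to pad_sequence_to_length. As a rough sketch of what that call does for single-ID indexers, here is a simplified right-padding stand-in (not the actual allennlp.common.util implementation, which takes a default_value callable and supports more options):

from typing import Dict, List

def pad_sequence_to_length(sequence: List[int], desired_length: int,
                           padding_value: int = 0) -> List[int]:
    # Truncate overly long sequences, then pad on the right with the padding value.
    padded = sequence[:desired_length]
    return padded + [padding_value] * (desired_length - len(padded))

tokens: Dict[str, List[int]] = {'tokens': [3, 5, 7]}
desired_num_tokens = {'tokens': 5}
print({key: pad_sequence_to_length(val, desired_num_tokens[key])
       for key, val in tokens.items()})  # {'tokens': [3, 5, 7, 0, 0]}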
Example 4
 def as_tensor(self, padding_lengths: Dict[str, int]) -> Dict[str, torch.Tensor]:
     padded_field_list = pad_sequence_to_length(
         self.field_list, padding_lengths['num_fields'],
         self.field_list[0].empty_field)
     # Here we're removing the scoping on the padding length keys that we added in
     # `get_padding_lengths`; see the note there for more detail.
     child_padding_lengths = {
         key.replace('list_', '', 1): value
         for key, value in padding_lengths.items()
         if key.startswith('list_')
     }
     padded_fields = [
         field.as_tensor(child_padding_lengths)
         for field in padded_field_list
     ]
     return self.field_list[0].batch_tensors(padded_fields)
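The only subtle step above is the key-renaming comprehension: it strips the 'list_' scope that get_padding_lengths added so each child field sees the plain key names it expects. A quick illustration with made-up padding keys:

padding_lengths = {'num_fields': 3, 'list_num_tokens': 7, 'list_num_token_characters': 12}

# Keep only the 'list_'-scoped keys and drop the prefix (first occurrence only).
child_padding_lengths = {
    key.replace('list_', '', 1): value
    for key, value in padding_lengths.items()
    if key.startswith('list_')
}
print(child_padding_lengths)  # {'num_tokens': 7, 'num_token_characters': 12}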
Example 5
 def as_tensor(self, padding_lengths: Dict[str, int]) -> torch.Tensor:
     desired_num_tokens = padding_lengths['num_tokens']
     # Pad the label indices up to the padded sequence length.
     padded_tags = pad_sequence_to_length(self.indexed_labels,
                                          desired_num_tokens)
     tensor_ = torch.LongTensor(padded_tags)
     return tensor_
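For reference, the final conversion in isolation, with hypothetical label indices for a 3-token sentence already right-padded to num_tokens = 5 (padding id 0):

import torch

padded_tags = [2, 0, 4, 0, 0]  # hypothetical, already padded label indices
tensor_ = torch.LongTensor(padded_tags)
print(tensor_)        # tensor([2, 0, 4, 0, 0])
print(tensor_.dtype)  # torch.int64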