예제 #1
0
    def tensorize(self, batch):
        """Tensorize a batch that also carries per-row logit columns.

        The first six columns are delegated to the parent tensorizer; the
        trailing start/end/has-answer logit columns are padded to float
        tensors and appended to the parent's result tuple.
        """
        columns = list(zip(*batch))
        base_columns = columns[:6]
        start_logits, end_logits, has_answer_logits = columns[6:]

        base_tensors = super().tensorize(zip(*base_columns))

        # has_answer_logits is padded to an explicit (batch, width) shape,
        # where width is taken from the first row of the batch.
        has_answer_shape = [len(has_answer_logits), len(has_answer_logits[0])]
        return base_tensors + (
            pad_and_tensorize(start_logits, dtype=torch.float),
            pad_and_tensorize(end_logits, dtype=torch.float),
            pad_and_tensorize(
                has_answer_logits,
                dtype=torch.float,
                pad_shape=has_answer_shape,
            ),
        )
예제 #2
0
 def tensorize(self, batch):
     """Collate a batch into padded token, pad-mask and segment tensors."""
     tokens, segment_labels, _seq_lens = zip(*batch)
     pad_idx = self.vocab.get_pad_index()
     padded_tokens = pad_and_tensorize(tokens, pad_idx)
     # Mask is 1 at real-token positions, 0 at padding positions.
     pad_mask = (padded_tokens != pad_idx).long()
     padded_segments = pad_and_tensorize(segment_labels, pad_idx)
     return padded_tokens, pad_mask, padded_segments
예제 #3
0
 def tensorize(self, batch):
     """Pad and tensorize tokens, segments, positions and answer spans.

     Answer span indices are padded with ``self.SPAN_PAD_IDX``; tokens and
     segment labels with the vocab pad index.
     """
     (
         tokens,
         segment_labels,
         _seq_len,
         positions,
         answer_start_idx,
         answer_end_idx,
     ) = zip(*batch)
     pad_idx = self.vocab.get_pad_index()
     token_tensor = pad_and_tensorize(tokens, pad_idx)
     segment_tensor = pad_and_tensorize(segment_labels, pad_idx)
     # 1 marks a real token, 0 marks padding.
     mask = (token_tensor != pad_idx).long()
     return (
         token_tensor,
         mask,
         segment_tensor,
         pad_and_tensorize(positions),
         pad_and_tensorize(answer_start_idx, self.SPAN_PAD_IDX),
         pad_and_tensorize(answer_end_idx, self.SPAN_PAD_IDX),
     )
예제 #4
0
 def tensorize(self, batch):
     """Tensorize doc/question fields via the parent, then append the
     start/end/has-answer logit columns as padded float tensors."""
     columns = list(zip(*batch))
     logit_columns = columns[6:]  # start_logits, end_logits, has_answer_logits
     base_tensors = super().tensorize(zip(*columns[:6]))
     logit_tensors = tuple(
         pad_and_tensorize(col, dtype=torch.float) for col in logit_columns
     )
     return base_tensors + logit_tensors
예제 #5
0
 def tensorize(self, batch) -> Tuple[torch.Tensor, ...]:
     """Collate tokens, segments, positions and labels into padded tensors."""
     tokens, segment_labels, _seq_lens, positions, labels = zip(*batch)
     pad_idx = self.vocab.get_pad_index()
     token_tensor = pad_and_tensorize(tokens, pad_idx)
     # 1 where a real token sits, 0 at padding positions.
     mask = (token_tensor != pad_idx).long()
     return (
         token_tensor,
         mask,
         pad_and_tensorize(segment_labels, pad_idx),
         pad_and_tensorize(positions),
         pad_and_tensorize(labels, self.labels_pad_idx),
     )
예제 #6
0
 def tensorize(self, batch) -> Tuple[torch.Tensor, ...]:
     """Convert instance-level vectors into batch-level padded tensors."""
     tokens, segment_labels, _seq_lens, positions = zip(*batch)
     pad_idx = self.vocab.get_pad_index()
     token_tensor = pad_and_tensorize(tokens, pad_idx)
     # 1 marks a real token, 0 marks padding.
     mask = (token_tensor != pad_idx).long()
     # NOTE: segment labels are padded with pad_and_tensorize's default pad
     # value here, not the vocab pad index.
     segment_tensor = pad_and_tensorize(segment_labels)
     position_tensor = pad_and_tensorize(positions)
     return token_tensor, mask, segment_tensor, position_tensor
예제 #7
0
 def tensorize(self, batch):
     """Pad token/length/range columns and build the masked source/target
     pair via ``self.mask_and_tensorize``."""
     tokens, seq_lens, token_ranges = zip(*batch)
     source, target = self.mask_and_tensorize(tokens)
     padded = (
         pad_and_tensorize(tokens, self.vocab.get_pad_index()),
         pad_and_tensorize(seq_lens),
         pad_and_tensorize(token_ranges),
     )
     return padded + (source, target)
예제 #8
0
 def tensorize(self, batch) -> Tuple[torch.Tensor, ...]:
     """Pad tokens, sequence lengths, language ids and (conditionally)
     positions into batch tensors."""
     tokens, seq_lens, lang_ids, positions = zip(*batch)
     pad_idx = self.vocab.get_pad_index()
     padded_tokens = pad_and_tensorize(tokens, pad_idx)
     padded_lang_ids = pad_and_tensorize(lang_ids)
     # Positions are only materialized on the fairseq code path; otherwise
     # None is returned in their slot.
     position_tensor = pad_and_tensorize(positions) if self.is_fairseq else None
     # 1 marks a real token, 0 marks padding.
     mask = (padded_tokens != pad_idx).long()
     return (
         padded_tokens,
         mask,
         pad_and_tensorize(seq_lens),
         padded_lang_ids,
         position_tensor,
     )
예제 #9
0
    def tensorize(self, batch):
        """Pad document/question tokens and answer spans into batch tensors.

        The masks are byte tensors in which 1 marks a padding position.
        """
        (
            doc_tokens,
            doc_seq_len,
            ques_tokens,
            ques_seq_len,
            answer_start_idx,
            answer_end_idx,
        ) = zip(*batch)
        pad_idx = self.vocab.get_pad_index()

        padded_doc = pad_and_tensorize(doc_tokens, pad_idx)
        doc_mask = (padded_doc == pad_idx).byte()  # 1 => pad
        padded_ques = pad_and_tensorize(ques_tokens, pad_idx)
        ques_mask = (padded_ques == pad_idx).byte()  # 1 => pad
        starts = pad_and_tensorize(answer_start_idx, self.SPAN_PAD_IDX)
        ends = pad_and_tensorize(answer_end_idx, self.SPAN_PAD_IDX)

        # doc_tokens must be returned as the first element for
        # SquadMetricReporter._add_decoded_answer_batch_stats() to work
        return (
            padded_doc,
            pad_and_tensorize(doc_seq_len),
            doc_mask,
            padded_ques,
            pad_and_tensorize(ques_seq_len),
            ques_mask,
            starts,
            ends,
        )
예제 #10
0
 def arrange_targets(self, tensor_dict):
     """Return the padded tensor built from the batch's "actions" entry."""
     actions = tensor_dict["actions"]
     return pad_and_tensorize(actions)
예제 #11
0
 def tensorize(self, batch):
     """Pad token sequences and tensorize them with their lengths."""
     tokens, seq_lens = zip(*batch)
     padded_tokens = pad_and_tensorize(tokens, self.vocab.get_pad_index())
     padded_lens = pad_and_tensorize(seq_lens)
     return padded_tokens, padded_lens