def tensorize(self, batch):
    (
        tokens,
        segment_labels,
        seq_lens,
        positions,
        answer_start_idx,
        answer_end_idx,
        start_logits,
        end_logits,
        has_answer_logits,
    ) = zip(*batch)
    tensor_tuple = super().tensorize(
        zip(
            tokens,
            segment_labels,
            seq_lens,
            positions,
            answer_start_idx,
            answer_end_idx,
        )
    )
    return tensor_tuple + (
        pad_and_tensorize(start_logits, dtype=torch.float),
        pad_and_tensorize(end_logits, dtype=torch.float),
        pad_and_tensorize(
            has_answer_logits,
            dtype=torch.float,
            pad_shape=[len(has_answer_logits), len(has_answer_logits[0])],
        ),
    )
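# Hedged sketch of why an explicit pad_shape is passed above: when every
# example carries the same number of has-answer logits, fixing the output
# shape to (batch, n_logits) up front leaves nothing to infer from ragged
# inputs. The helper below is a toy stand-in written for illustration, not
# the library implementation.
import torch


def pad_to_shape_sketch(batch, pad_shape, pad_value=0.0):
    # Allocate the fixed-size output, then copy each row into place.
    out = torch.full(pad_shape, pad_value, dtype=torch.float)
    for i, row in enumerate(batch):
        out[i, : len(row)] = torch.tensor(row, dtype=torch.float)
    return out


has_answer_logits = [[0.2, 0.8], [0.9, 0.1]]
t = pad_to_shape_sketch(
    has_answer_logits, [len(has_answer_logits), len(has_answer_logits[0])]
)
# t.shape == torch.Size([2, 2])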
def tensorize(self, batch):
    tokens, segment_labels, seq_lens = zip(*batch)
    tokens = pad_and_tensorize(tokens, self.vocab.get_pad_index())
    # pad_mask is 1 for real tokens and 0 for padding positions
    pad_mask = (tokens != self.vocab.get_pad_index()).long()
    segment_labels = pad_and_tensorize(segment_labels, self.vocab.get_pad_index())
    return tokens, pad_mask, segment_labels
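# Hedged sketch: a minimal stand-in for pad_and_tensorize, included only to
# illustrate the padding behavior the tensorizers here rely on. The real
# helper lives in the library; this toy version assumes right-padding of
# ragged integer sequences to the batch maximum length.
import torch


def pad_and_tensorize_sketch(batch, pad_token=0, dtype=torch.long):
    # Pad every sequence on the right with pad_token up to the longest one.
    max_len = max(len(seq) for seq in batch)
    padded = [list(seq) + [pad_token] * (max_len - len(seq)) for seq in batch]
    return torch.tensor(padded, dtype=dtype)


# Toy usage mirroring the tensorize() above: two sequences of different
# lengths produce a (2, 4) token tensor and a matching 0/1 pad mask.
pad_idx = 0
tokens = pad_and_tensorize_sketch([[5, 6, 7, 8], [5, 9]], pad_token=pad_idx)
pad_mask = (tokens != pad_idx).long()  # [[1, 1, 1, 1], [1, 1, 0, 0]]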
def tensorize(self, batch):
    (
        tokens,
        segment_labels,
        seq_len,
        positions,
        answer_start_idx,
        answer_end_idx,
    ) = zip(*batch)
    tokens = pad_and_tensorize(tokens, self.vocab.get_pad_index())
    segment_labels = pad_and_tensorize(segment_labels, self.vocab.get_pad_index())
    pad_mask = (tokens != self.vocab.get_pad_index()).long()
    positions = pad_and_tensorize(positions)
    # per-example lists of answer spans are padded with SPAN_PAD_IDX
    answer_start_idx = pad_and_tensorize(answer_start_idx, self.SPAN_PAD_IDX)
    answer_end_idx = pad_and_tensorize(answer_end_idx, self.SPAN_PAD_IDX)
    return (
        tokens,
        pad_mask,
        segment_labels,
        positions,
        answer_start_idx,
        answer_end_idx,
    )
def tensorize(self, batch):
    (
        doc_tokens,
        doc_seq_len,
        ques_tokens,
        ques_seq_len,
        answer_start_idx,
        answer_end_idx,
        start_logits,
        end_logits,
        has_answer_logits,
    ) = zip(*batch)
    # let the base tensorizer handle the shared fields, then append the
    # precomputed float logit tensors
    tensor_tuple = super().tensorize(
        zip(
            doc_tokens,
            doc_seq_len,
            ques_tokens,
            ques_seq_len,
            answer_start_idx,
            answer_end_idx,
        )
    )
    return tensor_tuple + (
        pad_and_tensorize(start_logits, dtype=torch.float),
        pad_and_tensorize(end_logits, dtype=torch.float),
        pad_and_tensorize(has_answer_logits, dtype=torch.float),
    )
def tensorize(self, batch) -> Tuple[torch.Tensor, ...]:
    tokens, segment_labels, seq_lens, positions, labels = zip(*batch)
    tokens = pad_and_tensorize(tokens, self.vocab.get_pad_index())
    pad_mask = (tokens != self.vocab.get_pad_index()).long()
    segment_labels = pad_and_tensorize(segment_labels, self.vocab.get_pad_index())
    positions = pad_and_tensorize(positions)
    padded_labels = pad_and_tensorize(labels, self.labels_pad_idx)
    return tokens, pad_mask, segment_labels, positions, padded_labels
def tensorize(self, batch) -> Tuple[torch.Tensor, ...]: """ Convert instance level vectors into batch level tensors. """ tokens, segment_labels, seq_lens, positions = zip(*batch) tokens = pad_and_tensorize(tokens, self.vocab.get_pad_index()) pad_mask = (tokens != self.vocab.get_pad_index()).long() segment_labels = pad_and_tensorize(segment_labels) positions = pad_and_tensorize(positions) return tokens, pad_mask, segment_labels, positions
def tensorize(self, batch):
    tokens, seq_lens, token_ranges = zip(*batch)
    masked_source, masked_target = self.mask_and_tensorize(tokens)
    return (
        pad_and_tensorize(tokens, self.vocab.get_pad_index()),
        pad_and_tensorize(seq_lens),
        pad_and_tensorize(token_ranges),
        masked_source,
        masked_target,
    )
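# Hedged sketch: mask_and_tensorize is not shown here, so the body below is
# an assumption. It illustrates one common masked-LM recipe: replace a random
# subset of tokens with a mask index in the source, and keep the original
# token in the target only at masked positions (pad everywhere else).
import random

import torch


def mask_and_tensorize_sketch(tokens, mask_idx, pad_idx, mask_prob=0.15):
    max_len = max(len(seq) for seq in tokens)
    sources, targets = [], []
    for seq in tokens:
        src, tgt = [], []
        for tok in seq:
            if random.random() < mask_prob:
                src.append(mask_idx)  # hide the token from the model
                tgt.append(tok)       # ask the model to recover it
            else:
                src.append(tok)
                tgt.append(pad_idx)   # no loss at unmasked positions
        sources.append(src + [pad_idx] * (max_len - len(seq)))
        targets.append(tgt + [pad_idx] * (max_len - len(seq)))
    return torch.tensor(sources), torch.tensor(targets)


src, tgt = mask_and_tensorize_sketch([[5, 6, 7], [8, 9]], mask_idx=4, pad_idx=0)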
def tensorize(self, batch) -> Tuple[torch.Tensor, ...]:
    tokens, seq_lens, lang_ids, positions = zip(*batch)
    padded_tokens = pad_and_tensorize(tokens, self.vocab.get_pad_index())
    padded_lang_ids = pad_and_tensorize(lang_ids)
    # position ids are only materialized when the encoder is a fairseq model
    if self.is_fairseq:
        positions = pad_and_tensorize(positions)
    else:
        positions = None
    pad_mask = (padded_tokens != self.vocab.get_pad_index()).long()
    return (
        padded_tokens,
        pad_mask,
        pad_and_tensorize(seq_lens),
        padded_lang_ids,
        positions,
    )
def tensorize(self, batch):
    (
        doc_tokens,
        doc_seq_len,
        ques_tokens,
        ques_seq_len,
        answer_start_idx,
        answer_end_idx,
    ) = zip(*batch)
    doc_tokens = pad_and_tensorize(doc_tokens, self.vocab.get_pad_index())
    doc_mask = (doc_tokens == self.vocab.get_pad_index()).byte()  # 1 => pad
    ques_tokens = pad_and_tensorize(ques_tokens, self.vocab.get_pad_index())
    ques_mask = (ques_tokens == self.vocab.get_pad_index()).byte()  # 1 => pad
    answer_start_idx = pad_and_tensorize(answer_start_idx, self.SPAN_PAD_IDX)
    answer_end_idx = pad_and_tensorize(answer_end_idx, self.SPAN_PAD_IDX)
    # doc_tokens must be returned as the first element for
    # SquadMetricReporter._add_decoded_answer_batch_stats() to work
    return (
        doc_tokens,
        pad_and_tensorize(doc_seq_len),
        doc_mask,
        ques_tokens,
        pad_and_tensorize(ques_seq_len),
        ques_mask,
        answer_start_idx,
        answer_end_idx,
    )
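# Note on mask polarity: this tensorize() flags padding with 1 (doc_mask,
# ques_mask), while the BERT-style tensorizers above flag real tokens with 1
# (pad_mask). A toy check of how the two conventions relate:
import torch

pad_idx = 0
tokens = torch.tensor([[5, 6, 0, 0]])
byte_pad_mask = (tokens == pad_idx).byte()   # [[0, 0, 1, 1]], 1 => pad
long_keep_mask = (tokens != pad_idx).long()  # [[1, 1, 0, 0]], 1 => token
assert (byte_pad_mask.long() + long_keep_mask).eq(1).all()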
def arrange_targets(self, tensor_dict):
    return pad_and_tensorize(tensor_dict["actions"])
def tensorize(self, batch):
    tokens, seq_lens = zip(*batch)
    return (
        pad_and_tensorize(tokens, self.vocab.get_pad_index()),
        pad_and_tensorize(seq_lens),
    )
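# Hedged usage sketch: what a batch looks like for the simplest tensorize()
# above. Each element pairs a token-id list with its true length; the names
# and index values here are illustrative assumptions, not library fixtures.
batch = [
    ([5, 6, 7, 8], 4),  # (token ids, seq_len)
    ([5, 9], 2),
]
tokens, seq_lens = zip(*batch)
# tokens == ([5, 6, 7, 8], [5, 9]); seq_lens == (4, 2)
# after padding: tokens -> shape (2, 4), seq_lens -> shape (2,)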