Example #1
 def _prepare_batch_for_alignment(self, sample, hypothesis):
     # Replicate each source sentence beam_size times so that every
     # finalized hypothesis in the beam is paired with its own copy
     # of the source.
     src_tokens = sample["net_input"]["src_tokens"]
     bsz = src_tokens.shape[0]
     src_tokens = (
         src_tokens[:, None, :]
         .expand(-1, self.beam_size, -1)
         .contiguous()
         .view(bsz * self.beam_size, -1)
     )
     src_lengths = sample["net_input"]["src_lengths"]
     src_lengths = (
         src_lengths[:, None]
         .expand(-1, self.beam_size)
         .contiguous()
         .view(bsz * self.beam_size)
     )
     # Decoder inputs: hypothesis tokens shifted right, with EOS moved
     # to the first position (teacher forcing).
     prev_output_tokens = data_utils.collate_tokens(
         [beam["tokens"] for example in hypothesis for beam in example],
         self.pad,
         self.eos,
         self.left_pad_target,
         move_eos_to_beginning=True,
     )
     # Alignment targets: the same hypothesis tokens, unshifted.
     tgt_tokens = data_utils.collate_tokens(
         [beam["tokens"] for example in hypothesis for beam in example],
         self.pad,
         self.eos,
         self.left_pad_target,
         move_eos_to_beginning=False,
     )
     return src_tokens, src_lengths, prev_output_tokens, tgt_tokens
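The expand/view chain above interleaves beam copies row by row. A minimal, self-contained sketch of the same trick with made-up values (beam_size=2 is an assumption for illustration):

 import torch

 src_tokens = torch.tensor([[4, 5, 2], [6, 7, 2]])  # bsz=2, seq_len=3
 bsz, beam_size = src_tokens.size(0), 2
 replicated = (
     src_tokens[:, None, :]      # (bsz, 1, seq_len)
     .expand(-1, beam_size, -1)  # (bsz, beam_size, seq_len), still a view
     .contiguous()               # materialize the view so it can be reshaped
     .view(bsz * beam_size, -1)  # (bsz * beam_size, seq_len)
 )
 # Each source row now appears beam_size times, in beam-major order:
 # tensor([[4, 5, 2], [4, 5, 2], [6, 7, 2], [6, 7, 2]])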
Example #2
 def merge(key, left_pad, move_eos_to_beginning=False, pad_to_length=None):
     # Pad the chosen field of every sample into one batch tensor.
     # `samples`, `pad_idx`, and `eos_idx` are closed over from the
     # enclosing collate function.
     return data_utils.collate_tokens(
         [s[key] for s in samples],
         pad_idx,
         eos_idx,
         left_pad,
         move_eos_to_beginning,
         pad_to_length=pad_to_length,
     )
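A sketch of how a closure like this is typically called from its enclosing collate function; the three call sites below are assumptions based on fairseq's usual language-pair collation, not part of this snippet:

 src_tokens = merge("source", left_pad=True)
 target = merge("target", left_pad=False)
 # Same target tokens shifted right for teacher forcing: EOS moves to index 0.
 prev_output_tokens = merge("target", left_pad=False, move_eos_to_beginning=True)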
Example #3
    def collate(self, samples):
        """
        Utility function to collate samples into a batch for speech recognition.
        """
        if len(samples) == 0:
            return {}

        # parse samples into torch tensors
        parsed_samples = []
        for s in samples:
            # skip invalid samples
            if s["data"][self.feature_index] is None:
                continue
            source = s["data"][self.feature_index]
            if isinstance(source, (np.ndarray, np.generic)):
                source = torch.from_numpy(source)
            target = s["data"][self.label_index]
            if isinstance(target, (np.ndarray, np.generic)):
                target = torch.from_numpy(target).long()
            elif isinstance(target, list):
                target = torch.LongTensor(target)

            parsed_sample = {"id": s["id"], "source": source, "target": target}
            parsed_samples.append(parsed_sample)
        samples = parsed_samples

        # Guard against the case where every sample was filtered out above.
        if len(samples) == 0:
            return {}

        id = torch.LongTensor([s["id"] for s in samples])
        frames = self._collate_frames([s["source"] for s in samples])
        # sort samples by descending number of frames
        frames_lengths = torch.LongTensor([s["source"].size(0) for s in samples])
        frames_lengths, sort_order = frames_lengths.sort(descending=True)
        id = id.index_select(0, sort_order)
        frames = frames.index_select(0, sort_order)

        target = None
        target_lengths = None
        prev_output_tokens = None
        if samples[0].get("target", None) is not None:
            ntokens = sum(len(s["target"]) for s in samples)
            target = fairseq_data_utils.collate_tokens(
                [s["target"] for s in samples],
                self.pad_index,
                self.eos_index,
                left_pad=False,
                move_eos_to_beginning=False,
            )
            target = target.index_select(0, sort_order)
            target_lengths = torch.LongTensor(
                [s["target"].size(0) for s in samples]
            ).index_select(0, sort_order)
            prev_output_tokens = fairseq_data_utils.collate_tokens(
                [s["target"] for s in samples],
                self.pad_index,
                self.eos_index,
                left_pad=False,
                move_eos_to_beginning=self.move_eos_to_beginning,
            )
            prev_output_tokens = prev_output_tokens.index_select(0, sort_order)
        else:
            ntokens = sum(len(s["source"]) for s in samples)

        batch = {
            "id": id,
            "ntokens": ntokens,
            "net_input": {"src_tokens": frames, "src_lengths": frames_lengths},
            "target": target,
            "target_lengths": target_lengths,
            "nsentences": len(samples),
        }
        if prev_output_tokens is not None:
            batch["net_input"]["prev_output_tokens"] = prev_output_tokens
        return batch
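For reference, collate_tokens itself just pads a list of 1-D tensors to a common length, optionally rotating EOS to the front. A small demonstration, assuming fairseq's conventional pad=1 and eos=2 indices:

 import torch
 from fairseq.data import data_utils

 seqs = [torch.LongTensor([4, 5, 2]), torch.LongTensor([6, 2])]
 data_utils.collate_tokens(seqs, pad_idx=1, eos_idx=2)
 # tensor([[4, 5, 2],
 #         [6, 2, 1]])   shorter row right-padded with pad_idx
 data_utils.collate_tokens(seqs, pad_idx=1, eos_idx=2, move_eos_to_beginning=True)
 # tensor([[2, 4, 5],
 #         [2, 6, 1]])   EOS first, remaining tokens shifted right by one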
Example #4
 def merge(key):
     # Right-pad (left_pad=False) the chosen field of every sample;
     # `samples`, `pad_idx`, and `eos_idx` come from the enclosing scope.
     return data_utils.collate_tokens(
         [s[key] for s in samples],
         pad_idx,
         eos_idx,
         left_pad=False,
     )
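This variant hard-codes left_pad=False (right padding). For contrast, a quick sketch of what left padding would produce on the same inputs (pad=1 assumed):

 import torch
 from fairseq.data import data_utils

 seqs = [torch.LongTensor([4, 5, 2]), torch.LongTensor([6, 2])]
 data_utils.collate_tokens(seqs, pad_idx=1, left_pad=True)
 # tensor([[4, 5, 2],
 #         [1, 6, 2]])   padding accumulates on the left instead of the right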
Example #5
 def collater(self, samples):
     # `samples` is already a list of 1-D token tensors; pad them into a
     # single batch tensor (no eos_idx is needed without EOS shifting).
     return data_utils.collate_tokens(samples, self.pad_idx, left_pad=self.left_pad)
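A hypothetical call, where `dataset` is an illustrative instance name not taken from the snippet:

 import torch

 batch = dataset.collater([torch.LongTensor([4, 5]), torch.LongTensor([6, 7, 8])])
 # batch is a (2 x 3) LongTensor padded with dataset.pad_idx, on the left
 # or the right depending on dataset.left_pad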