Esempio n. 1
0
    def add_frames(self,
                   seq_idx,
                   seq_start_frame,
                   length,
                   frame_dim_corresponds=True):
        """
        Appends a part of a sequence to slice 0, starting at the current fill level
        (``self.max_num_frames_per_slice``), and grows that fill level by ``length``.
        Will add one data-batch (slice) if we don't have one yet.

        :param int seq_idx: index of the sequence the frames are taken from
        :param NumbersDict|int seq_start_frame: first frame of the part inside the sequence
        :param NumbersDict length: number of (time) frames
        :param bool frame_dim_corresponds: if the batch frame offset should always be the same (max value) for all keys
        """
        if frame_dim_corresponds:
            # All keys share one offset: collapse the current fill level to its max value,
            # both for the recorded offset and for the running fill level.
            batch_frame_offset = NumbersDict(
                self.max_num_frames_per_slice.max_value())
            self.max_num_frames_per_slice = NumbersDict(
                self.max_num_frames_per_slice.max_value())
        else:
            # Take a copy instead of a second reference to the same object.
            # NOTE(review): if NumbersDict implements ``+=`` in place (to be confirmed),
            # an aliased offset would silently be advanced by ``length`` below, i.e. the part
            # would be recorded at its end position instead of its start position.
            batch_frame_offset = NumbersDict(self.max_num_frames_per_slice)
        self.max_num_frames_per_slice += length
        # Frames always go into slice 0, so there has to be at least one slice.
        self.num_slices = max(self.num_slices, 1)
        self.seqs += [
            BatchSeqCopyPart(seq_idx=seq_idx,
                             seq_start_frame=seq_start_frame,
                             seq_end_frame=seq_start_frame + length,
                             batch_slice=0,
                             batch_frame_offset=batch_frame_offset)
        ]
Esempio n. 2
0
 def get_seq_length(self, seq_idx):
   """
   Collects the lengths of one sequence under their data keys.

   The first entry returned by ``self.get_seq_length_nd`` is stored under "data",
   the remaining entries are paired in order with ``self.target_keys``.

   :param seq_idx: sequence index, passed on to ``self.get_seq_length_nd``
   :rtype: NumbersDict
   """
   all_lengths = self.get_seq_length_nd(seq_idx)
   lengths_by_key = {"data": all_lengths[0]}
   lengths_by_key.update(zip(self.target_keys, all_lengths[1:]))
   return NumbersDict(lengths_by_key)
Esempio n. 3
0
 def try_sequence_as_slice(self, length):
     """
     Computes the shape the batch would have with one more slice holding ``length`` frames.
     Nothing is assigned on ``self`` here.

     :param NumbersDict length: number of (time) frames
     :return: new shape which covers the old shape and one more data-batch, format (time,batch),
       as a two-element list
     :rtype: (NumbersDict,int)
     """
     covering_num_frames = NumbersDict.max(
         [self.max_num_frames_per_slice, length])
     slice_count = self.num_slices + 1
     return [covering_num_frames, slice_count]
Esempio n. 4
0
class BatchSeqCopyPart:
    """
    One part of a sequence, together with the place where it is stored in a batch.

    A batch used for training in RETURNN can consist of several parts from sequences,
    ordered in various ways. The dataset, depending on the configuration, can generate these.
    For the non-recurrent case, the parts are usually concatenated into one slice.
    For the recurrent case, there is a single slice per sequence,
    or even multiple slices for a sequence in case of chunking.
    """
    def __init__(self, seq_idx, seq_start_frame, seq_end_frame, batch_slice,
                 batch_frame_offset):
        """
        :type seq_idx: int
        :type seq_start_frame: NumbersDict | int
        :type seq_end_frame: NumbersDict | int
          Frame idx are input seq, output seq.
        :type batch_slice: int
        :type batch_frame_offset: int | NumbersDict
        """
        self.seq_idx = seq_idx
        # Every frame position is wrapped into a NumbersDict, whether it came in as int or NumbersDict.
        self.seq_start_frame = NumbersDict(seq_start_frame)
        self.seq_end_frame = NumbersDict(seq_end_frame)
        self.batch_slice = batch_slice
        self.batch_frame_offset = NumbersDict(batch_frame_offset)
        for frame_numbers in (self.seq_start_frame, self.seq_end_frame,
                              self.batch_frame_offset):
            assert frame_numbers.has_values()

    @property
    def frame_length(self):
        """
        :return: end frame minus start frame
        :rtype: NumbersDict
        """
        end = self.seq_end_frame
        start = self.seq_start_frame
        return end - start

    def __repr__(self):
        shown_attribs = ("seq_idx", "seq_start_frame", "seq_end_frame",
                         "batch_slice", "batch_frame_offset")
        rendered = []
        for attrib in shown_attribs:
            rendered.append("%s=%r" % (attrib, getattr(self, attrib)))
        return "<BatchSeqCopyPart %s>" % " ".join(rendered)
Esempio n. 5
0
 def get_seq_length(self, seq_idx):
     """
     Collects the lengths of one sequence under their data keys.

     Using only targets is allowed: with ``self.num_inputs == 0`` there is no "data" key
     and all entries of ``self.get_seq_length_nd`` are paired with ``self.target_keys``.
     Otherwise the first entry is stored under "data" and the rest belongs to the targets.

     :param seq_idx: sequence index, passed on to ``self.get_seq_length_nd``
     :rtype: NumbersDict
     """
     all_lengths = self.get_seq_length_nd(seq_idx)
     lengths_by_key = {}
     has_inputs = self.num_inputs > 0
     if has_inputs:
         lengths_by_key["data"] = all_lengths[0]
     # Targets start right behind the (optional) input entry.
     targets_offset = 1 if has_inputs else 0
     lengths_by_key.update(
         zip(self.target_keys, all_lengths[targets_offset:]))
     return NumbersDict(lengths_by_key)
Esempio n. 6
0
 def __init__(self, seq_idx, seq_start_frame, seq_end_frame, batch_slice,
              batch_frame_offset):
     """
     Stores which frames of which sequence are meant, and where they go in the batch.

     :type seq_idx: int
     :type seq_start_frame: NumbersDict | int
     :type seq_end_frame: NumbersDict | int
       Frame idx are input seq, output seq.
     :type batch_slice: int
     :type batch_frame_offset: int | NumbersDict
     """
     self.seq_idx = seq_idx
     # Every frame position is wrapped into a NumbersDict, whether it came in as int or NumbersDict.
     self.seq_start_frame = NumbersDict(seq_start_frame)
     self.seq_end_frame = NumbersDict(seq_end_frame)
     self.batch_slice = batch_slice
     self.batch_frame_offset = NumbersDict(batch_frame_offset)
     for frame_numbers in (self.seq_start_frame, self.seq_end_frame,
                           self.batch_frame_offset):
         assert frame_numbers.has_values()
Esempio n. 7
0
 def __init__(self):
     """
     Sets up an empty batch: no sequence parts, zero slices, zero frames per slice.
     """
     # Shape bookkeeping, format (time,batch/slice):
     #   data_shape = [max_num_frames_per_slice, num_slices], originally [0, 0].
     self.seqs = []  # type: typing.List[BatchSeqCopyPart]
     self.num_slices = 0
     self.max_num_frames_per_slice = NumbersDict(0)
Esempio n. 8
0
class Batch:
    """
    A batch can consist of several sequences (= segments).
    This is basically just a list of BatchSeqCopyPart,
    plus the shape (frames per slice, number of slices) needed to hold all of them.
    """
    def __init__(self):
        """
        Sets up an empty batch: no sequence parts, zero slices, zero frames per slice.
        """
        self.max_num_frames_per_slice = NumbersDict(0)
        self.num_slices = 0
        # original data_shape = [0, 0], format (time,batch/slice)
        #          data_shape = [max_num_frames_per_slice, num_slices]
        self.seqs = []  # type: typing.List[BatchSeqCopyPart]

    def __repr__(self):
        return "<Batch start_seq:%r, len(seqs):%i>" % (self.start_seq,
                                                       len(self.seqs))

    def try_sequence_as_slice(self, length):
        """
        Computes the shape the batch would have with one more slice holding ``length`` frames.
        Nothing is assigned on ``self`` here.

        :param NumbersDict length: number of (time) frames
        :return: new shape which covers the old shape and one more data-batch, format (time,batch),
          as a two-element list
        :rtype: (NumbersDict,int)
        """
        return [
            NumbersDict.max([self.max_num_frames_per_slice, length]),
            self.num_slices + 1
        ]

    def add_sequence_as_slice(self, seq_idx, seq_start_frame, length):
        """
        Adds one data-batch in an additional slice.
        The part is placed at frame offset 0 of the new (last) slice.

        :param int seq_idx: index of the sequence the frames are taken from
        :param NumbersDict|int seq_start_frame: first frame of the part inside the sequence
        :param NumbersDict length: number of (time) frames
        """
        self.max_num_frames_per_slice, self.num_slices = self.try_sequence_as_slice(
            length)
        self.seqs += [
            BatchSeqCopyPart(seq_idx=seq_idx,
                             seq_start_frame=seq_start_frame,
                             seq_end_frame=seq_start_frame + length,
                             batch_slice=self.num_slices - 1,
                             batch_frame_offset=0)
        ]

    def add_frames(self,
                   seq_idx,
                   seq_start_frame,
                   length,
                   frame_dim_corresponds=True):
        """
        Appends a part of a sequence to slice 0, starting at the current fill level
        (``self.max_num_frames_per_slice``), and grows that fill level by ``length``.
        Will add one data-batch (slice) if we don't have one yet.

        :param int seq_idx: index of the sequence the frames are taken from
        :param NumbersDict|int seq_start_frame: first frame of the part inside the sequence
        :param NumbersDict length: number of (time) frames
        :param bool frame_dim_corresponds: if the batch frame offset should always be the same (max value) for all keys
        """
        if frame_dim_corresponds:
            # All keys share one offset: collapse the current fill level to its max value,
            # both for the recorded offset and for the running fill level.
            batch_frame_offset = NumbersDict(
                self.max_num_frames_per_slice.max_value())
            self.max_num_frames_per_slice = NumbersDict(
                self.max_num_frames_per_slice.max_value())
        else:
            # Take a copy instead of a second reference to the same object.
            # NOTE(review): if NumbersDict implements ``+=`` in place (to be confirmed),
            # an aliased offset would silently be advanced by ``length`` below, i.e. the part
            # would be recorded at its end position instead of its start position.
            batch_frame_offset = NumbersDict(self.max_num_frames_per_slice)
        self.max_num_frames_per_slice += length
        # Frames always go into slice 0, so there has to be at least one slice.
        self.num_slices = max(self.num_slices, 1)
        self.seqs += [
            BatchSeqCopyPart(seq_idx=seq_idx,
                             seq_start_frame=seq_start_frame,
                             seq_end_frame=seq_start_frame + length,
                             batch_slice=0,
                             batch_frame_offset=batch_frame_offset)
        ]

    def init_with_one_full_sequence(self, seq_idx, dataset):
        """
        Fills this (still empty) batch with one complete sequence via :func:`add_frames`.

        :param int seq_idx: index of the sequence inside ``dataset``
        :param Dataset.Dataset dataset: queried via ``get_start_end_frames_full_seq``
        """
        assert not self.seqs
        start, end = dataset.get_start_end_frames_full_seq(seq_idx)
        self.add_frames(seq_idx=seq_idx,
                        seq_start_frame=start,
                        length=end - start)

    def get_all_slices_num_frames(self):
        """
        Note that this is only an upper limit in case of data_shape[1] > 1
        because data_shape[0] is the max frame len of all seqs.

        :return: ``max_num_frames_per_slice * num_slices``, related to the data-key with max length
        :rtype: NumbersDict
        """
        return self.max_num_frames_per_slice * self.num_slices

    def get_total_num_frames(self):
        """
        :return: sum of the frame lengths of all parts. The sum starts from the plain int 0,
          so that is also what an empty batch yields.
        :rtype: NumbersDict
        """
        return sum(part.frame_length for part in self.seqs)

    @property
    def start_seq(self):
        """
        :return: smallest sequence index among the parts, or None for an empty batch
        :rtype: int|None
        """
        if not self.seqs:
            return None
        return min(part.seq_idx for part in self.seqs)

    @property
    def end_seq(self):
        """
        :return: largest sequence index among the parts plus one (exclusive end),
          or None for an empty batch
        :rtype: int|None
        """
        if not self.seqs:
            return None
        return max(part.seq_idx for part in self.seqs) + 1

    def get_num_seqs(self):
        """
        :return: ``end_seq - start_seq``, i.e. the size of the covered sequence index range;
          0 for an empty batch
        :rtype: int
        """
        if not self.seqs:
            return 0
        return self.end_seq - self.start_seq