def add_frames(self, seq_idx, seq_start_frame, length, frame_dim_corresponds=True):
    """
    Adds frames to all data-batches.
    Will add one data-batch if we don't have one yet.

    The new part is placed in slice 0, at the frame offset given by the current
    ``self.max_num_frames_per_slice``, which is then increased by ``length``.

    :param int seq_idx:
    :param NumbersDict|int seq_start_frame:
    :param NumbersDict length: number of (time) frames
    :param bool frame_dim_corresponds: if the batch frame offset should always be the same (max value) for all keys
    """
    batch_frame_offset = self.max_num_frames_per_slice
    if frame_dim_corresponds:
        # Collapse offset and slice length to the single max value over all keys.
        batch_frame_offset = NumbersDict(batch_frame_offset.max_value())
        self.max_num_frames_per_slice = NumbersDict(self.max_num_frames_per_slice.max_value())
    # Rebind to a new object instead of using `+=`: augmented assignment is allowed
    # to update the left operand in place, and with frame_dim_corresponds=False
    # `batch_frame_offset` is that very object, so the offset stored below would
    # silently include `length`.
    # NOTE(review): NumbersDict is defined elsewhere -- confirm whether its `+=` is in place.
    self.max_num_frames_per_slice = self.max_num_frames_per_slice + length
    self.num_slices = max(self.num_slices, 1)
    self.seqs += [
        BatchSeqCopyPart(
            seq_idx=seq_idx,
            seq_start_frame=seq_start_frame,
            seq_end_frame=seq_start_frame + length,
            batch_slice=0,
            batch_frame_offset=batch_frame_offset,
        )
    ]
def get_seq_length(self, seq_idx):
    """
    Builds the per-key sequence lengths from ``self.get_seq_length_nd(seq_idx)``.

    The first entry is stored under the key "data", the following entries are
    assigned to ``self.target_keys`` in order.

    :param seq_idx: passed through to ``self.get_seq_length_nd``
    :rtype: NumbersDict
    """
    seq_lens = self.get_seq_length_nd(seq_idx)
    lens_by_key = {"data": seq_lens[0]}
    # Pair each target key with its length; surplus entries on either side are dropped by zip.
    lens_by_key.update(zip(self.target_keys, seq_lens[1:]))
    return NumbersDict(lens_by_key)
def try_sequence_as_slice(self, length):
    """
    Computes the shape the batch would have with one more slice, without changing the batch.

    :param NumbersDict length: number of (time) frames
    :return: new shape which covers the old shape and one more data-batch, format (time,batch).
      Returned as a two-element list.
    :rtype: (NumbersDict,int)
    """
    covering_num_frames = NumbersDict.max([self.max_num_frames_per_slice, length])
    num_slices_after = self.num_slices + 1
    return [covering_num_frames, num_slices_after]
class BatchSeqCopyPart:
    """
    A batch used for training in RETURNN can consist of several parts from sequences,
    ordered in various ways. The dataset, depending on the configuration, can
    generate these. For the non-recurrent case, we usually concatenate
    them together into one slice. For the recurrent case, we have a single
    slice per sequence, or even multiple slices for a sequence in case of chunking.

    This class represents one single such part and where it is going to
    be stored in the batch.
    """

    def __init__(self, seq_idx, seq_start_frame, seq_end_frame, batch_slice, batch_frame_offset):
        """
        :type seq_idx: int
        :type seq_start_frame: NumbersDict | int
        :type seq_end_frame: NumbersDict | int
          Frame idx are input seq, output seq.
        :type batch_slice: int
        :type batch_frame_offset: int | NumbersDict
        """
        # Plain values are stored as given.
        self.seq_idx = seq_idx
        self.batch_slice = batch_slice
        # Frame positions are always wrapped into a NumbersDict.
        self.seq_start_frame = NumbersDict(seq_start_frame)
        self.seq_end_frame = NumbersDict(seq_end_frame)
        self.batch_frame_offset = NumbersDict(batch_frame_offset)
        for frames in (self.seq_start_frame, self.seq_end_frame, self.batch_frame_offset):
            assert frames.has_values()

    @property
    def frame_length(self):
        """
        Difference between end frame and start frame of this part.

        :rtype: NumbersDict
        """
        first_frame = self.seq_start_frame
        return self.seq_end_frame - first_frame

    def __repr__(self):
        shown_attribs = ("seq_idx", "seq_start_frame", "seq_end_frame", "batch_slice", "batch_frame_offset")
        rendered = []
        for attrib in shown_attribs:
            rendered.append("%s=%r" % (attrib, getattr(self, attrib)))
        return "<BatchSeqCopyPart %s>" % " ".join(rendered)
def get_seq_length(self, seq_idx):
    """
    Builds the per-key sequence lengths from ``self.get_seq_length_nd(seq_idx)``.

    If ``self.num_inputs > 0``, the first entry is stored under the key "data" and
    the remaining entries are assigned to ``self.target_keys`` in order.
    Otherwise all entries are assigned to ``self.target_keys``.

    :param seq_idx: passed through to ``self.get_seq_length_nd``
    :rtype: NumbersDict
    """
    seq_lens = self.get_seq_length_nd(seq_idx)
    # We allow using only targets. In this case self.num_inputs == 0 and the "data" key is not used.
    if self.num_inputs > 0:
        lens_by_key = {"data": seq_lens[0]}
        target_lens = seq_lens[1:]
    else:
        lens_by_key = {}
        target_lens = seq_lens[0:]
    lens_by_key.update(zip(self.target_keys, target_lens))
    return NumbersDict(lens_by_key)
def __init__(self, seq_idx, seq_start_frame, seq_end_frame, batch_slice, batch_frame_offset):
    """
    Stores where a part of a sequence comes from and where it goes in the batch.

    :type seq_idx: int
    :type seq_start_frame: NumbersDict | int
    :type seq_end_frame: NumbersDict | int
      Frame idx are input seq, output seq.
    :type batch_slice: int
    :type batch_frame_offset: int | NumbersDict
    """
    # Plain values are stored as given.
    self.seq_idx = seq_idx
    self.batch_slice = batch_slice
    # Frame positions are always wrapped into a NumbersDict.
    self.seq_start_frame = NumbersDict(seq_start_frame)
    self.seq_end_frame = NumbersDict(seq_end_frame)
    self.batch_frame_offset = NumbersDict(batch_frame_offset)
    for frames in (self.seq_start_frame, self.seq_end_frame, self.batch_frame_offset):
        assert frames.has_values()
def __init__(self):
    """
    Creates an empty batch: no parts, zero slices, zero frames per slice.
    """
    self.seqs = []  # type: typing.List[BatchSeqCopyPart]
    # original data_shape = [0, 0], format (time,batch/slice)
    # data_shape = [max_num_frames_per_slice, num_slices]
    self.num_slices = 0
    self.max_num_frames_per_slice = NumbersDict(0)
class Batch:
    """
    A batch can consists of several sequences (= segments).
    This is basically just a list of BatchSeqCopyPart.
    """

    def __init__(self):
        self.max_num_frames_per_slice = NumbersDict(0)
        self.num_slices = 0
        # original data_shape = [0, 0], format (time,batch/slice)
        # data_shape = [max_num_frames_per_slice, num_slices]
        self.seqs = []  # type: typing.List[BatchSeqCopyPart]

    def __repr__(self):
        return "<Batch start_seq:%r, len(seqs):%i>" % (self.start_seq, len(self.seqs))

    def try_sequence_as_slice(self, length):
        """
        Computes the shape the batch would have with one more slice, without changing the batch.

        :param NumbersDict length: number of (time) frames
        :return: new shape which covers the old shape and one more data-batch, format (time,batch).
          Returned as a two-element list.
        :rtype: (NumbersDict,int)
        """
        return [NumbersDict.max([self.max_num_frames_per_slice, length]), self.num_slices + 1]

    def add_sequence_as_slice(self, seq_idx, seq_start_frame, length):
        """
        Adds one data-batch in an additional slice.

        :param int seq_idx:
        :param NumbersDict|int seq_start_frame:
        :param NumbersDict length: number of (time) frames
        """
        self.max_num_frames_per_slice, self.num_slices = self.try_sequence_as_slice(length)
        self.seqs += [
            BatchSeqCopyPart(
                seq_idx=seq_idx,
                seq_start_frame=seq_start_frame,
                seq_end_frame=seq_start_frame + length,
                # The part goes into the slice that was just added, starting at frame 0.
                batch_slice=self.num_slices - 1,
                batch_frame_offset=0,
            )
        ]

    def add_frames(self, seq_idx, seq_start_frame, length, frame_dim_corresponds=True):
        """
        Adds frames to all data-batches.
        Will add one data-batch if we don't have one yet.

        The new part is placed in slice 0, at the frame offset given by the current
        ``self.max_num_frames_per_slice``, which is then increased by ``length``.

        :param int seq_idx:
        :param NumbersDict|int seq_start_frame:
        :param NumbersDict length: number of (time) frames
        :param bool frame_dim_corresponds: if the batch frame offset should always be the same (max value) for all keys
        """
        batch_frame_offset = self.max_num_frames_per_slice
        if frame_dim_corresponds:
            # Collapse offset and slice length to the single max value over all keys.
            batch_frame_offset = NumbersDict(batch_frame_offset.max_value())
            self.max_num_frames_per_slice = NumbersDict(self.max_num_frames_per_slice.max_value())
        # Rebind to a new object instead of using `+=`: augmented assignment is allowed
        # to update the left operand in place, and with frame_dim_corresponds=False
        # `batch_frame_offset` is that very object, so the offset stored below would
        # silently include `length`.
        # NOTE(review): NumbersDict is defined elsewhere -- confirm whether its `+=` is in place.
        self.max_num_frames_per_slice = self.max_num_frames_per_slice + length
        self.num_slices = max(self.num_slices, 1)
        self.seqs += [
            BatchSeqCopyPart(
                seq_idx=seq_idx,
                seq_start_frame=seq_start_frame,
                seq_end_frame=seq_start_frame + length,
                batch_slice=0,
                batch_frame_offset=batch_frame_offset,
            )
        ]

    def init_with_one_full_sequence(self, seq_idx, dataset):
        """
        Fills this (still empty) batch with the frames between the start and end
        reported by ``dataset.get_start_end_frames_full_seq(seq_idx)``.

        :param int seq_idx:
        :param Dataset.Dataset dataset:
        """
        assert not self.seqs
        start, end = dataset.get_start_end_frames_full_seq(seq_idx)
        self.add_frames(seq_idx=seq_idx, seq_start_frame=start, length=end - start)

    def get_all_slices_num_frames(self):
        """
        Note that this is only an upper limit in case of data_shape[1] > 1
        because data_shape[0] is the max frame len of all seqs.

        :return: related to the data-key with max length
        :rtype: NumbersDict
        """
        return self.max_num_frames_per_slice * self.num_slices

    def get_total_num_frames(self):
        """
        Sums up ``frame_length`` over all parts.
        For a batch without parts this is the plain int 0, the start value of ``sum``.

        :rtype: NumbersDict
        """
        return sum([s.frame_length for s in self.seqs])

    @property
    def start_seq(self):
        """
        Smallest ``seq_idx`` of all parts, or None if there are no parts.

        :rtype: int|None
        """
        if not self.seqs:
            return None
        return min([s.seq_idx for s in self.seqs])

    @property
    def end_seq(self):
        """
        Largest ``seq_idx`` of all parts plus one (exclusive end), or None if there are no parts.

        :rtype: int|None
        """
        if not self.seqs:
            return None
        return max([s.seq_idx for s in self.seqs]) + 1

    def get_num_seqs(self):
        """
        Size of the range ``[start_seq, end_seq)``, or 0 if there are no parts.

        :rtype: int
        """
        if not self.seqs:
            return 0
        return self.end_seq - self.start_seq