def try_sequence_as_slice(self, length):
  """
  :param NumbersDict length: number of (time) frames
  :return: new shape which covers the old shape and one more data-batch, format (time,batch)
  :rtype: (NumbersDict,int)
  """
  return [NumbersDict.max([self.max_num_frames_per_slice, length]), self.num_slices + 1]
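# Hedged usage sketch (not part of the original code): try_sequence_as_slice() is
# side-effect free; it only reports the (time,batch) shape the batch *would* have with
# one more slice of `length` frames. A batching loop can therefore probe its limits
# before committing via add_sequence_as_slice(), as _generate_batches() below does.
# `batch_size` and `max_seqs` are assumed caller-side limits here.
def _batch_would_overflow(batch, length, batch_size, max_seqs):
  dt, ds = batch.try_sequence_as_slice(length)
  return ds > 1 and ((dt * ds).max_value() > batch_size or ds > max_seqs)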
def shapes_for_batches(self, batches, data_keys, batch_dim_first=False):
  """
  :type batches: list[EngineBatch.Batch]
  :type data_keys: list[str]
  :param bool batch_dim_first: if True, the batch dim comes first, otherwise the time dim
  :rtype: dict[str,list[int]] | None
  """
  all_data_keys = set(data_keys) | {"data"}

  # The final device.data.shape is in format (time,batch,feature).
  shape = [NumbersDict(0), 0]  # time,batch
  for batch in batches:
    shape = [NumbersDict.max([shape[0], batch.max_num_frames_per_slice]), shape[1] + batch.num_slices]
  if shape[1] == 0:
    return None
  assert shape[0].max_value() > 0
  # Theano has some buggy behaviour with tensors with some shape of zero.
  # We will just use one dummy frame in that case.
  # The index will stay zero in that case. (see EngineUtil.assign_dev_data())
  # However, also see the OutputLayer.output_index() behavior for forwarding.
  for k in all_data_keys:
    shape[0][k] = max(shape[0][k], 1)

  d = {k: [shape[0][k], shape[1]] for k in all_data_keys}
  for k in d:
    d[k] += self.get_data_shape(k)
  if batch_dim_first:
    # Just flip the first two dimensions.
    d = {k: [v[1], v[0]] + v[2:] for (k, v) in d.items()}
  return d
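# Worked example (hedged, with assumed numbers): suppose get_data_shape("data") == [40]
# and two batches with max_num_frames_per_slice 7 resp. 11 and num_slices 3 resp. 2.
# The loop folds these to shape == [NumbersDict(11), 5], so d["data"] == [11, 5, 40] in
# (time,batch,feature) format; with batch_dim_first=True the first two dimensions are
# flipped, giving [5, 11, 40].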
def shapes_for_batches(batches, data_keys, dataset=None, extern_data=None, enforce_min_len1=False):
  """
  :param list[EngineBatch.Batch] batches:
  :param list[str] data_keys:
  :param Dataset dataset:
  :param TFNetwork.ExternData extern_data: detailed data description. only used for TensorFlow
  :param bool enforce_min_len1:
  :rtype: dict[str,list[int]] | None
  """
  assert dataset or extern_data
  all_data_keys = set(data_keys)

  # The final device.data.shape is in format (time,batch,feature) in case of Theano.
  shape = [NumbersDict(0), 0]  # time,batch
  for batch in batches:
    shape = [NumbersDict.max([shape[0], batch.max_num_frames_per_slice]), shape[1] + batch.num_slices]
  if shape[1] == 0:
    return None
  assert shape[0].max_value() > 0
  # Theano has some buggy behaviour with tensors with some shape of zero.
  # We will just use one dummy frame in that case.
  # The index will stay zero in that case. (see EngineUtil.assign_dev_data())
  # However, also see the OutputLayer.output_index() behavior for forwarding.
  if not extern_data or enforce_min_len1:  # not needed if TensorFlow is used
    for k in all_data_keys:
      shape[0][k] = max(shape[0][k], 1)

  if extern_data:
    d = {}
    for k in all_data_keys:
      data_shape = list(extern_data.data[k].batch_shape)
      data_shape[extern_data.data[k].batch_dim_axis] = shape[1]
      if extern_data.data[k].have_time_axis():
        data_shape[extern_data.data[k].time_dim_axis] = shape[0][k]
      assert all([n is not None for n in data_shape]), "data %r" % extern_data.data[k]
      d[k] = data_shape
  else:  # shape via dataset
    d = {k: [shape[0][k], shape[1]] for k in all_data_keys}
    for k in all_data_keys:
      d[k] += dataset.get_data_shape(k)
  return d
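# Hedged usage sketch: exactly one of `dataset` / `extern_data` is normally given.
# With `dataset` (Theano path), each shape is [time, batch] + dataset.get_data_shape(k);
# with `extern_data` (TensorFlow path), the batch/time entries are filled in at whatever
# axes the Data object declares. Hypothetical calls, assuming `network` is a TFNetwork:
#
#   shapes = shapes_for_batches(batches, data_keys=["data", "classes"], dataset=dataset)
#   shapes = shapes_for_batches(batches, data_keys=["data", "classes"], extern_data=network.extern_data)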
def _get_context_window_left_right(self):
  """
  :return: (ctx_left, ctx_right)
  :rtype: None|(NumbersDict,NumbersDict)
  """
  if self.context_window:
    # One less because the original frame also counts, and context_window=1 means
    # that we just have that single frame.
    # ctx_total is how many frames we add additionally.
    ctx_total = NumbersDict.max([self.context_window, 1]) - 1
    # In case ctx_total is odd / context_window is even, we have to decide where to
    # put one more frame. To keep it consistent with e.g. 1D convolution with a
    # kernel of even size, we add one more to the right.
    # See test_tfconv1d_evensize().
    ctx_left = ctx_total // 2
    ctx_right = ctx_total - ctx_left
    return ctx_left, ctx_right
  else:
    return None
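# Worked example (hedged): with context_window=6, ctx_total = max(6, 1) - 1 = 5 extra
# frames, split as ctx_left = 5 // 2 = 2 and ctx_right = 5 - 2 = 3. I.e. for an even
# window size the surplus frame goes to the right, matching a 1D convolution with an
# even-sized kernel (cf. test_tfconv1d_evensize()).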
def shapes_for_batches(self, batches, data_keys): """ :type batches: list[EngineBatch.Batch] :rtype: dict[str,list[int]] | None """ # The final device.data.shape is in format (time,batch,feature). shape = [NumbersDict(0), 0] # time,batch for batch in batches: shape = [NumbersDict.max([shape[0], batch.max_num_frames_per_slice]), shape[1] + batch.num_slices] if shape[1] == 0: return None assert shape[0].max_value() > 0 d = {k: [shape[0][k], shape[1]] for k in (set(data_keys) | {"data"})} for k in d: d[k] += self.get_data_shape(k) return d
def analyze_dataset(options):
  """
  :param argparse.Namespace options:
  """
  print("Epoch: %i" % options.epoch, file=log.v3)
  print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
  print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
  assert options.key in dataset.get_data_keys()

  terminal_width, _ = Util.terminal_size()
  show_interactive_process_bar = (log.verbose[3] and (not log.verbose[5]) and terminal_width >= 0)

  start_time = time.time()
  num_seqs_stats = Stats()
  if options.endseq < 0:
    options.endseq = float("inf")

  recurrent = True
  used_data_keys = dataset.get_data_keys()
  batch_size = config.typed_value('batch_size', 1)
  max_seqs = config.int('max_seqs', -1)
  seq_drop = config.float('seq_drop', 0.0)
  max_seq_length = config.typed_value('max_seq_length', None) or config.float('max_seq_length', 0)
  max_pad_size = config.typed_value("max_pad_size", None)

  batches = dataset.generate_batches(
    recurrent_net=recurrent,
    batch_size=batch_size,
    max_seqs=max_seqs,
    max_seq_length=max_seq_length,
    max_pad_size=max_pad_size,
    seq_drop=seq_drop,
    used_data_keys=used_data_keys)

  step = 0
  total_num_seqs = 0
  total_num_frames = NumbersDict()
  total_num_used_frames = NumbersDict()
  try:
    while batches.has_more():
      # See FeedDictDataProvider.
      batch, = batches.peek_next_n(1)
      assert isinstance(batch, Batch)
      if batch.start_seq > options.endseq:
        break
      dataset.load_seqs(batch.start_seq, batch.end_seq)
      complete_frac = batches.completed_frac()
      start_elapsed = time.time() - start_time
      try:
        num_seqs_s = str(dataset.num_seqs)
      except NotImplementedError:
        try:
          num_seqs_s = "~%i" % dataset.estimated_num_seqs
        except TypeError:  # a number is required, not NoneType
          num_seqs_s = "?"
      progress_prefix = "%i/%s" % (batch.start_seq, num_seqs_s)
      progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
      if complete_frac > 0:
        # Linear extrapolation: estimated total time = elapsed time / completed fraction.
        total_time_estimated = start_elapsed / complete_frac
        remaining_estimated = total_time_estimated - start_elapsed
        progress += " (%s)" % hms(remaining_estimated)
      batch_max_time = NumbersDict.max([seq.frame_length for seq in batch.seqs]) * len(batch.seqs)
      batch_num_used_frames = sum([seq.frame_length for seq in batch.seqs], NumbersDict())
      total_num_seqs += len(batch.seqs)
      num_seqs_stats.collect(numpy.array([len(batch.seqs)]))
      total_num_frames += batch_max_time
      total_num_used_frames += batch_num_used_frames
      print(
        "%s, batch %i, num seqs %i, frames %s, used %s (%s)" % (
          progress, step, len(batch.seqs), batch_max_time, batch_num_used_frames,
          batch_num_used_frames / batch_max_time),
        file=log.v5)
      if show_interactive_process_bar:
        Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)
      step += 1
      batches.advance(1)
  finally:
    print(
      "Done. Total time %s. More seqs which we did not dump: %s" % (
        hms(time.time() - start_time), batches.has_more()),
      file=log.v2)
    print("Dataset epoch %i, order %r." % (dataset.epoch, dataset.seq_ordering), file=log.v2)
    print("Num batches (steps): %i" % step, file=log.v1)
    print("Num seqs: %i" % total_num_seqs, file=log.v1)
    num_seqs_stats.dump(stream=log.v1, stream_prefix="Batch num seqs ")
    for key in used_data_keys:
      print("Data key %r:" % key, file=log.v1)
      print("  Num frames: %s" % total_num_frames[key], file=log.v1)
      print("  Num used frames: %s" % total_num_used_frames[key], file=log.v1)
      print("  Fraction used frames: %s" % (total_num_used_frames / total_num_frames)[key], file=log.v1)
    dataset.finish_epoch()
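# Hedged driver sketch (hypothetical, not part of the tool): analyze_dataset() expects an
# argparse.Namespace with at least `epoch`, `key`, and `endseq`; `dataset`, `config`, and
# `log` are module-level globals set up elsewhere. The exact argument names/defaults below
# are assumptions for illustration.
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--epoch", type=int, default=1)
#   parser.add_argument("--key", default="data")
#   parser.add_argument("--endseq", type=int, default=-1)  # -1 means all seqs
#   analyze_dataset(parser.parse_args())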
def _generate_batches(self, recurrent_net, batch_size, max_seqs=-1, seq_drop=0.0,
                      max_seq_length=sys.maxsize, used_data_keys=None):
  """
  :param bool recurrent_net: If True, the batch might have a batch seq dimension > 1.
    Otherwise, the batch seq dimension is always 1 and multiple seqs will be concatenated.
  :param int batch_size: Max number of frames in one batch.
  :param int max_seqs: Max number of seqs per batch.
  :param float seq_drop: probability to drop a seq (only used in the recurrent case)
  :param int max_seq_length: skip seqs longer than this; if negative, the limit applies
    to the 'classes' length only
  :param set(str)|None used_data_keys:
  """
  if batch_size == 0:
    batch_size = sys.maxsize
  assert batch_size > 0
  if max_seqs == -1:
    max_seqs = float('inf')
  assert max_seqs > 0
  assert seq_drop <= 1.0
  chunk_size = self.chunk_size
  chunk_step = self.chunk_step
  if not recurrent_net:
    if chunk_size != 0:
      print("Non-recurrent network, chunk size %i:%i ignored" % (chunk_size, chunk_step), file=log.v4)
      chunk_size = 0
  batch = Batch()
  if self.context_window:
    # See _get_context_window_left_right() above: one less because the original frame counts too.
    ctx_lr = NumbersDict.max([self.context_window, 1]) - 1
    ctx_left = ctx_lr // 2
    ctx_right = ctx_lr - ctx_left
  else:
    ctx_left, ctx_right = 0, 0
  for seq_idx, t_start, t_end in self.iterate_seqs(
        chunk_size=chunk_size, chunk_step=chunk_step, used_data_keys=used_data_keys):
    if ctx_left or ctx_right:
      t_start -= ctx_left
      t_end += ctx_right
    if recurrent_net:
      length = t_end - t_start
      if max_seq_length < 0 and length['classes'] > -max_seq_length:
        continue
      elif max_seq_length > 0 and length.max_value() > max_seq_length:
        continue
      if length.max_value() > batch_size:
        print("warning: sequence length (%i) larger than limit (%i)" % (length.max_value(), batch_size),
              file=log.v4)
      if self.rnd_seq_drop.random() < seq_drop:
        continue
      dt, ds = batch.try_sequence_as_slice(length)
      if ds > 1 and ((dt * ds).max_value() > batch_size or ds > max_seqs):
        yield batch
        batch = Batch()
      batch.add_sequence_as_slice(seq_idx=seq_idx, seq_start_frame=t_start, length=length)
    else:  # Not recurrent.
      while t_start.max_value() < t_end.max_value():
        length = t_end - t_start
        num_frames = NumbersDict.min([length, batch_size - batch.get_all_slices_num_frames()])
        assert num_frames.max_value() > 0
        batch.add_frames(seq_idx=seq_idx, seq_start_frame=t_start, length=num_frames)
        if batch.get_all_slices_num_frames() >= batch_size or batch.get_num_seqs() > max_seqs:
          yield batch
          batch = Batch()
        t_start += num_frames
  if batch.get_all_slices_num_frames() > 0:
    yield batch
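# Hedged usage sketch: _generate_batches() is a generator over Batch objects. Combined
# with the module-level shapes_for_batches() above, a consumer could derive the padded
# array shapes per batch (the argument values here are illustrative, not defaults):
#
#   for batch in self._generate_batches(recurrent_net=True, batch_size=5000, max_seqs=40):
#     shapes = shapes_for_batches([batch], data_keys=["data", "classes"], dataset=self)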