コード例 #1
0
def get_raw_strings(dataset, options):
  """
  :param Dataset dataset:
  :param options: argparse.Namespace
  :return: list of (seq tag, string)
  :rtype: list[(str,str)]
  """
  refs = []
  start_time = time.time()
  seq_len_stats = Stats()
  seq_idx = options.startseq
  if options.endseq < 0:
    options.endseq = float("inf")
  interactive = Util.is_tty() and not log.verbose[5]
  print("Iterating over %r." % dataset, file=log.v2)
  while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
    dataset.load_seqs(seq_idx, seq_idx + 1)
    complete_frac = dataset.get_complete_frac(seq_idx)
    start_elapsed = time.time() - start_time
    try:
      num_seqs_s = str(dataset.num_seqs)
    except NotImplementedError:
      try:
        num_seqs_s = "~%i" % dataset.estimated_num_seqs
      except TypeError:  # a number is required, not NoneType
        num_seqs_s = "?"
    progress_prefix = "%i/%s" % (seq_idx, num_seqs_s,)
    progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
    if complete_frac > 0:
      total_time_estimated = start_elapsed / complete_frac
      remaining_estimated = total_time_estimated - start_elapsed
      progress += " (%s)" % hms(remaining_estimated)
    seq_tag = dataset.get_tag(seq_idx)
    assert isinstance(seq_tag, str)
    ref = dataset.get_data(seq_idx, options.key)
    if isinstance(ref, numpy.ndarray):
      assert ref.shape == () or (ref.ndim == 1 and ref.dtype == numpy.uint8)
      if ref.shape == ():
        ref = ref.flatten()[0]  # get the entry itself (str or bytes)
      else:
        ref = ref.tobytes()
    if isinstance(ref, bytes):
      ref = ref.decode("utf8")
    assert isinstance(ref, str)
    seq_len_stats.collect([len(ref)])
    refs.append((seq_tag, ref))
    if interactive:
      Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)
    elif log.verbose[5]:
      print(progress_prefix, "seq tag %r, ref len %i chars" % (seq_tag, len(ref)))
    seq_idx += 1
  print("Done. Num seqs %i. Total time %s." % (
    seq_idx, hms(time.time() - start_time)), file=log.v1)
  print("More seqs which we did not dumped: %s." % (
    dataset.is_less_than_num_seqs(seq_idx),), file=log.v1)
  seq_len_stats.dump(stream_prefix="Seq-length %r " % (options.key,), stream=log.v2)
  return refs
コード例 #2
0
def get_raw_strings(dataset, options):
  """
  :param Dataset dataset:
  :param options: argparse.Namespace
  :return: list of (seq tag, string)
  :rtype: list[(str,str)]
  """
  refs = []
  start_time = time.time()
  seq_len_stats = Stats()
  seq_idx = options.startseq
  if options.endseq < 0:
    options.endseq = float("inf")
  interactive = Util.is_tty() and not log.verbose[5]
  print("Iterating over %r." % dataset, file=log.v2)
  while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
    dataset.load_seqs(seq_idx, seq_idx + 1)
    complete_frac = dataset.get_complete_frac(seq_idx)
    start_elapsed = time.time() - start_time
    try:
      num_seqs_s = str(dataset.num_seqs)
    except NotImplementedError:
      try:
        num_seqs_s = "~%i" % dataset.estimated_num_seqs
      except TypeError:  # a number is required, not NoneType
        num_seqs_s = "?"
    progress_prefix = "%i/%s" % (seq_idx, num_seqs_s,)
    progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
    if complete_frac > 0:
      total_time_estimated = start_elapsed / complete_frac
      remaining_estimated = total_time_estimated - start_elapsed
      progress += " (%s)" % hms(remaining_estimated)
    seq_tag = dataset.get_tag(seq_idx)
    assert isinstance(seq_tag, str)
    ref = dataset.get_data(seq_idx, options.key)
    if isinstance(ref, numpy.ndarray):
      assert ref.shape == () or (ref.ndim == 1 and ref.dtype == numpy.uint8)
      if ref.shape == ():
        ref = ref.flatten()[0]  # get the entry itself (str or bytes)
      else:
        ref = ref.tobytes()
    if isinstance(ref, bytes):
      ref = ref.decode("utf8")
    assert isinstance(ref, str)
    seq_len_stats.collect([len(ref)])
    refs.append((seq_tag, ref))
    if interactive:
      Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)
    elif log.verbose[5]:
      print(progress_prefix, "seq tag %r, ref len %i chars" % (seq_tag, len(ref)))
    seq_idx += 1
  print("Done. Num seqs %i. Total time %s." % (
    seq_idx, hms(time.time() - start_time)), file=log.v1)
  print("More seqs which we did not dumped: %s." % (
    dataset.is_less_than_num_seqs(seq_idx),), file=log.v1)
  seq_len_stats.dump(stream_prefix="Seq-length %r " % (options.key,), stream=log.v2)
  return refs
コード例 #3
0
def analyze_dataset(options):
  """
  :param options: argparse.Namespace
  """
  print("Epoch: %i" % options.epoch, file=log.v3)
  print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
  print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
  assert options.key in dataset.get_data_keys()

  terminal_width, _ = Util.terminal_size()
  show_interactive_process_bar = (log.verbose[3] and (not log.verbose[5]) and terminal_width >= 0)

  start_time = time.time()
  num_seqs_stats = Stats()
  if options.endseq < 0:
    options.endseq = float("inf")

  recurrent = True
  used_data_keys = dataset.get_data_keys()
  batch_size = config.typed_value('batch_size', 1)
  max_seqs = config.int('max_seqs', -1)
  seq_drop = config.float('seq_drop', 0.0)
  max_seq_length = config.typed_value('max_seq_length', None) or config.float('max_seq_length', 0)
  max_pad_size = config.typed_value("max_pad_size", None)

  batches = dataset.generate_batches(
    recurrent_net=recurrent,
    batch_size=batch_size,
    max_seqs=max_seqs,
    max_seq_length=max_seq_length,
    max_pad_size=max_pad_size,
    seq_drop=seq_drop,
    used_data_keys=used_data_keys)

  step = 0
  total_num_seqs = 0
  total_num_frames = NumbersDict()
  total_num_used_frames = NumbersDict()

  try:
    while batches.has_more():
      # See FeedDictDataProvider.
      batch, = batches.peek_next_n(1)
      assert isinstance(batch, Batch)
      if batch.start_seq > options.endseq:
        break
      dataset.load_seqs(batch.start_seq, batch.end_seq)
      complete_frac = batches.completed_frac()
      start_elapsed = time.time() - start_time
      try:
        num_seqs_s = str(dataset.num_seqs)
      except NotImplementedError:
        try:
          num_seqs_s = "~%i" % dataset.estimated_num_seqs
        except TypeError:  # a number is required, not NoneType
          num_seqs_s = "?"
      progress_prefix = "%i/%s" % (batch.start_seq, num_seqs_s)
      progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
      if complete_frac > 0:
        total_time_estimated = start_elapsed / complete_frac
        remaining_estimated = total_time_estimated - start_elapsed
        progress += " (%s)" % hms(remaining_estimated)

      batch_max_time = NumbersDict.max([seq.frame_length for seq in batch.seqs]) * len(batch.seqs)
      batch_num_used_frames = sum([seq.frame_length for seq in batch.seqs], NumbersDict())
      total_num_seqs += len(batch.seqs)
      num_seqs_stats.collect(numpy.array([len(batch.seqs)]))
      total_num_frames += batch_max_time
      total_num_used_frames += batch_num_used_frames

      print(
        "%s, batch %i, num seqs %i, frames %s, used %s (%s)" % (
          progress, step, len(batch.seqs),
          batch_max_time, batch_num_used_frames, batch_num_used_frames / batch_max_time),
        file=log.v5)
      if show_interactive_process_bar:
        Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)

      step += 1
      batches.advance(1)

  finally:
    print("Done. Total time %s. More seqs which we did not dumped: %s" % (
      hms(time.time() - start_time), batches.has_more()), file=log.v2)
    print("Dataset epoch %i, order %r." % (dataset.epoch, dataset.seq_ordering))
    print("Num batches (steps): %i" % step, file=log.v1)
    print("Num seqs: %i" % total_num_seqs, file=log.v1)
    num_seqs_stats.dump(stream=log.v1, stream_prefix="Batch num seqs ")
    for key in used_data_keys:
      print("Data key %r:" % key, file=log.v1)
      print("  Num frames: %s" % total_num_frames[key], file=log.v1)
      print("  Num used frames: %s" % total_num_used_frames[key], file=log.v1)
      print("  Fraction used frames: %s" % (total_num_used_frames / total_num_frames)[key], file=log.v1)
    dataset.finish_epoch()
コード例 #4
0
def dump_dataset(dataset, options):
  """
  :type dataset: Dataset.Dataset
  :param options: argparse.Namespace
  """
  print("Epoch: %i" % options.epoch, file=log.v3)
  dataset.init_seq_order(epoch=options.epoch)
  print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
  print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
  assert options.key in dataset.get_data_keys()

  if options.get_num_seqs:
    print("Get num seqs.")
    print("estimated_num_seqs: %r" % dataset.estimated_num_seqs)
    try:
      print("num_seqs: %r" % dataset.num_seqs)
    except Exception as exc:
      print("num_seqs exception %r, which is valid, so we count." % exc)
      seq_idx = 0
      if dataset.get_target_list():
        default_target = dataset.get_target_list()[0]
      else:
        default_target = None
      while dataset.is_less_than_num_seqs(seq_idx):
        dataset.load_seqs(seq_idx, seq_idx + 1)
        if seq_idx % 10000 == 0:
          if default_target:
            targets = dataset.get_targets(default_target, seq_idx)
            postfix = " (targets = %r...)" % (targets[:10],)
          else:
            postfix = ""
          print("%i ...%s" % (seq_idx, postfix))
        seq_idx += 1
      print("accumulated num seqs: %i" % seq_idx)
    print("Done.")
    return

  dump_file = None
  if options.type == "numpy":
    print("Dump files: %r*%r" % (options.dump_prefix, options.dump_postfix), file=log.v3)
  elif options.type == "stdout":
    print("Dump to stdout", file=log.v3)
    if options.stdout_limit is not None:
      Util.set_pretty_print_default_limit(options.stdout_limit)
      numpy.set_printoptions(
        threshold=sys.maxsize if options.stdout_limit == float("inf") else int(options.stdout_limit))
    if options.stdout_as_bytes:
      Util.set_pretty_print_as_bytes(options.stdout_as_bytes)
  elif options.type == "print_tag":
    print("Dump seq tag to stdout", file=log.v3)
  elif options.type == "dump_tag":
    dump_file = open("%sseq-tags.txt" % options.dump_prefix, "w")
    print("Dump seq tag to file: %s" % (dump_file.name,), file=log.v3)
  elif options.type == "dump_seq_len":
    dump_file = open("%sseq-lens.txt" % options.dump_prefix, "w")
    print("Dump seq lens to file: %s" % (dump_file.name,), file=log.v3)
    dump_file.write("{\n")
  elif options.type == "print_shape":
    print("Dump shape to stdout", file=log.v3)
  elif options.type == "plot":
    print("Plot.", file=log.v3)
  elif options.type == "interactive":
    print("Interactive debug shell.", file=log.v3)
  elif options.type == "null":
    print("No dump.")
  else:
    raise Exception("unknown dump option type %r" % options.type)

  start_time = time.time()
  stats = Stats() if (options.stats or options.dump_stats) else None
  seq_len_stats = {key: Stats() for key in dataset.get_data_keys()}
  seq_idx = options.startseq
  if options.endseq < 0:
    options.endseq = float("inf")
  while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
    dataset.load_seqs(seq_idx, seq_idx + 1)
    complete_frac = dataset.get_complete_frac(seq_idx)
    start_elapsed = time.time() - start_time
    try:
      num_seqs_s = str(dataset.num_seqs)
    except NotImplementedError:
      try:
        num_seqs_s = "~%i" % dataset.estimated_num_seqs
      except TypeError:  # a number is required, not NoneType
        num_seqs_s = "?"
    progress_prefix = "%i/%s" % (seq_idx, num_seqs_s)
    progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
    if complete_frac > 0:
      total_time_estimated = start_elapsed / complete_frac
      remaining_estimated = total_time_estimated - start_elapsed
      progress += " (%s)" % hms(remaining_estimated)
    if options.type == "print_tag":
      print("seq %s tag:" % (progress if log.verbose[2] else progress_prefix), dataset.get_tag(seq_idx))
    elif options.type == "dump_tag":
      print("seq %s tag:" % (progress if log.verbose[2] else progress_prefix), dataset.get_tag(seq_idx))
      dump_file.write("%s\n" % dataset.get_tag(seq_idx))
    elif options.type == "dump_seq_len":
      seq_len = dataset.get_seq_length(seq_idx)[options.key]
      print(
        "seq %s tag:" % (progress if log.verbose[2] else progress_prefix),
        dataset.get_tag(seq_idx), "%r len:" % options.key, seq_len)
      dump_file.write("%r: %r,\n" % (dataset.get_tag(seq_idx), seq_len))
    else:
      data = dataset.get_data(seq_idx, options.key)
      if options.type == "numpy":
        numpy.savetxt("%s%i.data%s" % (options.dump_prefix, seq_idx, options.dump_postfix), data)
      elif options.type == "stdout":
        print("seq %s tag:" % progress, dataset.get_tag(seq_idx))
        print("seq %s data:" % progress, pretty_print(data))
      elif options.type == "print_shape":
        print("seq %s data shape:" % progress, data.shape)
      elif options.type == "plot":
        plot(data)
      for target in dataset.get_target_list():
        targets = dataset.get_targets(target, seq_idx)
        if options.type == "numpy":
          numpy.savetxt("%s%i.targets.%s%s" % (options.dump_prefix, seq_idx, target, options.dump_postfix), targets, fmt='%i')
        elif options.type == "stdout":
          extra = ""
          if target in dataset.labels and len(dataset.labels[target]) > 1:
            assert dataset.can_serialize_data(target)
            extra += " (%r)" % dataset.serialize_data(key=target, data=targets)
          print("seq %i target %r: %s%s" % (seq_idx, target, pretty_print(targets), extra))
        elif options.type == "print_shape":
          print("seq %i target %r shape:" % (seq_idx, target), targets.shape)
      if options.type == "interactive":
        from Debug import debug_shell
        debug_shell(locals())
    seq_len = dataset.get_seq_length(seq_idx)
    for key in dataset.get_data_keys():
      seq_len_stats[key].collect([seq_len[key]])
    if stats:
      stats.collect(data)
    if options.type == "null":
      Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)

    seq_idx += 1

  print("Done. Total time %s. More seqs which we did not dumped: %s" % (
    hms_fraction(time.time() - start_time), dataset.is_less_than_num_seqs(seq_idx)), file=log.v2)
  for key in dataset.get_data_keys():
    seq_len_stats[key].dump(stream_prefix="Seq-length %r " % key, stream=log.v2)
  if stats:
    stats.dump(output_file_prefix=options.dump_stats, stream_prefix="Data %r " % options.key, stream=log.v1)
  if options.type == "dump_seq_len":
    dump_file.write("}\n")
  if dump_file:
    print("Dumped to file:", dump_file.name, file=log.v2)
    dump_file.close()
コード例 #5
0
def dump_dataset(dataset, options):
    """
  :type dataset: Dataset.Dataset
  :param options: argparse.Namespace
  """
    print("Epoch: %i" % options.epoch, file=log.v3)
    dataset.init_seq_order(epoch=options.epoch)
    print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
    print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
    assert options.key in dataset.get_data_keys()

    if options.get_num_seqs:
        print("Get num seqs.")
        print("estimated_num_seqs: %r" % dataset.estimated_num_seqs)
        try:
            print("num_seqs: %r" % dataset.num_seqs)
        except Exception as exc:
            print("num_seqs exception %r, which is valid, so we count." % exc)
            seq_idx = 0
            if dataset.get_target_list():
                default_target = dataset.get_target_list()[0]
            else:
                default_target = None
            while dataset.is_less_than_num_seqs(seq_idx):
                dataset.load_seqs(seq_idx, seq_idx + 1)
                if seq_idx % 10000 == 0:
                    if default_target:
                        targets = dataset.get_targets(default_target, seq_idx)
                        postfix = " (targets = %r...)" % (targets[:10], )
                    else:
                        postfix = ""
                    print("%i ...%s" % (seq_idx, postfix))
                seq_idx += 1
            print("accumulated num seqs: %i" % seq_idx)
        print("Done.")
        return

    if options.type == "numpy":
        print("Dump files: %r*%r" %
              (options.dump_prefix, options.dump_postfix),
              file=log.v3)
    elif options.type == "stdout":
        print("Dump to stdout", file=log.v3)
    elif options.type == "print_shape":
        print("Dump shape to stdout", file=log.v3)
    elif options.type == "plot":
        print("Plot.", file=log.v3)
    elif options.type == "null":
        print("No dump.")
    else:
        raise Exception("unknown dump option type %r" % options.type)

    start_time = time.time()
    stats = Stats() if (options.stats or options.dump_stats) else None
    seq_len_stats = {key: Stats() for key in dataset.get_data_keys()}
    seq_idx = options.startseq
    if options.endseq < 0:
        options.endseq = float("inf")
    while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
        dataset.load_seqs(seq_idx, seq_idx + 1)
        complete_frac = dataset.get_complete_frac(seq_idx)
        start_elapsed = time.time() - start_time
        try:
            num_seqs_s = str(dataset.num_seqs)
        except NotImplementedError:
            try:
                num_seqs_s = "~%i" % dataset.estimated_num_seqs
            except TypeError:  # a number is required, not NoneType
                num_seqs_s = "?"
        progress_prefix = "%i/%s" % (seq_idx, num_seqs_s)
        progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
        if complete_frac > 0:
            total_time_estimated = start_elapsed / complete_frac
            remaining_estimated = total_time_estimated - start_elapsed
            progress += " (%s)" % hms(remaining_estimated)
        data = dataset.get_data(seq_idx, options.key)
        if options.type == "numpy":
            numpy.savetxt(
                "%s%i.data%s" %
                (options.dump_prefix, seq_idx, options.dump_postfix), data)
        elif options.type == "stdout":
            print("seq %s data:" % progress, pretty_print(data))
        elif options.type == "print_shape":
            print("seq %s data shape:" % progress, data.shape)
        elif options.type == "plot":
            plot(data)
        for target in dataset.get_target_list():
            targets = dataset.get_targets(target, seq_idx)
            if options.type == "numpy":
                numpy.savetxt("%s%i.targets.%s%s" %
                              (options.dump_prefix, seq_idx, target,
                               options.dump_postfix),
                              targets,
                              fmt='%i')
            elif options.type == "stdout":
                print("seq %i target %r:" % (seq_idx, target),
                      pretty_print(targets))
            elif options.type == "print_shape":
                print("seq %i target %r shape:" % (seq_idx, target),
                      targets.shape)
        seq_len = dataset.get_seq_length(seq_idx)
        for key in dataset.get_data_keys():
            seq_len_stats[key].collect([seq_len[key]])
        if stats:
            stats.collect(data)
        if options.type == "null":
            Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)

        seq_idx += 1

    print("Done. Total time %s. More seqs which we did not dumped: %s" %
          (hms(time.time() - start_time),
           dataset.is_less_than_num_seqs(seq_idx)),
          file=log.v1)
    for key in dataset.get_data_keys():
        seq_len_stats[key].dump(stream_prefix="Seq-length %r " % key,
                                stream=log.v2)
    if stats:
        stats.dump(output_file_prefix=options.dump_stats,
                   stream_prefix="Data %r " % options.key,
                   stream=log.v2)
コード例 #6
0
ファイル: dump-dataset.py プロジェクト: rwth-i6/returnn
def dump_dataset(dataset, options):
  """
  :type dataset: Dataset.Dataset
  :param options: argparse.Namespace
  """
  print("Epoch: %i" % options.epoch, file=log.v3)
  dataset.init_seq_order(epoch=options.epoch)
  print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
  print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
  assert options.key in dataset.get_data_keys()

  if options.get_num_seqs:
    print("Get num seqs.")
    print("estimated_num_seqs: %r" % dataset.estimated_num_seqs)
    try:
      print("num_seqs: %r" % dataset.num_seqs)
    except Exception as exc:
      print("num_seqs exception %r, which is valid, so we count." % exc)
      seq_idx = 0
      if dataset.get_target_list():
        default_target = dataset.get_target_list()[0]
      else:
        default_target = None
      while dataset.is_less_than_num_seqs(seq_idx):
        dataset.load_seqs(seq_idx, seq_idx + 1)
        if seq_idx % 10000 == 0:
          if default_target:
            targets = dataset.get_targets(default_target, seq_idx)
            postfix = " (targets = %r...)" % (targets[:10],)
          else:
            postfix = ""
          print("%i ...%s" % (seq_idx, postfix))
        seq_idx += 1
      print("accumulated num seqs: %i" % seq_idx)
    print("Done.")
    return

  if options.type == "numpy":
    print("Dump files: %r*%r" % (options.dump_prefix, options.dump_postfix), file=log.v3)
  elif options.type == "stdout":
    print("Dump to stdout", file=log.v3)
    if options.stdout_limit is not None:
      Util.set_pretty_print_default_limit(options.stdout_limit)
      numpy.set_printoptions(
        threshold=sys.maxsize if options.stdout_limit == float("inf") else int(options.stdout_limit))
    if options.stdout_as_bytes:
      Util.set_pretty_print_as_bytes(options.stdout_as_bytes)
  elif options.type == "print_tag":
    print("Dump seq tag to stdout", file=log.v3)
  elif options.type == "print_shape":
    print("Dump shape to stdout", file=log.v3)
  elif options.type == "plot":
    print("Plot.", file=log.v3)
  elif options.type == "null":
    print("No dump.")
  else:
    raise Exception("unknown dump option type %r" % options.type)

  start_time = time.time()
  stats = Stats() if (options.stats or options.dump_stats) else None
  seq_len_stats = {key: Stats() for key in dataset.get_data_keys()}
  seq_idx = options.startseq
  if options.endseq < 0:
    options.endseq = float("inf")
  while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
    dataset.load_seqs(seq_idx, seq_idx + 1)
    complete_frac = dataset.get_complete_frac(seq_idx)
    start_elapsed = time.time() - start_time
    try:
      num_seqs_s = str(dataset.num_seqs)
    except NotImplementedError:
      try:
        num_seqs_s = "~%i" % dataset.estimated_num_seqs
      except TypeError:  # a number is required, not NoneType
        num_seqs_s = "?"
    progress_prefix = "%i/%s" % (seq_idx, num_seqs_s)
    progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
    if complete_frac > 0:
      total_time_estimated = start_elapsed / complete_frac
      remaining_estimated = total_time_estimated - start_elapsed
      progress += " (%s)" % hms(remaining_estimated)
    if options.type == "print_tag":
      print("seq %s tag:" % (progress if log.verbose[2] else progress_prefix), dataset.get_tag(seq_idx))
    else:
      data = dataset.get_data(seq_idx, options.key)
      if options.type == "numpy":
        numpy.savetxt("%s%i.data%s" % (options.dump_prefix, seq_idx, options.dump_postfix), data)
      elif options.type == "stdout":
        print("seq %s tag:" % progress, dataset.get_tag(seq_idx))
        print("seq %s data:" % progress, pretty_print(data))
      elif options.type == "print_shape":
        print("seq %s data shape:" % progress, data.shape)
      elif options.type == "plot":
        plot(data)
      for target in dataset.get_target_list():
        targets = dataset.get_targets(target, seq_idx)
        if options.type == "numpy":
          numpy.savetxt("%s%i.targets.%s%s" % (options.dump_prefix, seq_idx, target, options.dump_postfix), targets, fmt='%i')
        elif options.type == "stdout":
          extra = ""
          if target in dataset.labels and len(dataset.labels[target]) > 1:
            labels = dataset.labels[target]
            if len(labels) < 1000 and all([len(l) == 1 for l in labels]):
              join_str = ""
            else:
              join_str = " "
            extra += " (%r)" % join_str.join(map(dataset.labels[target].__getitem__, targets))
          print("seq %i target %r: %s%s" % (seq_idx, target, pretty_print(targets), extra))
        elif options.type == "print_shape":
          print("seq %i target %r shape:" % (seq_idx, target), targets.shape)
    seq_len = dataset.get_seq_length(seq_idx)
    for key in dataset.get_data_keys():
      seq_len_stats[key].collect([seq_len[key]])
    if stats:
      stats.collect(data)
    if options.type == "null":
      Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)

    seq_idx += 1

  print("Done. Total time %s. More seqs which we did not dumped: %s" % (
    hms(time.time() - start_time), dataset.is_less_than_num_seqs(seq_idx)), file=log.v2)
  for key in dataset.get_data_keys():
    seq_len_stats[key].dump(stream_prefix="Seq-length %r " % key, stream=log.v2)
  if stats:
    stats.dump(output_file_prefix=options.dump_stats, stream_prefix="Data %r " % options.key, stream=log.v1)