Example #1
def main():
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  for opt in args.cfg:
    key, value = opt.split("=", 1)
    assert key in base_settings
    value_type = type(base_settings[key])
    base_settings[key] = value_type(value)
  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_returnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if not args.no_setup_tf_thread_pools:
    setup_tf_thread_pools(log_file=log.v2)
  else:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for lstm_unit in LstmCellTypes:
      benchmarks["GPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=True)
  if not args.no_cpu:
    for lstm_unit in LstmCellTypes:
      if lstm_unit in GpuOnlyCellTypes:
        continue
      benchmarks["CPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  for t, lstm_unit in sorted([(t, lstm_unit) for (lstm_unit, t) in sorted(benchmarks.items())]):
    print("  %s: %s" % (lstm_unit, hms_fraction(t)))
  print("Done.")
Example #2
def main():
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  for opt in args.cfg:
    key, value = opt.split("=", 1)
    assert key in base_settings
    value_type = type(base_settings[key])
    base_settings[key] = value_type(value)
  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_crnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if not args.no_setup_tf_thread_pools:
    setup_tf_thread_pools(log_file=log.v2)
  else:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for lstm_unit in LstmCellTypes:
      benchmarks["GPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=True)
  if not args.no_cpu:
    for lstm_unit in LstmCellTypes:
      if lstm_unit in GpuOnlyCellTypes:
        continue
      benchmarks["CPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  for t, lstm_unit in sorted([(t, lstm_unit) for (lstm_unit, t) in sorted(benchmarks.items())]):
    print("  %s: %s" % (lstm_unit, hms_fraction(t)))
  print("Done.")
Example #3
  def run(self):
    if self.individual.cost is not None:
      return self.individual.cost
    start_time = time.time()
    hyper_param_mapping = self.individual.hyper_param_mapping
    print("Training %r using hyper params:" % self.individual.name, file=log.v2)
    for p in self.optim.hyper_params:
      print(" %s -> %s" % (p.description(), hyper_param_mapping[p]), file=log.v2)
    config = self.optim.create_config_instance(hyper_param_mapping, gpu_ids=self.gpu_ids)
    engine = Engine(config=config)
    train_data = StaticDataset.copy_from_dataset(self.optim.train_data)
    engine.init_train_from_config(config=config, train_data=train_data)
    # Not directly calling train() as we want to have full control.
    engine.epoch = 1
    train_data.init_seq_order(epoch=engine.epoch)
    batches = train_data.generate_batches(
      recurrent_net=engine.network.recurrent,
      batch_size=engine.batch_size,
      max_seqs=engine.max_seqs,
      max_seq_length=int(engine.max_seq_length),
      seq_drop=engine.seq_drop,
      shuffle_batches=engine.shuffle_batches,
      used_data_keys=engine.network.used_data_keys)
    engine.updater.set_learning_rate(engine.learning_rate)
    trainer = Runner(engine=engine, dataset=train_data, batches=batches, train=True)
    self.runner = trainer
    if self.cancel_flag:
      raise CancelTrainingException("Trainer cancel flag is set")
    trainer.run(report_prefix="hyper param tune train %r" % self.individual.name)
    if not trainer.finalized:
      print("Trainer exception:", trainer.run_exception, file=log.v1)
      raise trainer.run_exception
    cost = trainer.score["cost:output"]
    print(
      "Individual %s:" % self.individual.name,
      "Train cost:", cost,
      "elapsed time:", hms_fraction(time.time() - start_time),
      file=self.optim.log)
    self.individual.cost = cost
Example #4
  def run(self):
    if self.individual.cost is not None:
      return self.individual.cost
    start_time = time.time()
    hyper_param_mapping = self.individual.hyper_param_mapping
    print("Training %r using hyper params:" % self.individual.name, file=log.v2)
    for p in self.optim.hyper_params:
      print(" %s -> %s" % (p.description(), hyper_param_mapping[p]), file=log.v2)
    config = self.optim.create_config_instance(hyper_param_mapping, gpu_ids=self.gpu_ids)
    engine = Engine(config=config)
    train_data = StaticDataset.copy_from_dataset(self.optim.train_data)
    engine.init_train_from_config(config=config, train_data=train_data)
    # Not directly calling train() as we want to have full control.
    engine.epoch = 1
    train_data.init_seq_order(epoch=engine.epoch)
    batches = train_data.generate_batches(
      recurrent_net=engine.network.recurrent,
      batch_size=engine.batch_size,
      max_seqs=engine.max_seqs,
      max_seq_length=int(engine.max_seq_length),
      seq_drop=engine.seq_drop,
      shuffle_batches=engine.shuffle_batches,
      used_data_keys=engine.network.used_data_keys)
    engine.updater.set_learning_rate(engine.learning_rate, session=engine.tf_session)
    trainer = Runner(engine=engine, dataset=train_data, batches=batches, train=True)
    self.runner = trainer
    if self.cancel_flag:
      raise CancelTrainingException("Trainer cancel flag is set")
    trainer.run(report_prefix="hyper param tune train %r" % self.individual.name)
    if not trainer.finalized:
      print("Trainer exception:", trainer.run_exception, file=log.v1)
      raise trainer.run_exception
    cost = trainer.score["cost:output"]
    print(
      "Individual %s:" % self.individual.name,
      "Train cost:", cost,
      "elapsed time:", hms_fraction(time.time() - start_time),
      file=self.optim.log)
    self.individual.cost = cost
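
Note: Examples #3 and #4 differ only in the set_learning_rate call, which the later revision gives an explicit TF session. Both share the same control pattern: a memoized result (individual.cost), so a repeated run() is free, and a cancel flag checked just before the expensive training call. A stripped-down sketch of that pattern (Job and CancelledError are hypothetical names; unlike the original, the sketch also returns the cost on the first call):

class CancelledError(Exception):
  """Raised when the cancel flag is set before training starts."""

class Job:
  def __init__(self, expensive_train):
    self._train = expensive_train
    self.cancel_flag = False
    self.cost = None  # memoized result, like individual.cost above

  def run(self):
    if self.cost is not None:  # already trained: return the cached cost
      return self.cost
    if self.cancel_flag:  # abort cheaply before the expensive call
      raise CancelledError("cancel flag is set")
    self.cost = self._train()
    return self.cost

job = Job(expensive_train=lambda: 0.42)
assert job.run() == 0.42
assert job.run() == 0.42  # second call hits the cache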
Example #5
def benchmark(lstm_unit, use_gpu):
  """
  :param str lstm_unit: e.g. "LSTMBlock", one of LstmCellTypes
  :param bool use_gpu:
  :return: runtime in seconds of the training itself, excluding initialization
  :rtype: float
  """
  device = {True: "GPU", False: "CPU"}[use_gpu]
  key = "%s:%s" % (device, lstm_unit)
  print(">>> Start benchmark for %s." % key)
  config = Config()
  config.update(make_config_dict(lstm_unit=lstm_unit, use_gpu=use_gpu))
  dataset_kwargs = config.typed_value("train")
  Dataset.kwargs_update_from_config(config, dataset_kwargs)
  dataset = init_dataset(dataset_kwargs)
  engine = Engine(config=config)
  engine.init_train_from_config(config=config, train_data=dataset)
  print(">>> Start training now for %s." % key)
  start_time = time.time()
  engine.train()
  runtime = time.time() - start_time
  print(">>> Runtime of %s: %s" % (key, hms_fraction(runtime)))
  engine.finalize()
  return runtime
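
benchmark() times only the engine.train() call, excluding config, engine, and dataset initialization, and formats the runtime with hms_fraction. A hedged sketch of such a formatter (hms_fraction_sketch is a hypothetical stand-in; the h:mm:ss.fff format is an assumption, not RETURNN's actual implementation):

import time

def hms_fraction_sketch(seconds):
  """Format seconds as h:mm:ss.fff (assumed format; RETURNN's hms_fraction may differ)."""
  m, s = divmod(seconds, 60.0)
  h, m = divmod(int(m), 60)
  return "%i:%02i:%06.3f" % (h, m, s)

assert hms_fraction_sketch(3723.5) == "1:02:03.500"

start_time = time.time()
time.sleep(0.01)  # stands in for engine.train()
print("Runtime: %s" % hms_fraction_sketch(time.time() - start_time))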
Example #6
def execute_main_task():
  """
  Executes the main task (via config ``task`` option).
  """
  from Util import hms_fraction
  start_time = time.time()
  task = config.value('task', 'train')
  if task == 'train':
    assert train_data.have_seqs(), "no train files specified, check train option: %s" % config.value('train', None)
    engine.init_train_from_config(config, train_data, dev_data, eval_data)
    engine.train()
  elif task == "eval":
    epoch = config.int("epoch", -1)
    load_epoch = config.int("load_epoch", -1)
    if epoch >= 0:
      assert (load_epoch < 0) or (load_epoch == epoch), "epoch and load_epoch have to match"
      engine.epoch = epoch
      config.set('load_epoch', engine.epoch)
    else:
      assert load_epoch, "specify epoch or load_epoch"
      engine.epoch = load_epoch
    engine.init_train_from_config(config, train_data, dev_data, eval_data)
    print("Evaluate epoch", engine.epoch, file=log.v4)
    engine.eval_model(
      output_file=config.value("eval_output_file", None),
      output_per_seq_file=config.value("eval_output_file_per_seq", None),
      loss_name=config.value("loss_name", None),
      output_per_seq_format=config.list("output_per_seq_format", ["score"]),
      output_per_seq_file_format=config.value("output_per_seq_file_format", "txt"))
  elif task in ['forward', 'hpx']:
    assert eval_data is not None, 'no eval data provided'
    combine_labels = config.value('combine_labels', '')
    engine.use_search_flag = config.bool("forward_use_search", False)
    if config.has("epoch"):
      config.set('load_epoch', config.int('epoch', 0))
    engine.init_network_from_config(config)
    output_file = config.value('output_file', 'dump-fwd-epoch-%i.hdf' % engine.epoch)
    engine.forward_to_hdf(
      data=eval_data, output_file=output_file, combine_labels=combine_labels,
      batch_size=config.int('forward_batch_size', 0))
  elif task == "search":
    engine.use_search_flag = True
    engine.init_network_from_config(config)
    if config.value("search_data", "eval") in ["train", "dev", "eval"]:
      data = {"train": train_data, "dev": dev_data, "eval": eval_data}[config.value("search_data", "eval")]
      assert data, "set search_data"
    else:
      data = init_dataset(config.opt_typed_value("search_data"))
    engine.search(
      data,
      do_eval=config.bool("search_do_eval", True),
      output_layer_names=config.typed_value("search_output_layer", "output"),
      output_file=config.value("search_output_file", ""),
      output_file_format=config.value("search_output_file_format", "txt"))
  elif task == 'compute_priors':
    assert train_data is not None, 'train data for priors should be provided'
    engine.init_network_from_config(config)
    engine.compute_priors(dataset=train_data, config=config)
  elif task == 'theano_graph':
    import theano.printing
    import theano.compile.io
    import theano.compile.function_module
    engine.start_epoch = 1
    engine.init_network_from_config(config)
    for task in config.list('theano_graph.task', ['train']):
      func = engine.devices[-1].get_compute_func(task)
      prefix = config.value("theano_graph.prefix", "current") + ".task"
      print("dumping to %s.* ..." % prefix, file=log.v1)
      theano.printing.debugprint(func, file=open("%s.optimized_func.txt" % prefix, "w"))
      assert isinstance(func.maker, theano.compile.function_module.FunctionMaker)
      for inp in func.maker.inputs:
        assert isinstance(inp, theano.compile.io.In)
        if inp.update:
          theano.printing.debugprint(
            inp.update, file=open("%s.unoptimized.var_%s_update.txt" % (prefix, inp.name), "w"))
      theano.printing.pydotprint(func, format='png', var_with_name_simple=True,
                                 outfile="%s.png" % prefix)
  elif task == 'analyze':  # anything based on the network + Device
    statistics = config.list('statistics', None)
    engine.init_network_from_config(config)
    engine.analyze(data=eval_data or dev_data, statistics=statistics)
  elif task == "analyze_data":  # anything just based on the data
    analyze_data(config)
  elif task == "classify":
    assert eval_data is not None, 'no eval data provided'
    assert config.has('label_file'), 'no output file provided'
    label_file = config.value('label_file', '')
    engine.init_network_from_config(config)
    engine.classify(engine.devices[0], eval_data, label_file)
  elif task == "hyper_param_tuning":
    import HyperParamTuning
    tuner = HyperParamTuning.Optimization(config=config, train_data=train_data)
    tuner.work()
  elif task == "cleanup_old_models":
    engine.cleanup_old_models(ask_for_confirmation=True)
  elif task == "daemon":
    engine.init_network_from_config(config)
    engine.daemon(config)
  elif task == "server":
    print("Server Initiating", file=log.v1)
    server.run()
  elif task == "search_server":
    engine.use_search_flag = True
    engine.init_network_from_config(config)
    engine.web_server(port=config.int("web_server_port", 12380))
  elif task.startswith("config:"):
    action = config.typed_dict[task[len("config:"):]]
    print("Task: %r" % action, file=log.v1)
    assert callable(action)
    action()
  elif task.startswith("optional-config:"):
    action = config.typed_dict.get(task[len("optional-config:"):], None)
    if action is None:
      print("No task found for %r, so just quitting." % task, file=log.v1)
    else:
      print("Task: %r" % action, file=log.v1)
      assert callable(action)
      action()
  elif task == "nop":
    print("Task: No-operation", file=log.v1)
  elif task == "nop_init_net_train":
    print("Task: No-operation, despite initializing the network (for training)", file=log.v1)
    engine.init_train_from_config(config, train_data, dev_data, eval_data)
  elif task == "initialize_model":
    engine.init_train_from_config(config, train_data, dev_data, eval_data)
    engine.save_model(config.value('model', 'dummy'))
  else:
    assert False, "unknown task: %s" % task

  print(("elapsed: %s" % hms_fraction(time.time() - start_time)), file=log.v3)
Example #7
def execute_main_task():
    """
    Executes the main task (via config ``task`` option).
    """
    from Util import hms_fraction
    start_time = time.time()
    task = config.value('task', 'train')
    if config.is_true("dry_run"):
        print("Dry run, will not save anything.", file=log.v1)
    if task == 'train':
        assert train_data.have_seqs(
        ), "no train files specified, check train option: %s" % config.value(
            'train', None)
        engine.init_train_from_config(config, train_data, dev_data, eval_data)
        engine.train()
    elif task == "eval":
        epoch = config.int("epoch", -1)
        load_epoch = config.int("load_epoch", -1)
        if epoch >= 0:
            assert (load_epoch < 0) or (
                load_epoch == epoch), "epoch and load_epoch have to match"
            engine.epoch = epoch
            config.set('load_epoch', engine.epoch)
        else:
            assert load_epoch >= 0, "specify epoch or load_epoch"
            engine.epoch = load_epoch
        engine.init_train_from_config(config, train_data, dev_data, eval_data)
        print("Evaluate epoch", engine.epoch, file=log.v4)
        engine.eval_model(
            output_file=config.value("eval_output_file", None),
            output_per_seq_file=config.value("eval_output_file_per_seq", None),
            loss_name=config.value("loss_name", None),
            output_per_seq_format=config.list("output_per_seq_format",
                                              ["score"]),
            output_per_seq_file_format=config.value(
                "output_per_seq_file_format", "txt"))
    elif task in ['forward', 'hpx']:
        assert eval_data is not None, 'no eval data provided'
        combine_labels = config.value('combine_labels', '')
        engine.use_search_flag = config.bool("forward_use_search", False)
        if config.has("epoch"):
            config.set('load_epoch', config.int('epoch', 0))
        engine.init_network_from_config(config)
        output_file = config.value('output_file',
                                   'dump-fwd-epoch-%i.hdf' % engine.epoch)
        engine.forward_to_hdf(data=eval_data,
                              output_file=output_file,
                              combine_labels=combine_labels,
                              batch_size=config.int('forward_batch_size', 0))
    elif task == "search":
        engine.use_search_flag = True
        engine.init_network_from_config(config)
        if config.value("search_data", "eval") in ["train", "dev", "eval"]:
            data = {
                "train": train_data,
                "dev": dev_data,
                "eval": eval_data
            }[config.value("search_data", "eval")]
            assert data, "set search_data"
        else:
            data = init_dataset(config.opt_typed_value("search_data"))
        engine.search(
            data,
            do_eval=config.bool("search_do_eval", True),
            output_layer_names=config.typed_value("search_output_layer",
                                                  "output"),
            output_file=config.value("search_output_file", ""),
            output_file_format=config.value("search_output_file_format",
                                            "txt"))
    elif task == 'compute_priors':
        assert train_data is not None, 'train data for priors should be provided'
        engine.init_network_from_config(config)
        engine.compute_priors(dataset=train_data, config=config)
    elif task == 'theano_graph':
        # noinspection PyPackageRequirements,PyUnresolvedReferences
        import theano.printing
        # noinspection PyPackageRequirements,PyUnresolvedReferences
        import theano.compile.io
        # noinspection PyPackageRequirements,PyUnresolvedReferences
        import theano.compile.function_module
        engine.start_epoch = 1
        engine.init_network_from_config(config)
        for task in config.list('theano_graph.task', ['train']):
            func = engine.devices[-1].get_compute_func(task)
            prefix = config.value("theano_graph.prefix", "current") + ".task"
            print("dumping to %s.* ..." % prefix, file=log.v1)
            theano.printing.debugprint(func,
                                       file=open(
                                           "%s.optimized_func.txt" % prefix,
                                           "w"))
            assert isinstance(func.maker,
                              theano.compile.function_module.FunctionMaker)
            for inp in func.maker.inputs:
                assert isinstance(inp, theano.compile.io.In)
                if inp.update:
                    theano.printing.debugprint(
                        inp.update,
                        file=open(
                            "%s.unoptimized.var_%s_update.txt" %
                            (prefix, inp.name), "w"))
            theano.printing.pydotprint(func,
                                       format='png',
                                       var_with_name_simple=True,
                                       outfile="%s.png" % prefix)
    elif task == 'analyze':  # anything based on the network + Device
        statistics = config.list('statistics', None)
        engine.init_network_from_config(config)
        engine.analyze(data=eval_data or dev_data, statistics=statistics)
    elif task == "analyze_data":  # anything just based on the data
        analyze_data(config)
    elif task == "classify":
        assert eval_data is not None, 'no eval data provided'
        assert config.has('label_file'), 'no output file provided'
        label_file = config.value('label_file', '')
        engine.init_network_from_config(config)
        engine.classify(engine.devices[0], eval_data, label_file)
    elif task == "hyper_param_tuning":
        import HyperParamTuning
        tuner = HyperParamTuning.Optimization(config=config,
                                              train_data=train_data)
        tuner.work()
    elif task == "cleanup_old_models":
        engine.cleanup_old_models(ask_for_confirmation=True)
    elif task == "daemon":
        engine.init_network_from_config(config)
        engine.daemon(config)
    elif task == "server":
        print("Server Initiating", file=log.v1)
        server.run()
    elif task == "search_server":
        engine.use_search_flag = True
        engine.init_network_from_config(config)
        engine.web_server(port=config.int("web_server_port", 12380))
    elif task.startswith("config:"):
        action = config.typed_dict[task[len("config:"):]]
        print("Task: %r" % action, file=log.v1)
        assert callable(action)
        action()
    elif task.startswith("optional-config:"):
        action = config.typed_dict.get(task[len("optional-config:"):], None)
        if action is None:
            print("No task found for %r, so just quitting." % task,
                  file=log.v1)
        else:
            print("Task: %r" % action, file=log.v1)
            assert callable(action)
            action()
    elif task == "nop":
        print("Task: No-operation", file=log.v1)
    elif task == "nop_init_net_train":
        print(
            "Task: No-operation, despite initializing the network (for training)",
            file=log.v1)
        engine.init_train_from_config(config, train_data, dev_data, eval_data)
    elif task == "initialize_model":
        engine.init_train_from_config(config, train_data, dev_data, eval_data)
        engine.save_model(config.value('model', 'dummy'))
    else:
        assert False, "unknown task: %s" % task

    print(("elapsed: %s" % hms_fraction(time.time() - start_time)),
          file=log.v3)
Example #8
def dump_dataset(dataset, options):
  """
  :type dataset: Dataset.Dataset
  :param options: argparse.Namespace
  """
  print("Epoch: %i" % options.epoch, file=log.v3)
  dataset.init_seq_order(epoch=options.epoch)
  print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
  print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
  assert options.key in dataset.get_data_keys()

  if options.get_num_seqs:
    print("Get num seqs.")
    print("estimated_num_seqs: %r" % dataset.estimated_num_seqs)
    try:
      print("num_seqs: %r" % dataset.num_seqs)
    except Exception as exc:
      print("num_seqs exception %r, which is valid, so we count." % exc)
      seq_idx = 0
      if dataset.get_target_list():
        default_target = dataset.get_target_list()[0]
      else:
        default_target = None
      while dataset.is_less_than_num_seqs(seq_idx):
        dataset.load_seqs(seq_idx, seq_idx + 1)
        if seq_idx % 10000 == 0:
          if default_target:
            targets = dataset.get_targets(default_target, seq_idx)
            postfix = " (targets = %r...)" % (targets[:10],)
          else:
            postfix = ""
          print("%i ...%s" % (seq_idx, postfix))
        seq_idx += 1
      print("accumulated num seqs: %i" % seq_idx)
    print("Done.")
    return

  dump_file = None
  if options.type == "numpy":
    print("Dump files: %r*%r" % (options.dump_prefix, options.dump_postfix), file=log.v3)
  elif options.type == "stdout":
    print("Dump to stdout", file=log.v3)
    if options.stdout_limit is not None:
      Util.set_pretty_print_default_limit(options.stdout_limit)
      numpy.set_printoptions(
        threshold=sys.maxsize if options.stdout_limit == float("inf") else int(options.stdout_limit))
    if options.stdout_as_bytes:
      Util.set_pretty_print_as_bytes(options.stdout_as_bytes)
  elif options.type == "print_tag":
    print("Dump seq tag to stdout", file=log.v3)
  elif options.type == "dump_tag":
    dump_file = open("%sseq-tags.txt" % options.dump_prefix, "w")
    print("Dump seq tag to file: %s" % (dump_file.name,), file=log.v3)
  elif options.type == "dump_seq_len":
    dump_file = open("%sseq-lens.txt" % options.dump_prefix, "w")
    print("Dump seq lens to file: %s" % (dump_file.name,), file=log.v3)
    dump_file.write("{\n")
  elif options.type == "print_shape":
    print("Dump shape to stdout", file=log.v3)
  elif options.type == "plot":
    print("Plot.", file=log.v3)
  elif options.type == "interactive":
    print("Interactive debug shell.", file=log.v3)
  elif options.type == "null":
    print("No dump.")
  else:
    raise Exception("unknown dump option type %r" % options.type)

  start_time = time.time()
  stats = Stats() if (options.stats or options.dump_stats) else None
  seq_len_stats = {key: Stats() for key in dataset.get_data_keys()}
  seq_idx = options.startseq
  if options.endseq < 0:
    options.endseq = float("inf")
  while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
    dataset.load_seqs(seq_idx, seq_idx + 1)
    complete_frac = dataset.get_complete_frac(seq_idx)
    start_elapsed = time.time() - start_time
    try:
      num_seqs_s = str(dataset.num_seqs)
    except NotImplementedError:
      try:
        num_seqs_s = "~%i" % dataset.estimated_num_seqs
      except TypeError:  # a number is required, not NoneType
        num_seqs_s = "?"
    progress_prefix = "%i/%s" % (seq_idx, num_seqs_s)
    progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
    if complete_frac > 0:
      total_time_estimated = start_elapsed / complete_frac
      remaining_estimated = total_time_estimated - start_elapsed
      progress += " (%s)" % hms(remaining_estimated)
    if options.type == "print_tag":
      print("seq %s tag:" % (progress if log.verbose[2] else progress_prefix), dataset.get_tag(seq_idx))
    elif options.type == "dump_tag":
      print("seq %s tag:" % (progress if log.verbose[2] else progress_prefix), dataset.get_tag(seq_idx))
      dump_file.write("%s\n" % dataset.get_tag(seq_idx))
    elif options.type == "dump_seq_len":
      seq_len = dataset.get_seq_length(seq_idx)[options.key]
      print(
        "seq %s tag:" % (progress if log.verbose[2] else progress_prefix),
        dataset.get_tag(seq_idx), "%r len:" % options.key, seq_len)
      dump_file.write("%r: %r,\n" % (dataset.get_tag(seq_idx), seq_len))
    else:
      data = dataset.get_data(seq_idx, options.key)
      if options.type == "numpy":
        numpy.savetxt("%s%i.data%s" % (options.dump_prefix, seq_idx, options.dump_postfix), data)
      elif options.type == "stdout":
        print("seq %s tag:" % progress, dataset.get_tag(seq_idx))
        print("seq %s data:" % progress, pretty_print(data))
      elif options.type == "print_shape":
        print("seq %s data shape:" % progress, data.shape)
      elif options.type == "plot":
        plot(data)
      for target in dataset.get_target_list():
        targets = dataset.get_targets(target, seq_idx)
        if options.type == "numpy":
          numpy.savetxt("%s%i.targets.%s%s" % (options.dump_prefix, seq_idx, target, options.dump_postfix), targets, fmt='%i')
        elif options.type == "stdout":
          extra = ""
          if target in dataset.labels and len(dataset.labels[target]) > 1:
            assert dataset.can_serialize_data(target)
            extra += " (%r)" % dataset.serialize_data(key=target, data=targets)
          print("seq %i target %r: %s%s" % (seq_idx, target, pretty_print(targets), extra))
        elif options.type == "print_shape":
          print("seq %i target %r shape:" % (seq_idx, target), targets.shape)
      if options.type == "interactive":
        from Debug import debug_shell
        debug_shell(locals())
    seq_len = dataset.get_seq_length(seq_idx)
    for key in dataset.get_data_keys():
      seq_len_stats[key].collect([seq_len[key]])
    if stats:
      stats.collect(data)  # note: assumes a dump type above that loaded `data`
    if options.type == "null":
      Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)

    seq_idx += 1

  print("Done. Total time %s. More seqs which we did not dumped: %s" % (
    hms_fraction(time.time() - start_time), dataset.is_less_than_num_seqs(seq_idx)), file=log.v2)
  for key in dataset.get_data_keys():
    seq_len_stats[key].dump(stream_prefix="Seq-length %r " % key, stream=log.v2)
  if stats:
    stats.dump(output_file_prefix=options.dump_stats, stream_prefix="Data %r " % options.key, stream=log.v1)
  if options.type == "dump_seq_len":
    dump_file.write("}\n")
  if dump_file:
    print("Dumped to file:", dump_file.name, file=log.v2)
    dump_file.close()
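
The progress string in dump_dataset() estimates the remaining time by extrapolation: if complete_frac of the dataset took start_elapsed seconds, the estimated total is start_elapsed / complete_frac. A minimal sketch of that estimate (estimate_remaining is a hypothetical helper, not part of the original):

def estimate_remaining(start_elapsed, complete_frac):
  """Extrapolate remaining seconds from elapsed time and finished fraction."""
  assert 0.0 < complete_frac <= 1.0
  total_time_estimated = start_elapsed / complete_frac
  return total_time_estimated - start_elapsed

# After 30 s at 25% complete, about 90 s should remain.
assert estimate_remaining(30.0, 0.25) == 90.0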