def main(argv):
  arg_parser = argparse.ArgumentParser(description='Dump search scores and other info to HDF file.')
  arg_parser.add_argument("config", help="path to the config file")
  arg_parser.add_argument("--dataset", default="config:train")
  arg_parser.add_argument("--epoch", type=int, default=-1, help="-1 for last epoch")
  arg_parser.add_argument("--output_file", help='hdf', required=True)
  arg_parser.add_argument("--rec_layer_name", default="output")
  arg_parser.add_argument("--cheating", action="store_true", help="add ground truth to the beam")
  arg_parser.add_argument("--att_weights", action="store_true", help="dump all softmax_over_spatial layers")
  arg_parser.add_argument("--verbosity", default=4, type=int, help="5 for all seqs (default: 4)")
  arg_parser.add_argument("--seq_list", nargs="+", help="use only these seqs")
  args, remaining_args = arg_parser.parse_known_args(argv[1:])
  init(config_filename=args.config, log_verbosity=args.verbosity, remaining_args=remaining_args)

  dataset = init_dataset(args.dataset)
  print("Dataset:")
  pprint(dataset)
  if args.seq_list:
    dataset.seq_tags_filter = set(args.seq_list)
    dataset.partition_epoch = 1  # reset
    if isinstance(dataset, MetaDataset):
      for sub_dataset in dataset.datasets.values():
        sub_dataset.seq_tags_filter = set(args.seq_list)  # apply the filter to each sub-dataset as well
        sub_dataset.partition_epoch = 1
    dataset.finish_epoch()  # enforce reset
  if dataset.seq_tags_filter is not None:
    print("Using sequences:")
    pprint(dataset.seq_tags_filter)
  if args.epoch >= 1:
    config.set("load_epoch", args.epoch)

  def net_dict_post_proc(net_dict):
    """
    :param dict[str] net_dict:
    :return: net_dict
    :rtype: dict[str]
    """
    prepare_compile(
      rec_layer_name=args.rec_layer_name, net_dict=net_dict,
      cheating=args.cheating, dump_att_weights=args.att_weights,
      hdf_filename=args.output_file, possible_labels=dataset.labels)
    return net_dict

  engine = Engine(config=config)
  engine.use_search_flag = True
  engine.init_network_from_config(config, net_dict_post_proc=net_dict_post_proc)
  engine.search(
    dataset,
    do_eval=config.bool("search_do_eval", True),
    output_layer_names=args.rec_layer_name)
  engine.finalize()
  print("Search finished.")
  assert os.path.exists(args.output_file), "hdf file not dumped?"
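
A minimal entry-point sketch for invoking this tool from the command line (the `sys` import and the `__main__` guard are assumptions, following the usual RETURNN tool convention):

if __name__ == "__main__":
  import sys
  main(sys.argv)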
Example #2
 def run(self):
   if self.individual.cost is not None:
     return self.individual.cost
   start_time = time.time()
   hyper_param_mapping = self.individual.hyper_param_mapping
   print("Training %r using hyper params:" % self.individual.name, file=log.v2)
   for p in self.optim.hyper_params:
     print(" %s -> %s" % (p.description(), hyper_param_mapping[p]), file=log.v2)
   config = self.optim.create_config_instance(hyper_param_mapping, gpu_ids=self.gpu_ids)
   engine = Engine(config=config)
   train_data = StaticDataset.copy_from_dataset(self.optim.train_data)
   engine.init_train_from_config(config=config, train_data=train_data)
   # Not directly calling train() as we want to have full control.
   engine.epoch = 1
   train_data.init_seq_order(epoch=engine.epoch)
   batches = train_data.generate_batches(
     recurrent_net=engine.network.recurrent,
     batch_size=engine.batch_size,
     max_seqs=engine.max_seqs,
     max_seq_length=int(engine.max_seq_length),
     seq_drop=engine.seq_drop,
     shuffle_batches=engine.shuffle_batches,
     used_data_keys=engine.network.used_data_keys)
   engine.updater.set_learning_rate(engine.learning_rate)
   trainer = Runner(engine=engine, dataset=train_data, batches=batches, train=True)
   self.runner = trainer
   if self.cancel_flag:
     raise CancelTrainingException("Trainer cancel flag is set")
   trainer.run(report_prefix="hyper param tune train %r" % self.individual.name)
   if not trainer.finalized:
     print("Trainer exception:", trainer.run_exception, file=log.v1)
     raise trainer.run_exception
   cost = trainer.score["cost:output"]
   print(
     "Individual %s:" % self.individual.name,
     "Train cost:", cost,
     "elapsed time:", hms_fraction(time.time() - start_time),
     file=self.optim.log)
   self.individual.cost = cost
   return cost  # return the cost, matching the cached-cost early return above
Example #3
 def run(self):
   if self.individual.cost is not None:
     return self.individual.cost
   start_time = time.time()
   hyper_param_mapping = self.individual.hyper_param_mapping
   print("Training %r using hyper params:" % self.individual.name, file=log.v2)
   for p in self.optim.hyper_params:
     print(" %s -> %s" % (p.description(), hyper_param_mapping[p]), file=log.v2)
   config = self.optim.create_config_instance(hyper_param_mapping, gpu_ids=self.gpu_ids)
   engine = Engine(config=config)
   train_data = StaticDataset.copy_from_dataset(self.optim.train_data)
   engine.init_train_from_config(config=config, train_data=train_data)
   # Not directly calling train() as we want to have full control.
   engine.epoch = 1
   train_data.init_seq_order(epoch=engine.epoch)
   batches = train_data.generate_batches(
     recurrent_net=engine.network.recurrent,
     batch_size=engine.batch_size,
     max_seqs=engine.max_seqs,
     max_seq_length=int(engine.max_seq_length),
     seq_drop=engine.seq_drop,
     shuffle_batches=engine.shuffle_batches,
     used_data_keys=engine.network.used_data_keys)
   engine.updater.set_learning_rate(engine.learning_rate, session=engine.tf_session)
   trainer = Runner(engine=engine, dataset=train_data, batches=batches, train=True)
   self.runner = trainer
   if self.cancel_flag:
     raise CancelTrainingException("Trainer cancel flag is set")
   trainer.run(report_prefix="hyper param tune train %r" % self.individual.name)
   if not trainer.finalized:
     print("Trainer exception:", trainer.run_exception, file=log.v1)
     raise trainer.run_exception
   cost = trainer.score["cost:output"]
   print(
     "Individual %s:" % self.individual.name,
     "Train cost:", cost,
     "elapsed time:", hms_fraction(time.time() - start_time),
     file=self.optim.log)
   self.individual.cost = cost
   return cost  # return the cost, matching the cached-cost early return above
Example #4
def create_graph(train_flag, eval_flag, search_flag, net_dict):
  """
  :param bool train_flag:
  :param bool eval_flag:
  :param bool search_flag:
  :param dict[str,dict[str]] net_dict:
  :return: adds to the current graph, and then returns the network
  :rtype: TFNetwork.TFNetwork
  """
  print("Loading network, train flag %s, eval flag %s, search flag %s" % (train_flag, eval_flag, search_flag))
  from TFEngine import Engine
  from TFNetwork import TFNetwork
  network, updater = Engine.create_network(
    config=config, rnd_seed=1,
    train_flag=train_flag, eval_flag=eval_flag, search_flag=search_flag,
    net_dict=net_dict)
  assert isinstance(network, TFNetwork)
  return network
Example #5
def create_graph(train_flag, eval_flag, search_flag):
  """
  :param bool train_flag:
  :param bool eval_flag:
  :param bool search_flag:
  :return: adds to the current graph, and then returns the network
  :rtype: TFNetwork.TFNetwork
  """
  assert 'network' in config.typed_dict
  print("Loading network, train flag %s, eval flag %s, search flag %s" % (train_flag, eval_flag, search_flag))
  from TFEngine import Engine
  from TFNetwork import TFNetwork
  network, updater = Engine.create_network(
    config=config, rnd_seed=1,
    train_flag=train_flag, eval_flag=eval_flag, search_flag=search_flag,
    net_dict=config.typed_dict["network"])
  assert isinstance(network, TFNetwork)
  return network
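
A brief usage sketch that applies to either create_graph variant (it assumes the module-level `config` has already been loaded, as in the compile-graph tools; the tensorflow import is an addition):

import tensorflow as tf

with tf.Graph().as_default():
  network = create_graph(train_flag=False, eval_flag=False, search_flag=True)
  print("Layers:", sorted(network.layers.keys()))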
Example #6
def benchmark(lstm_unit, use_gpu):
  """
  :param str lstm_unit: e.g. "LSTMBlock", one of LstmCellTypes
  :param bool use_gpu:
  :return: runtime in seconds of the training itself, excluding initialization
  :rtype: float
  """
  device = {True: "GPU", False: "CPU"}[use_gpu]
  key = "%s:%s" % (device, lstm_unit)
  print(">>> Start benchmark for %s." % key)
  config = Config()
  config.update(make_config_dict(lstm_unit=lstm_unit, use_gpu=use_gpu))
  dataset_kwargs = config.typed_value("train")
  Dataset.kwargs_update_from_config(config, dataset_kwargs)
  dataset = init_dataset(dataset_kwargs)
  engine = Engine(config=config)
  engine.init_train_from_config(config=config, train_data=dataset)
  print(">>> Start training now for %s." % key)
  start_time = time.time()
  engine.train()
  runtime = time.time() - start_time
  print(">>> Runtime of %s: %s" % (key, hms_fraction(runtime)))
  engine.finalize()
  return runtime
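
A small driver sketch to compare devices and units (the unit names are taken from the examples in this section; the results dict is illustrative):

results = {}
for unit in ["LSTMBlock", "NativeLstm2"]:
  for use_gpu in [False, True]:
    device = {True: "GPU", False: "CPU"}[use_gpu]
    results["%s:%s" % (device, unit)] = benchmark(lstm_unit=unit, use_gpu=use_gpu)
print(results)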
Example #7
# NOTE: the opening of this example was truncated in the source. A plausible
# reconstruction (old-style RETURNN imports matching the other examples, plus
# the start of the config dict) is sketched here:
from Config import Config
from Dataset import init_dataset
from TFEngine import Engine
from Util import get_login_username

config = Config()
config.update(dict(
  # ... (earlier config entries truncated in the source) ...

  # network
  network={
    "fw0": {"class": "rec", "unit": "NativeLstm2", "dropout": 0.1, "n_out": 10},
    "output": {"class": "softmax", "loss": "ce", "from": ["fw0"]}
  },

  # training
  nadam=True,
  learning_rate=0.01,
  num_epochs=100,
  debug_add_check_numerics_ops=True,

  model="/tmp/%s/returnn-demo-as-framework/model" % get_login_username(),
  cleanup_old_models=True,

  learning_rate_control="newbob_multi_epoch",
  learning_rate_control_relative_error_relative_lr=True,
  newbob_multi_num_epochs=3, newbob_multi_update_interval=1, newbob_learning_rate_decay=0.9,
  learning_rate_file="/tmp/%s/returnn-demo-as-framework/newbob.data" % get_login_username(),

  # log
  log_verbosity=3
))

engine = Engine(config)

train_data = init_dataset({"class": "Task12AXDataset", "num_seqs": 1000, "name": "train"})
dev_data = init_dataset({"class": "Task12AXDataset", "num_seqs": 100, "name": "dev", "fixed_random_seed": 1})

engine.init_train_from_config(train_data=train_data, dev_data=dev_data)
engine.train()
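
To mirror the cleanup in the earlier examples, one could end the demo with (a hedged addition, not part of the original snippet):

engine.finalize()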