def main(argv):
  arg_parser = argparse.ArgumentParser(description='Dump search scores and other info to HDF file.')
  arg_parser.add_argument('config', help="filename to config-file")
  arg_parser.add_argument("--dataset", default="config:train")
  arg_parser.add_argument("--epoch", type=int, default=-1, help="-1 for last epoch")
  arg_parser.add_argument("--output_file", help='hdf', required=True)
  arg_parser.add_argument("--rec_layer_name", default="output")
  arg_parser.add_argument("--cheating", action="store_true", help="add ground truth to the beam")
  arg_parser.add_argument("--att_weights", action="store_true", help="dump all softmax_over_spatial layers")
  arg_parser.add_argument("--verbosity", default=4, type=int, help="5 for all seqs (default: 4)")
  arg_parser.add_argument("--seq_list", nargs="+", help="use only these seqs")
  args, remaining_args = arg_parser.parse_known_args(argv[1:])
  init(config_filename=args.config, log_verbosity=args.verbosity, remaining_args=remaining_args)
  dataset = init_dataset(args.dataset)
  print("Dataset:")
  pprint(dataset)
  if args.seq_list:
    dataset.seq_tags_filter = set(args.seq_list)
    dataset.partition_epoch = 1  # reset
    if isinstance(dataset, MetaDataset):
      for sub_dataset in dataset.datasets.values():
        sub_dataset.seq_tags_filter = set(args.seq_list)
        sub_dataset.partition_epoch = 1
    dataset.finish_epoch()  # enforce reset
  if dataset.seq_tags_filter is not None:
    print("Using sequences:")
    pprint(dataset.seq_tags_filter)
  if args.epoch >= 1:
    config.set("load_epoch", args.epoch)

  def net_dict_post_proc(net_dict):
    """
    :param dict[str] net_dict:
    :return: net_dict
    :rtype: dict[str]
    """
    prepare_compile(
      rec_layer_name=args.rec_layer_name, net_dict=net_dict,
      cheating=args.cheating, dump_att_weights=args.att_weights,
      hdf_filename=args.output_file, possible_labels=dataset.labels)
    return net_dict

  engine = Engine(config=config)
  engine.use_search_flag = True
  engine.init_network_from_config(config, net_dict_post_proc=net_dict_post_proc)
  engine.search(
    dataset, do_eval=config.bool("search_do_eval", True),
    output_layer_names=args.rec_layer_name)
  engine.finalize()
  print("Search finished.")
  assert os.path.exists(args.output_file), "hdf file not dumped?"
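# Minimal entry-point sketch for the tool above. The config filename and HDF
# output path in the example call are placeholders, not files from this repo.
#   python dump-search-scores.py my-setup.config --output_file scores.hdf --epoch 80
if __name__ == "__main__":
  import sys
  main(sys.argv)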
def run(self):
  if self.individual.cost is not None:
    return self.individual.cost
  start_time = time.time()
  hyper_param_mapping = self.individual.hyper_param_mapping
  print("Training %r using hyper params:" % self.individual.name, file=log.v2)
  for p in self.optim.hyper_params:
    print(" %s -> %s" % (p.description(), hyper_param_mapping[p]), file=log.v2)
  config = self.optim.create_config_instance(hyper_param_mapping, gpu_ids=self.gpu_ids)
  engine = Engine(config=config)
  train_data = StaticDataset.copy_from_dataset(self.optim.train_data)
  engine.init_train_from_config(config=config, train_data=train_data)
  # Not directly calling train() as we want to have full control.
  engine.epoch = 1
  train_data.init_seq_order(epoch=engine.epoch)
  batches = train_data.generate_batches(
    recurrent_net=engine.network.recurrent,
    batch_size=engine.batch_size,
    max_seqs=engine.max_seqs,
    max_seq_length=int(engine.max_seq_length),
    seq_drop=engine.seq_drop,
    shuffle_batches=engine.shuffle_batches,
    used_data_keys=engine.network.used_data_keys)
  engine.updater.set_learning_rate(engine.learning_rate, session=engine.tf_session)
  trainer = Runner(engine=engine, dataset=train_data, batches=batches, train=True)
  self.runner = trainer
  if self.cancel_flag:
    raise CancelTrainingException("Trainer cancel flag is set")
  trainer.run(report_prefix="hyper param tune train %r" % self.individual.name)
  if not trainer.finalized:
    print("Trainer exception:", trainer.run_exception, file=log.v1)
    raise trainer.run_exception
  cost = trainer.score["cost:output"]
  print(
    "Individual %s:" % self.individual.name,
    "Train cost:", cost,
    "elapsed time:", hms_fraction(time.time() - start_time),
    file=self.optim.log)
  self.individual.cost = cost
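# Design note: the early return at the top of run() memoizes the training cost
# on the individual, so a tuning scheduler can call run() repeatedly and only
# pay for the first call. A self-contained sketch of just that pattern (42.0
# stands in for the expensive training run):
class _MemoizedRun:
  cost = None

  def run(self):
    if self.cost is not None:
      return self.cost  # cached, returns immediately
    self.cost = 42.0  # the expensive work would go here
    return self.cost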
def create_graph(train_flag, eval_flag, search_flag, net_dict):
  """
  :param bool train_flag:
  :param bool eval_flag:
  :param bool search_flag:
  :param dict[str,dict[str]] net_dict:
  :return: adds to the current graph, and then returns the network
  :rtype: TFNetwork.TFNetwork
  """
  print("Loading network, train flag %s, eval flag %s, search flag %s" % (train_flag, eval_flag, search_flag))
  from TFEngine import Engine
  from TFNetwork import TFNetwork
  network, updater = Engine.create_network(
    config=config, rnd_seed=1,
    train_flag=train_flag, eval_flag=eval_flag, search_flag=search_flag,
    net_dict=net_dict)
  assert isinstance(network, TFNetwork)
  return network
def create_graph(train_flag, eval_flag, search_flag):
  """
  :param bool train_flag:
  :param bool eval_flag:
  :param bool search_flag:
  :return: adds to the current graph, and then returns the network
  :rtype: TFNetwork.TFNetwork
  """
  assert 'network' in config.typed_dict
  print("Loading network, train flag %s, eval flag %s, search flag %s" % (train_flag, eval_flag, search_flag))
  from TFEngine import Engine
  from TFNetwork import TFNetwork
  network, updater = Engine.create_network(
    config=config, rnd_seed=1,
    train_flag=train_flag, eval_flag=eval_flag, search_flag=search_flag,
    net_dict=config.typed_dict["network"])
  assert isinstance(network, TFNetwork)
  return network
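# Hypothetical usage sketch for create_graph(): it assumes `config` is the
# globally initialized Config (as the function itself does), and builds an
# inference graph in a fresh TF graph context, as the compile tools typically do.
import tensorflow as tf

with tf.Graph().as_default():
  network = create_graph(train_flag=False, eval_flag=True, search_flag=False)
  print("Layers:", sorted(network.layers.keys()))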
def benchmark(lstm_unit, use_gpu):
  """
  :param str lstm_unit: e.g. "LSTMBlock", one of LstmCellTypes
  :param bool use_gpu:
  :return: runtime in seconds of the training itself, excluding initialization
  :rtype: float
  """
  device = {True: "GPU", False: "CPU"}[use_gpu]
  key = "%s:%s" % (device, lstm_unit)
  print(">>> Start benchmark for %s." % key)
  config = Config()
  config.update(make_config_dict(lstm_unit=lstm_unit, use_gpu=use_gpu))
  dataset_kwargs = config.typed_value("train")
  Dataset.kwargs_update_from_config(config, dataset_kwargs)
  dataset = init_dataset(dataset_kwargs)
  engine = Engine(config=config)
  engine.init_train_from_config(config=config, train_data=dataset)
  print(">>> Start training now for %s." % key)
  start_time = time.time()
  engine.train()
  runtime = time.time() - start_time
  print(">>> Runtime of %s: %s" % (key, hms_fraction(runtime)))
  engine.finalize()
  return runtime
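# Hypothetical driver for benchmark(): compare a few LSTM kernels on CPU and
# GPU. The unit names are examples from LstmCellTypes; adjust to whatever your
# TF build actually provides.
results = {}
for unit in ["LSTMBlock", "NativeLstm2"]:
  for use_gpu in [False, True]:
    dev = {True: "GPU", False: "CPU"}[use_gpu]
    results["%s:%s" % (dev, unit)] = benchmark(lstm_unit=unit, use_gpu=use_gpu)
# Report fastest first.
for key, runtime in sorted(results.items(), key=lambda kv: kv[1]):
  print("%s: %.3f sec" % (key, runtime))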
config = Config()
config.update(dict(
  network={
    "fw0": {"class": "rec", "unit": "NativeLstm2", "dropout": 0.1, "n_out": 10},
    "output": {"class": "softmax", "loss": "ce", "from": ["fw0"]}
  },

  # training
  nadam=True,
  learning_rate=0.01,
  num_epochs=100,
  debug_add_check_numerics_ops=True,
  model="/tmp/%s/returnn-demo-as-framework/model" % get_login_username(),
  cleanup_old_models=True,
  learning_rate_control="newbob_multi_epoch",
  learning_rate_control_relative_error_relative_lr=True,
  newbob_multi_num_epochs=3,
  newbob_multi_update_interval=1,
  newbob_learning_rate_decay=0.9,
  learning_rate_file="/tmp/%s/returnn-demo-as-framework/newbob.data" % get_login_username(),

  # log
  log_verbosity=3
))

engine = Engine(config)
train_data = init_dataset({"class": "Task12AXDataset", "num_seqs": 1000, "name": "train"})
dev_data = init_dataset({"class": "Task12AXDataset", "num_seqs": 100, "name": "dev", "fixed_random_seed": 1})
engine.init_train_from_config(train_data=train_data, dev_data=dev_data)
engine.train()
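# Wrap-up sketch: release the TF session once training is done; finalize() is
# used the same way at the end of the search and benchmark snippets above.
engine.finalize()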