def init(config_filename, log_verbosity, remaining_args=()):
  """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  :param list[str] remaining_args:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  print("Using config file %r." % config_filename)
  assert os.path.exists(config_filename)
  rnn.init_config(
    config_filename=config_filename, command_line_options=remaining_args,
    extra_updates={
      "use_tensorflow": True,
      "log": None,
      "log_verbosity": log_verbosity,
      "task": "search",
    },
    default_config={
      "debug_print_layer_output_template": True,
    })
  global config
  config = rnn.config
  rnn.init_log()
  print("Returnn %s starting up." % os.path.basename(__file__), file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  better_exchook.replace_traceback_format_tb()  # makes some debugging easier
  rnn.init_config_json_network()
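# A hedged usage sketch for the search-task init() above; the config filename and the
# extra command-line options are hypothetical, not from the source.
if __name__ == "__main__":
  init(
    config_filename="my_setup.config",  # hypothetical path; must exist on disk
    log_verbosity=4,
    remaining_args=["++beam_size", "12"])  # forwarded to rnn.init_config()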
def init(config_str, verbosity):
  """
  :param str config_str: either filename to config-file, or dict for dataset
  :param int verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  datasetDict = None
  configFilename = None
  if config_str.strip().startswith("{"):
    print("Using dataset %s." % config_str)
    datasetDict = eval(config_str.strip())
  elif config_str.endswith(".hdf"):
    datasetDict = {"class": "HDFDataset", "files": [config_str]}
    print("Using dataset %r." % datasetDict)
    assert os.path.exists(config_str)
  else:
    configFilename = config_str
    print("Using config file %r." % configFilename)
    assert os.path.exists(configFilename)
  rnn.init_config(config_filename=configFilename, default_config={"cache_size": "0"})
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", verbosity)
  if datasetDict:
    config.set("train", datasetDict)
  rnn.init_log()
  print("Returnn dump-dataset starting up.", file=log.v2)
  rnn.returnn_greeting()
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  rnn.init_data()
  rnn.print_task_properties()
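# A hedged usage sketch for the dump-dataset init() above; all file names are hypothetical.
# config_str is dispatched on its shape: a Python dict literal, an .hdf file, or a config file.
if __name__ == "__main__":
  init(config_str="my_setup.config", verbosity=4)  # regular config file
  # init(config_str="data/train.hdf", verbosity=4)  # wrapped as an HDFDataset
  # init(config_str="{'class': 'Task12AXDataset', 'num_seqs': 100}", verbosity=4)  # eval'd dataset dict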
def init(config_filename, log_verbosity):
  """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  print("Using config file %r." % config_filename)
  assert os.path.exists(config_filename)
  rnn.init_config(
    config_filename=config_filename,
    extra_updates={
      "use_tensorflow": True,
      "log": None,
      "log_verbosity": log_verbosity,
      "task": __file__,  # just extra info for the config
    })
  global config
  config = rnn.config
  rnn.init_log()
  print("Returnn compile-tf-graph starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  rnn.init_config_json_network()
def test_rnn_init_config_py_global_var():
  import rnn
  import tempfile
  with tempfile.NamedTemporaryFile(mode="w", suffix=".config", prefix="test_rnn_initConfig") as cfgfile:
    cfgfile.write("""#!rnn.py
task = config.value("task", "train")
test_value = 42

def test_func():
  return task
""")
    cfgfile.flush()
    rnn.init_config(command_line_options=[cfgfile.name, "--task", "search"])
  assert isinstance(rnn.config, Config)
  pprint(rnn.config.dict)
  pprint(rnn.config.typed_dict)
  assert rnn.config.has("task")
  assert rnn.config.has("test_value")
  assert rnn.config.has("test_func")
  assert_equal(rnn.config.value("task", None), "search")
  assert rnn.config.is_typed("test_value")
  assert_equal(rnn.config.typed_value("test_value"), 42)
  assert rnn.config.is_typed("test_func")
  # So far it's fine.
  # Now something a bit strange.
  # Earlier, this failed, because the command-line overwrote this.
  assert rnn.config.is_typed("task")
  test_func = rnn.config.typed_dict["test_func"]
  assert callable(test_func)
  assert_equal(test_func(), "search")
def init(config_filename, log_verbosity):
  """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("task", "calculate_wer")
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  config.set("use_tensorflow", True)
  rnn.init_log()
  print("Returnn calculate-word-error-rate starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  rnn.print_task_properties()
def test_rnn_init_config_py_cmd_type():
  import rnn
  import tempfile
  with tempfile.NamedTemporaryFile(mode="w", suffix=".config", prefix="test_rnn_initConfig") as cfgfile:
    cfgfile.write("""#!rnn.py
max_seq_length = {'bpe': 75}

def test_func():
  return max_seq_length
""")
    cfgfile.flush()
    rnn.init_config(command_line_options=[cfgfile.name, "++max_seq_length", "0"])
  assert isinstance(rnn.config, Config)
  assert rnn.config.has("max_seq_length")
  assert rnn.config.has("test_func")
  assert rnn.config.is_typed("max_seq_length")
  assert rnn.config.is_typed("test_func")
  test_func = rnn.config.typed_dict["test_func"]
  assert callable(test_func)
  assert_equal(test_func(), 0)
def init(config_filename, log_verbosity):
  """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  config.set("use_tensorflow", True)
  rnn.init_log()
  print("Returnn compile-native-op starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  if 'network' in config.typed_dict:
    print("Loading network")
    from TFNetwork import TFNetwork
    network = TFNetwork(
      name="root", config=config, rnd_seed=1,
      train_flag=False, eval_flag=True, search_flag=False)
    network.construct_from_dict(config.typed_dict["network"])
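# Hypothetical example of a "network" config entry that the block above would construct;
# the layer names and sizes are illustrative assumptions, not from the source.
# network = {
#   "lstm": {"class": "rec", "unit": "NativeLstm2", "n_out": 512},
#   "output": {"class": "softmax", "loss": "ce", "from": ["lstm"]},
# }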
def init(config_filename, cmd_line_opts, dataset_config_str):
  """
  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for init_config method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    rnn.init_config(config_filename, cmd_line_opts)
    rnn.init_log()
  else:
    log.initialize(verbosity=[5])
  print("Returnn hdf_dump starting up.", file=log.v3)
  rnn.init_faulthandler()
  if config_filename:
    rnn.init_data()
    rnn.print_task_properties()
    assert isinstance(rnn.train_data, Dataset)
    dataset = rnn.train_data
  else:
    assert dataset_config_str
    dataset = init_dataset(dataset_config_str)
  print("Source dataset:", dataset.len_info(), file=log.v3)
  return dataset
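# A hedged usage sketch for hdf_dump's init(); the dataset spec is hypothetical.
# Either a config file (the dataset then comes from rnn.train_data) or an explicit
# dataset string must be given.
if __name__ == "__main__":
  dataset = init(
    config_filename=None, cmd_line_opts=[],
    dataset_config_str="{'class': 'HDFDataset', 'files': ['data/train.hdf']}")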
def demo():
  """
  Will print out the different network topologies of the specified pretraining scheme.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++pretrain default ++pretrain_construction_algo from_input" % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.init_log()
  rnn.init_backend_engine()
  if not rnn.config.value("pretrain", ""):
    print("config option 'pretrain' not set, will set it for this demo to 'default'")
    rnn.config.set("pretrain", "default")
  pretrain = pretrain_from_config(rnn.config)
  print("pretrain: %s" % pretrain)
  num_pretrain_epochs = pretrain.get_train_num_epochs()
  from pprint import pprint
  for epoch in range(1, 1 + num_pretrain_epochs):
    print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
    net_json = pretrain.get_network_json_for_epoch(epoch)
    pprint(net_json)
  print("done.")
def init(configFilename, commandLineOptions):
  rnn.init_better_exchook()
  rnn.init_config(configFilename, commandLineOptions)
  global config
  config = rnn.config
  config.set("log", [])
  rnn.init_log()
  print("CRNN dump-dataset starting up.", file=log.v3)
  rnn.init_config_json_network()
def init(configFilename, commandLineOptions):
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  rnn.init_config(configFilename, commandLineOptions)
  global config
  config = rnn.config
  rnn.init_log()
  print("CRNN demo-dataset starting up.", file=log.v3)
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  rnn.init_data()
  rnn.print_task_properties()
def init(configFilename=None):
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if configFilename:
    rnn.init_config(configFilename, command_line_options=[])
    rnn.init_log()
  else:
    log.initialize()
  print("Returnn collect-words starting up.", file=log.v3)
  rnn.init_faulthandler()
  if configFilename:
    rnn.init_config_json_network()
    rnn.init_data()
    rnn.print_task_properties()
def demo():
  """
  Will print out the different network topologies of the specified pretraining scheme.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import argparse
  from Util import dict_diff_str
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument("config")
  arg_parser.add_argument("--diff", action="store_true", help="show diff only")
  arg_parser.add_argument('other_returnn_args', nargs=argparse.REMAINDER, help="config updates or so")
  args = arg_parser.parse_args()
  rnn.init_config(
    config_filename=args.config, command_line_options=args.other_returnn_args,
    extra_updates={"log": []})
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.init_log()
  if not rnn.config.value("pretrain", ""):
    print("config option 'pretrain' not set, will set it for this demo to 'default'")
    rnn.config.set("pretrain", "default")
  pretrain = pretrain_from_config(rnn.config)
  print("pretrain: %s" % pretrain)
  num_pretrain_epochs = pretrain.get_train_num_epochs()
  last_net_json = None
  from pprint import pprint
  for epoch in range(1, 1 + num_pretrain_epochs):
    print("epoch %i (of %i) network json:" % (epoch, num_pretrain_epochs))
    net_json = pretrain.get_network_json_for_epoch(epoch)
    if args.diff:
      if last_net_json is not None:
        print(dict_diff_str(last_net_json, net_json))
      else:
        print("(initial)")
    else:
      pprint(net_json)
    last_net_json = net_json
  print("done.")
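# The demo() above is argparse-driven; a hypothetical invocation (script name assumed):
#   python Pretrain.py my_setup.config --diff ++pretrain default
# --diff prints only the dict-diff between consecutive epoch networks instead of the full json.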
def init(config_filename, log_verbosity):
  """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if config_filename:
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("task", "dump")
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  rnn.init_log()
  print("Returnn dump-dataset-raw-strings starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_faulthandler()
def init(config_filename, log_verbosity):
  """
  :param str config_filename: filename to config-file
  :param int log_verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  print("Using config file %r." % config_filename)
  assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, command_line_options=[])
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", log_verbosity)
  config.set("use_tensorflow", True)
  rnn.init_log()
  print("Returnn compile-native-op starting up.", file=log.v1)
  rnn.returnn_greeting()
  rnn.init_backend_engine()
  assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
  rnn.init_faulthandler()
  rnn.init_config_json_network()
def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
  """
  :param str|None configfile: filename, via init(), this is set
  :param str|None target_mode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
  """
  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set targetMode.

  if not getattr(sys, "argv", None):
    # Set some dummy. Some code might want this (e.g. TensorFlow).
    sys.argv = [__file__]

  global config
  if not config:
    # Some subset of what we do in rnn.init().
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)
    rnn.init_config(config_filename=configfile, extra_updates={"task": target_mode})
    assert rnn.config
    config = rnn.config
    if sprint_opts is not None:
      config.update(sprint_opts)
    rnn.init_log()
    rnn.returnn_greeting(config_filename=configfile)
    rnn.init_backend_engine()
    rnn.init_faulthandler(sigusr1_chain=True)
    rnn.init_config_json_network()

    global Engine
    if BackendEngine.is_tensorflow_selected():
      # Use TFEngine.Engine class instead of Engine.Engine.
      from TFEngine import Engine
    elif BackendEngine.is_theano_selected():
      from Engine import Engine

    import atexit
    atexit.register(_at_exit_handler)

  if target_mode:
    set_target_mode(target_mode)

  _init_dataset()

  if target_mode and target_mode == "forward" and epoch:
    model_filename = config.value('model', '')
    fns = [EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
           for is_pretrain in [False, True]]
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix += ".meta"
    fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
      "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

  global engine
  if not engine:
    devices = rnn.init_theano_devices()
    rnn.print_task_properties(devices)
    rnn.init_engine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)
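# A hedged usage sketch for _init_base(); the config filename, epoch, and options are
# hypothetical. The global-config guard makes repeated calls cheap: only the first call
# loads the config and initializes the backend.
# _init_base(configfile="crnn.config", target_mode="forward", epoch=80,
#            sprint_opts={"epoch": "80"})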
def demo():
  """
  Demo run. Given some learning rate file (with scores / existing lrs),
  will calculate how lrs would have been set, given some config.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options] [++check_learning_rates 1]" % __file__)
    print(
      ("example usage: "
       "python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001") % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.init_log()
  rnn.init_backend_engine()
  check_lr = rnn.config.bool("check_learning_rates", False)
  from Pretrain import pretrain_from_config
  pretrain = pretrain_from_config(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = load_learning_rate_control_from_config(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epoch_data:
    print("No epoch data so far.")
    return
  first_epoch = min(control.epoch_data.keys())
  if first_epoch != 1:
    print("Strange, first epoch from epoch data is %i." % first_epoch)
  print("Error key: %s from %r" % (
    control.get_error_key(epoch=first_epoch), control.epoch_data[first_epoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.default_learning_rate)
  max_epoch = max(control.epoch_data.keys())
  for epoch in range(1, max_epoch + 2):  # all epochs [1..max_epoch+1]
    old_learning_rate = None
    if epoch in control.epoch_data:
      old_learning_rate = control.epoch_data[epoch].learning_rate
    if epoch < first_non_pretrain_epoch:
      learning_rate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    elif 1 < first_non_pretrain_epoch == epoch:
      learning_rate = control.default_learning_rate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
        epoch, learning_rate, old_learning_rate)
    else:
      learning_rate = control.calc_new_learning_rate_for_epoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    if learning_rate < control.min_learning_rate:
      learning_rate = control.min_learning_rate
      s += ", clipped to %s" % learning_rate
    s += ", previous relative error: %s" % control.calc_relative_error(epoch - 2, epoch - 1)
    if hasattr(control, "_calc_recent_mean_relative_error"):
      # noinspection PyProtectedMember
      s += ", previous mean relative error: %s" % control._calc_recent_mean_relative_error(epoch)
    print(s)
    if check_lr and old_learning_rate is not None:
      if old_learning_rate != learning_rate:
        print("Learning rate is different in epoch %i!" % epoch)
        sys.exit(1)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epoch_data:
      control.epoch_data[epoch].learning_rate = learning_rate
    else:
      control.epoch_data[epoch] = control.EpochData(learningRate=learning_rate)
  print("Finished, last stored epoch was %i." % max_epoch)
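# demo() reads its options from sys.argv; a hypothetical command line (script name assumed):
#   python LearningRateControl.py my_setup.config ++learning_rate_file newbob.data ++check_learning_rates 1
# With ++check_learning_rates 1, it exits with status 1 on the first epoch whose stored
# learning rate differs from the recomputed one.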
def init(config_str, config_dataset, use_pretrain, epoch, verbosity):
  """
  :param str config_str: either filename to config-file, or dict for dataset
  :param str|None config_dataset:
  :param bool use_pretrain: might overwrite config options, or even the dataset
  :param int epoch:
  :param int verbosity:
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  dataset_opts = None
  config_filename = None
  if config_str.strip().startswith("{"):
    print("Using dataset %s." % config_str)
    dataset_opts = eval(config_str.strip())
  elif config_str.endswith(".hdf"):
    dataset_opts = {"class": "HDFDataset", "files": [config_str]}
    print("Using dataset %r." % dataset_opts)
    assert os.path.exists(config_str)
  else:
    config_filename = config_str
    print("Using config file %r." % config_filename)
    assert os.path.exists(config_filename)
  rnn.init_config(config_filename=config_filename, default_config={"cache_size": "0"})
  global config
  config = rnn.config
  config.set("log", None)
  config.set("log_verbosity", verbosity)
  rnn.init_log()
  print("Returnn %s starting up." % __file__, file=log.v2)
  rnn.returnn_greeting()
  rnn.init_faulthandler()
  rnn.init_config_json_network()
  Util.BackendEngine.select_engine(config=config)
  if not dataset_opts:
    if config_dataset:
      dataset_opts = "config:%s" % config_dataset
    else:
      dataset_opts = "config:train"
  if use_pretrain:
    from Pretrain import pretrain_from_config
    pretrain = pretrain_from_config(config)
    if pretrain:
      print("Using pretrain %s, epoch %i" % (pretrain, epoch), file=log.v2)
      net_dict = pretrain.get_network_json_for_epoch(epoch=epoch)
      if "#config" in net_dict:
        config_overwrites = net_dict["#config"]
        print("Pretrain overwrites these config options:", file=log.v2)
        assert isinstance(config_overwrites, dict)
        for key, value in sorted(config_overwrites.items()):
          assert isinstance(key, str)
          orig_value = config.typed_dict.get(key, None)
          if isinstance(orig_value, dict) and isinstance(value, dict):
            diff_str = "\n" + Util.dict_diff_str(orig_value, value)
          elif isinstance(value, dict):
            diff_str = "\n%r ->\n%s" % (orig_value, pformat(value))
          else:
            diff_str = " %r -> %r" % (orig_value, value)
          print("Config key %r for epoch %i:%s" % (key, epoch, diff_str), file=log.v2)
          config.set(key, value)
      else:
        print("No config overwrites for this epoch.", file=log.v2)
    else:
      print("No pretraining used.", file=log.v2)
  elif config.typed_dict.get("pretrain", None):
    print("Not using pretrain.", file=log.v2)
  dataset_default_opts = {}
  Dataset.kwargs_update_from_config(config, dataset_default_opts)
  print("Using dataset:", dataset_opts, file=log.v2)
  global dataset
  dataset = init_dataset(dataset_opts, default_kwargs=dataset_default_opts)
  assert isinstance(dataset, Dataset)
  dataset.init_seq_order(epoch=epoch)
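# A hedged usage sketch; the config filename, dataset key, and epoch are hypothetical.
# With use_pretrain=True, the epoch's network json may carry a "#config" dict whose
# entries overwrite the loaded config before the dataset is built.
# init(config_str="my_setup.config", config_dataset="dev", use_pretrain=True,
#      epoch=5, verbosity=4)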