Example #1
0
def init(config_str, verbosity):
    """
    Set up the rnn runtime (config, log, data) for the dump-dataset tool.

    :param str config_str: either filename to config-file, or dict for dataset.
        After stripping whitespace, a string starting with ``{`` is evaluated
        as a Python expression and used as dataset dict. A string ending in
        ``.hdf`` is wrapped into an ``HDFDataset`` dict. Anything else is
        treated as the path of a config file.
    :param int verbosity: stored as config option ``log_verbosity``
    :raises AssertionError: if the referenced HDF file or config file does not exist
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    datasetDict = None
    configFilename = None
    stripped = config_str.strip()
    if stripped.startswith("{"):
        print("Using dataset %s." % config_str)
        # NOTE(security): eval() executes arbitrary Python code. This is only
        # acceptable while config_str comes from a trusted source (e.g. the
        # operator's own command line); never feed it untrusted input.
        datasetDict = eval(stripped)
    elif config_str.endswith(".hdf"):
        datasetDict = {"class": "HDFDataset", "files": [config_str]}
        print("Using dataset %r." % datasetDict)
        assert os.path.exists(config_str), (
            "HDF file %r does not exist" % config_str)
    else:
        configFilename = config_str
        print("Using config file %r." % configFilename)
        assert os.path.exists(configFilename), (
            "config file %r does not exist" % configFilename)
    rnn.initConfig(configFilename=configFilename,
                   default_config={"cache_size": "0"})
    global config
    config = rnn.config
    config.set("log", None)
    config.set("log_verbosity", verbosity)
    # Only a non-empty dataset dict overrides the "train" option.
    if datasetDict:
        config.set("train", datasetDict)
    rnn.initLog()
    print("Returnn dump-dataset starting up.", file=log.v2)
    rnn.returnnGreeting()
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Example #2
0
def init(config_str):
    """
    Set up the rnn runtime (config, log, data) from a config file or an inline dataset dict.

    :param str config_str: either filename to config-file, or dict for dataset.
        A string starting with ``{`` is evaluated as a Python expression and,
        if the result is non-empty, stored as config option ``train``.
        Anything else is treated as the path of a config file.
    :raises AssertionError: if the config file does not exist
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_str.startswith("{"):
        print("Using dataset %s." % config_str)
        # NOTE(security): eval() executes arbitrary Python code. This is only
        # acceptable while config_str comes from a trusted source (e.g. the
        # operator's own command line); never feed it untrusted input.
        datasetDict = eval(config_str)
        configFilename = None
    else:
        datasetDict = None
        configFilename = config_str
        print("Using config file %r." % configFilename)
        assert os.path.exists(configFilename), (
            "config file %r does not exist" % configFilename)
    rnn.initConfig(configFilename=configFilename, commandLineOptions=[])
    global config
    config = rnn.config
    config.set("log", None)
    # Only a non-empty dataset dict overrides the "train" option.
    if datasetDict:
        config.set("train", datasetDict)
    rnn.initLog()
    print("CRNN dump-dataset starting up.", file=log.v1)
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Example #3
0
def demo():
  """
  Print the network JSON of every pretrain epoch for the config given via ``sys.argv``.

  Without command line arguments, a usage hint is printed and the demo
  still continues. If the config option ``pretrain`` is not set, it is set
  to ``"default"`` for this run.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  cli_options = sys.argv[1:]
  if not cli_options:
    for usage_line in (
          "usage: python %s [config] [other options]",
          "example usage: python %s ++pretrain default ++pretrain_construction_algo from_input"):
      print(usage_line % __file__)
  rnn.initConfig(commandLineOptions=cli_options)
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.initLog()
  rnn.initBackendEngine()
  pretrain_option = rnn.config.value("pretrain", "")
  if not pretrain_option:
    print("config option 'pretrain' not set, will set it for this demo to 'default'")
    rnn.config.set("pretrain", "default")
  pretrain = pretrainFromConfig(rnn.config)
  print("pretrain: %s" % pretrain)
  epoch_count = pretrain.get_train_num_epochs()
  from pprint import pprint
  # Epochs are numbered from 1.
  epoch = 1
  while epoch <= epoch_count:
    print("epoch %i (of %i) network json:" % (epoch, epoch_count))
    pprint(pretrain.get_network_json_for_epoch(epoch))
    epoch += 1
  print("done.")
Example #4
0
def init(config_filename, log_verbosity):
    """
    Start up the rnn runtime for the compile-native-op tool.

    Forces the config options ``log``, ``log_verbosity`` and
    ``use_tensorflow``, asserts that TensorFlow is the selected backend and,
    if the config has a ``network`` entry, constructs a ``TFNetwork`` from it.

    :param str config_filename: filename to config-file
    :param int log_verbosity: stored as config option ``log_verbosity``
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.initConfig(configFilename=config_filename, commandLineOptions=[])
    global config
    config = rnn.config
    # Overrides applied on top of the loaded config, in this order.
    forced_options = (
        ("log", None),
        ("log_verbosity", log_verbosity),
        ("use_tensorflow", True),
    )
    for option_name, option_value in forced_options:
        config.set(option_name, option_value)
    rnn.initLog()
    print("Returnn compile-native-op starting up.", file=log.v1)
    rnn.returnnGreeting()
    rnn.initBackendEngine()
    assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    if 'network' not in config.typed_dict:
        return
    print("Loading network")
    from TFNetwork import TFNetwork
    net_kwargs = dict(name="root", config=config, rnd_seed=1,
                      train_flag=False, eval_flag=True, search_flag=False)
    root_net = TFNetwork(**net_kwargs)
    root_net.construct_from_dict(config.typed_dict["network"])
Example #5
0
def init(config_filename, cmd_line_opts, dataset_config_str):
  """
  Initialize the rnn runtime and return the dataset to work on.

  With a config file, the dataset is ``rnn.train_data`` after ``rnn.initData()``.
  Without one, it is built from ``dataset_config_str``.

  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for initConfig method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  :return: the dataset selected as described above
  """
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  use_config = bool(config_filename)
  if not use_config:
    log.initialize(verbosity=[5])
  else:
    rnn.initConfig(config_filename, cmd_line_opts)
    rnn.initLog()
  print >> log.v3, "CRNN dump-dataset starting up."
  rnn.initFaulthandler()
  rnn.initConfigJsonNetwork()
  if not use_config:
    # No global config: the dataset must be described explicitly.
    assert dataset_config_str
    source_dataset = init_dataset_via_str(dataset_config_str)
    print >> log.v3, "Source dataset:", source_dataset.len_info()
    return source_dataset
  rnn.initData()
  rnn.printTaskProperties()
  assert isinstance(rnn.train_data, Dataset)
  return rnn.train_data
def init(config_filename, log_verbosity):
    """
    Start up the rnn runtime for the calculate-word-error-rate tool.

    Forces the config options ``task``, ``log``, ``log_verbosity`` and
    ``use_tensorflow`` and asserts that TensorFlow is the selected backend.

    :param str config_filename: filename to config-file
    :param int log_verbosity: stored as config option ``log_verbosity``
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.initConfig(configFilename=config_filename, commandLineOptions=[])
    global config
    config = rnn.config
    # Overrides applied on top of the loaded config, in this order.
    forced_options = (
        ("task", "calculate_wer"),
        ("log", None),
        ("log_verbosity", log_verbosity),
        ("use_tensorflow", True),
    )
    for option_name, option_value in forced_options:
        config.set(option_name, option_value)
    rnn.initLog()
    print("Returnn calculate-word-error-rate starting up.", file=log.v1)
    rnn.returnnGreeting()
    rnn.initBackendEngine()
    assert Util.BackendEngine.is_tensorflow_selected(), "this is only for TensorFlow"
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.printTaskProperties()
Example #7
0
def init(config_filename, cmd_line_opts, dataset_config_str):
    """
    Initialize the rnn runtime for hdf_dump and return the dataset to dump.

    With a config file, the dataset is ``rnn.train_data`` after ``rnn.initData()``.
    Without one, it is built from ``dataset_config_str``.

    :param str config_filename: global config for CRNN
    :param list[str] cmd_line_opts: options for initConfig method
    :param str dataset_config_str: dataset via init_dataset_via_str()
    :return: the dataset selected as described above
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    use_config = bool(config_filename)
    if not use_config:
        log.initialize(verbosity=[5])
    else:
        rnn.initConfig(config_filename, cmd_line_opts)
        rnn.initLog()
    print("Returnn hdf_dump starting up.", file=log.v3)
    rnn.initFaulthandler()
    if not use_config:
        # No global config: the dataset must be described explicitly.
        assert dataset_config_str
        source_dataset = init_dataset_via_str(dataset_config_str)
        print("Source dataset:", source_dataset.len_info(), file=log.v3)
        return source_dataset
    rnn.initData()
    rnn.printTaskProperties()
    assert isinstance(rnn.train_data, Dataset)
    return rnn.train_data
Example #8
0
def init(configFilename, commandLineOptions):
  """
  Run the rnn start-up calls needed by this tool and publish the config.

  The call order matters: the config is loaded first, then its ``log``
  option is overridden, and only then is the log initialized.

  :param configFilename: forwarded as first argument to ``rnn.initConfig``
    (presumably the config file path -- confirm against ``rnn.initConfig``)
  :param commandLineOptions: forwarded as second argument to ``rnn.initConfig``
  """
  rnn.initBetterExchook()
  rnn.initConfig(configFilename, commandLineOptions)
  # Expose the loaded config as this module's global ``config``.
  global config
  config = rnn.config
  # Override the "log" option before the log is set up.
  # NOTE(review): an empty list presumably means "no log files" -- confirm.
  config.set("log", [])
  rnn.initLog()
  # Python 2 print statement, writing to the log stream ``log.v3``.
  print >> log.v3, "CRNN dump-dataset starting up."
  rnn.initConfigJsonNetwork()
Example #9
0
def init(configFilename, commandLineOptions):
    """
    Run the rnn start-up calls needed by this tool and publish the config.

    The call order matters: the config is loaded first, then its ``log``
    option is overridden, and only then is the log initialized.

    :param configFilename: forwarded as first argument to ``rnn.initConfig``
        (presumably the config file path -- confirm against ``rnn.initConfig``)
    :param commandLineOptions: forwarded as second argument to ``rnn.initConfig``
    """
    rnn.initBetterExchook()
    rnn.initConfig(configFilename, commandLineOptions)
    # Expose the loaded config as this module's global ``config``.
    global config
    config = rnn.config
    # Override the "log" option before the log is set up.
    # NOTE(review): an empty list presumably means "no log files" -- confirm.
    config.set("log", [])
    rnn.initLog()
    print("CRNN dump-dataset starting up.", file=log.v3)
    rnn.initConfigJsonNetwork()
Example #10
0
def init(configFilename, commandLineOptions):
    """
    Run the full rnn start-up sequence for the demo-dataset tool.

    Loads the config, initializes the log, then calls ``rnn.initData()``
    and ``rnn.printTaskProperties()``. The loaded config is published as
    this module's global ``config``.

    :param configFilename: forwarded as first argument to ``rnn.initConfig``
        (presumably the config file path -- confirm against ``rnn.initConfig``)
    :param commandLineOptions: forwarded as second argument to ``rnn.initConfig``
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    rnn.initConfig(configFilename, commandLineOptions)
    # Expose the loaded config as this module's global ``config``.
    global config
    config = rnn.config
    rnn.initLog()
    # Python 2 print statement, writing to the log stream ``log.v3``.
    print >> log.v3, "CRNN demo-dataset starting up"
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Example #11
0
def init(configFilename, commandLineOptions):
  """
  Run the full rnn start-up sequence for the demo-dataset tool.

  Loads the config, initializes the log, then calls ``rnn.initData()``
  and ``rnn.printTaskProperties()``. The loaded config is published as
  this module's global ``config``.

  :param configFilename: forwarded as first argument to ``rnn.initConfig``
    (presumably the config file path -- confirm against ``rnn.initConfig``)
  :param commandLineOptions: forwarded as second argument to ``rnn.initConfig``
  """
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  rnn.initConfig(configFilename, commandLineOptions)
  # Expose the loaded config as this module's global ``config``.
  global config
  config = rnn.config
  rnn.initLog()
  # Python 2 print statement, writing to the log stream ``log.v3``.
  print >> log.v3, "CRNN demo-dataset starting up"
  rnn.initFaulthandler()
  rnn.initConfigJsonNetwork()
  rnn.initData()
  rnn.printTaskProperties()
Example #12
0
def init(configFilename, commandLineOptions):
    """
    Run the full rnn start-up sequence for the dump-dataset tool.

    Loads the config, overrides its ``log`` option with ``None``,
    initializes the log, then calls ``rnn.initData()`` and
    ``rnn.printTaskProperties()``. The loaded config is published as this
    module's global ``config``.

    :param configFilename: forwarded as first argument to ``rnn.initConfig``
        (presumably the config file path -- confirm against ``rnn.initConfig``)
    :param commandLineOptions: forwarded as second argument to ``rnn.initConfig``
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    rnn.initConfig(configFilename, commandLineOptions)
    # Expose the loaded config as this module's global ``config``.
    global config
    config = rnn.config
    # Must happen before rnn.initLog() so the override is in place when the log is set up.
    config.set("log", None)
    rnn.initLog()
    print("CRNN dump-dataset starting up.", file=log.v1)
    rnn.initFaulthandler()
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Example #13
0
def init(configFilename=None):
  """
  Start up the rnn runtime for the collect-orth-symbols tool.

  With a config file, config, log, network description and data are
  initialized. Without one, only the log is initialized, with its defaults.

  :param configFilename: config file passed to ``rnn.initConfig``; may be None/empty
  """
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  if not configFilename:
    log.initialize()
  else:
    rnn.initConfig(configFilename, commandLineOptions=[])
    rnn.initLog()
  print >> log.v3, "CRNN collect-orth-symbols starting up."
  rnn.initFaulthandler()
  if not configFilename:
    # Nothing config-dependent to set up.
    return
  rnn.initConfigJsonNetwork()
  rnn.initData()
  rnn.printTaskProperties()
def init(configFilename=None):
    """
    Start up the rnn runtime for the collect-orth-symbols tool.

    With a config file, config, log, network description and data are
    initialized. Without one, only the log is initialized, with its defaults.

    :param configFilename: config file passed to ``rnn.initConfig``; may be None/empty
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if not configFilename:
        log.initialize()
    else:
        rnn.initConfig(configFilename, commandLineOptions=[])
        rnn.initLog()
    print("CRNN collect-orth-symbols starting up.", file=log.v3)
    rnn.initFaulthandler()
    if not configFilename:
        # Nothing config-dependent to set up.
        return
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Example #15
0
def initBase(configfile=None, targetMode=None, epoch=None):
  """
  Lazily set up the module globals ``config`` and ``engine``.

  Safe to call repeatedly: config and engine are only created when not yet
  set, while target mode and dataset are (re)initialized on every call.

  :type configfile: str | None
  :param targetMode: if set, passed to ``setTargetMode``; ``"forward"`` together
    with ``epoch`` additionally selects the model file to load
  :param epoch: epoch whose model file is looked up in ``"forward"`` mode
  """
  global isInitialized, config, engine

  isInitialized = True
  # Do not return early: a repeated call may only be there to change targetMode.

  if not config:
    config_path = DefaultSprintCrnnConfig if configfile is None else configfile
    assert os.path.exists(config_path)

    rnn.initThreadJoinHack()
    rnn.initConfig(config_path, [])
    config = rnn.config
    rnn.initLog()
    rnn.initConfigJsonNetwork()

  if targetMode:
    setTargetMode(targetMode)

  initDataset()

  if targetMode == "forward" and epoch:
    model_prefix = config.value('model', '')
    # Candidate filenames: first the regular one, then the pretrain one.
    candidates = []
    for is_pretrain in (False, True):
      candidates.append(Engine.epoch_model_filename(model_prefix, epoch, is_pretrain))
    present = [path for path in candidates if os.path.exists(path)]
    assert len(present) == 1, "%s not found" % candidates
    model_epoch_filename = present[0]
    config.set('load', model_epoch_filename)
    assert Engine.get_epoch_model(config)[1] == model_epoch_filename

  if not engine:
    devices = rnn.initDevices()
    rnn.printTaskProperties(devices)
    rnn.initEngine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)
Example #16
0
def init(config_filename, log_verbosity):
    """
    Start up the rnn runtime for the dump-dataset-raw-strings tool.

    Forces the config options ``task``, ``log`` and ``log_verbosity``
    before the log is initialized.

    :param str config_filename: filename to config-file
    :param int log_verbosity: stored as config option ``log_verbosity``
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if config_filename:
        print("Using config file %r." % config_filename)
        assert os.path.exists(config_filename)
    rnn.initConfig(configFilename=config_filename, commandLineOptions=[])
    global config
    config = rnn.config
    # Overrides applied on top of the loaded config, in this order.
    forced_options = (
        ("task", "dump"),
        ("log", None),
        ("log_verbosity", log_verbosity),
    )
    for option_name, option_value in forced_options:
        config.set(option_name, option_value)
    rnn.initLog()
    print("Returnn dump-dataset-raw-strings starting up.", file=log.v1)
    rnn.returnnGreeting()
    rnn.initFaulthandler()
Example #17
0
def init(configFilename, commandLineOptions, args):
    """
    Initialize rnn for evaluation (or search) on ``args.data`` and prepare the engine.

    :param configFilename: forwarded to ``rnn.init``
    :param commandLineOptions: forwarded to ``rnn.init``
    :param args: object providing the attributes ``data``, ``epoch``,
        ``do_search`` and, when ``do_search`` is set, ``beam_size``
    """
    rnn.initBetterExchook()
    dataset_ref = "config:get_dataset(%r)" % args.data
    config_updates = dict(log=None, task="eval", eval=dataset_ref,
                          train=None, dev=None, need_data=True)
    if args.epoch:
        config_updates["load_epoch"] = args.epoch
    if args.do_search:
        # Search mode replaces the task and adds the search-specific options.
        config_updates["task"] = "search"
        config_updates["search_data"] = dataset_ref
        config_updates["search_do_eval"] = False
        config_updates["beam_size"] = int(args.beam_size)
        config_updates["max_seq_length"] = 0

    rnn.init(configFilename=configFilename,
             commandLineOptions=commandLineOptions,
             config_updates=config_updates,
             extra_greeting="CRNN dump-forward starting up.")
    rnn.engine.init_train_from_config(config=rnn.config, train_data=None)

    pretrain = rnn.engine.pretrain
    if pretrain:
        epoch_net_desc = pretrain.get_network_json_for_epoch(rnn.engine.epoch)
        rnn.engine.maybe_init_new_network(epoch_net_desc)
    global config
    config = rnn.config
    # The log is initialized a second time here, after overriding "log".
    config.set("log", [])
    rnn.initLog()
    print("CRNN get-attention-weights starting up.", file=log.v3)
Example #18
0
def initBase(configfile=None, targetMode=None, epoch=None):
    """
    Lazily set up the module globals ``config`` and ``engine``.

    May be called repeatedly: the config block and the engine block only run
    while the respective global is still unset, whereas ``setTargetMode`` and
    ``initDataset`` run on every call.

    :param str|None configfile: filename, via init(), this is set.
        Falls back to ``DefaultSprintCrnnConfig`` when None.
    :param str|None targetMode: "forward" or so. via init(), this is set
    :param int epoch: via init(), this is set. Only used in "forward" mode,
        to select the model file stored in the ``load`` config option.
    :raises AssertionError: if the config file does not exist, or if not
        exactly one model file exists for ``epoch`` in "forward" mode
    """

    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set targetMode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.initBetterExchook()
        rnn.initThreadJoinHack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.initConfig(configFilename=configfile)
        config = rnn.config

        rnn.initLog()
        rnn.returnnGreeting(configFilename=configfile)
        rnn.initBackendEngine()
        rnn.initFaulthandler(sigusr1_chain=True)
        rnn.initConfigJsonNetwork()

        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            # This rebinds the module-global name ``Engine``, so every later
            # use of ``Engine`` in this function refers to the TF class.
            import TFEngine
            global Engine
            Engine = TFEngine.Engine

        # Registered only once, since this block runs only while config is unset.
        import atexit
        atexit.register(_at_exit_handler)

    if targetMode:
        setTargetMode(targetMode)

    initDataset()

    if targetMode and targetMode == "forward" and epoch:
        model_filename = config.value('model', '')
        # Candidate filenames: first the regular one, then the pretrain one.
        fns = [
            Engine.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        # With TensorFlow, existence is checked on the filename plus ".meta".
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            fn_postfix += ".meta"
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        # Exactly one of the two candidates must exist.
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        # Sanity check: the engine must resolve the same file from the config.
        assert Engine.get_epoch_model(config)[1] == model_epoch_filename, \
          "%r != %r" % (Engine.get_epoch_model(config), model_epoch_filename)

    global engine
    if not engine:
        devices = rnn.initDevices()
        rnn.printTaskProperties(devices)
        rnn.initEngine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)
def demo():
    """
    Replay the learning rate schedule from the stored epoch data and print it.

    For every epoch from 1 up to one past the last stored epoch, the learning
    rate is recomputed (fixed during pretrain, calculated by the control
    afterwards) and printed next to the stored value. With the config option
    ``check_learning_rates`` set, the process exits with status 1 on the
    first mismatch between stored and recomputed value.
    """
    import better_exchook
    better_exchook.install()
    import rnn
    import sys
    if len(sys.argv) <= 1:
        usage_fmt = "usage: python %s [config] [other options] [++check_learning_rates 1]"
        example_fmt = "example usage: python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001"
        print(usage_fmt % __file__)
        print(example_fmt % __file__)
    rnn.initConfig(commandLineOptions=sys.argv[1:])
    rnn.config._hack_value_reading_debug()
    rnn.config.update({"log": []})
    rnn.initLog()
    rnn.initBackendEngine()
    check_lr = rnn.config.bool("check_learning_rates", False)
    from Pretrain import pretrainFromConfig
    pretrain = pretrainFromConfig(rnn.config)
    # Without pretraining, epoch 1 is already a regular epoch.
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1 if pretrain else 1
    pretrain_learning_rate = None
    log.initialize(verbosity=[5])
    control = loadLearningRateControlFromConfig(rnn.config)
    print("LearningRateControl: %r" % control)
    if not control.epochData:
        print("No epoch data so far.")
        return
    first_epoch = min(control.epochData.keys())
    if first_epoch != 1:
        print("Strange, first epoch from epoch data is %i." % first_epoch)
    error_key = control.getErrorKey(epoch=first_epoch)
    print("Error key: %s from %r" % (error_key, control.epochData[first_epoch].error))
    if pretrain:
        pretrain_learning_rate = rnn.config.float(
            'pretrain_learning_rate', control.defaultLearningRate)
    last_epoch = max(control.epochData.keys())
    # Covers all epochs [1..last_epoch+1].
    for epoch in range(1, last_epoch + 2):
        stored_lr = control.epochData[epoch].learningRate if epoch in control.epochData else None
        if epoch < first_non_pretrain_epoch:
            new_lr = pretrain_learning_rate
            msg = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (
                epoch, new_lr, stored_lr)
        elif first_non_pretrain_epoch > 1 and epoch == first_non_pretrain_epoch:
            new_lr = control.defaultLearningRate
            msg = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
                epoch, new_lr, stored_lr)
        else:
            new_lr = control.calcNewLearnignRateForEpoch(epoch)
            msg = "Calculated learning rate for epoch %i: %s (was: %s)" % (
                epoch, new_lr, stored_lr)
        if new_lr < control.minLearningRate:
            new_lr = control.minLearningRate
            msg += ", clipped to %s" % new_lr
        msg += ", previous relative error: %s" % control.calcRelativeError(epoch - 2, epoch - 1)
        if hasattr(control, "_calcRecentMeanRelativeError"):
            msg += ", previous mean relative error: %s" % control._calcRecentMeanRelativeError(epoch)
        print(msg)
        if check_lr and stored_lr is not None and stored_lr != new_lr:
            print("Learning rate is different in epoch %i!" % epoch)
            sys.exit(1)
        # Store the recomputed rate so that later epochs are calculated from consistent data.
        if epoch in control.epochData:
            control.epochData[epoch].learningRate = new_lr
        else:
            control.epochData[epoch] = control.EpochData(learningRate=new_lr)
    print("Finished, last stored epoch was %i." % last_epoch)