Example #1
0
    def _load_state_tying(self, reload=False):
        """
        Ensure ``self.state_tying`` holds a parsed :class:`StateTying`.

        The file at ``self.state_tying_name`` is (re)read when ``reload`` is
        true or when ``self.state_tying`` is not a StateTying instance yet;
        otherwise this is a no-op.  ``self.state_tying.allo_map`` (allophone
        to int) is the relevant result.

        :param bool reload: force re-reading the state-tying file
        """
        from os.path import isfile
        from Log import log
        from LmDataset import StateTying

        needs_load = reload or not isinstance(self.state_tying, StateTying)
        if not needs_load:
            return

        print("Loading state tying file:", self.state_tying_name)

        assert isfile(
            self.state_tying_name), "State tying file does not exists"

        log.initialize(verbosity=[5])
        self.state_tying = StateTying(self.state_tying_name)

        print("Finished state tying mapping:",
              len(self.state_tying.allo_map), "allos to int")
Example #2
0
def _main(argv):
    """
    Demo: build an LmDataset from ``eval(argv[0])``, iterate over all of its
    sequences, and report progress roughly every two seconds.

    :param list[str] argv: argv[0] is a Python dict literal of LmDataset kwargs
    """
    import better_exchook
    better_exchook.install()
    log.initialize(verbosity=[5])
    print("LmDataset demo startup")
    dataset_opts = eval(argv[0])
    print("Creating LmDataset with kwargs=%r ..." % dataset_opts)
    demo_set = LmDataset(**dataset_opts)
    print("init_seq_order ...")
    demo_set.init_seq_order(epoch=1)

    idx = 0
    prev_report = time.time()
    print("start iterating through seqs ...")
    while demo_set.is_less_than_num_seqs(idx):
        if idx == 0:
            print("load_seqs with seq_idx=%i ...." % idx)
        demo_set.load_seqs(idx, idx + 1)

        # Throttled progress report (at most about every two seconds).
        if time.time() - prev_report > 2.0:
            prev_report = time.time()
            acc_syms = demo_set._num_timesteps_accumulated["data"]
            print(
                "Loading %s progress, %i/%i (%.0f%%) seqs loaded (%.0f%% skipped), (%.0f%% unknown) total syms %i ..."
                % (demo_set.__class__.__name__, demo_set.next_orth_idx,
                   demo_set.estimated_num_seqs,
                   100.0 * demo_set.next_orth_idx / demo_set.estimated_num_seqs,
                   100.0 * demo_set.num_skipped / (demo_set.next_orth_idx or 1),
                   100.0 * demo_set.num_unknown / acc_syms,
                   acc_syms))

        idx += 1

    print("finished iterating, num seqs: %i" % idx)
    print("dataset len:", demo_set.len_info())
Example #3
0
def init(config_filename, cmd_line_opts, dataset_config_str):
  """
  Initialize RETURNN for hdf_dump and return the source dataset.

  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for init_config method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  :return: train data from the config, or a dataset built from the string
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if not config_filename:
    log.initialize(verbosity=[5])
  else:
    rnn.init_config(config_filename, cmd_line_opts)
    rnn.init_log()
  print("Returnn hdf_dump starting up.", file=log.v3)
  rnn.init_faulthandler()
  if not config_filename:
    assert dataset_config_str
    dataset = init_dataset(dataset_config_str)
  else:
    rnn.init_data()
    rnn.print_task_properties()
    assert isinstance(rnn.train_data, Dataset)
    dataset = rnn.train_data
  print("Source dataset:", dataset.len_info(), file=log.v3)
  return dataset
Example #4
0
def init(config_filename, cmd_line_opts, dataset_config_str):
  """
  Initialize CRNN for dump-dataset and return the source dataset.

  :param str config_filename: global config for CRNN
  :param list[str] cmd_line_opts: options for initConfig method
  :param str dataset_config_str: dataset via init_dataset_via_str()
  :return: rnn.train_data from the config, or a dataset built from the string
  """
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  if config_filename:
    rnn.initConfig(config_filename, cmd_line_opts)
    rnn.initLog()
  else:
    log.initialize(verbosity=[5])
  # Fixed: was a Python-2-only "print >> log.v3, ..." statement, which is a
  # syntax error on Python 3.  Same output, via the print function.
  print("CRNN dump-dataset starting up.", file=log.v3)
  rnn.initFaulthandler()
  rnn.initConfigJsonNetwork()
  if config_filename:
    rnn.initData()
    rnn.printTaskProperties()
    assert isinstance(rnn.train_data, Dataset)
    return rnn.train_data
  else:
    assert dataset_config_str
    dataset = init_dataset_via_str(dataset_config_str)
    print("Source dataset:", dataset.len_info(), file=log.v3)
    return dataset
Example #5
0
def init(config_filename, cmd_line_opts, dataset_config_str):
    """
    Initialize RETURNN for hdf_dump and return the source dataset.

    :param str config_filename: global config for CRNN
    :param list[str] cmd_line_opts: options for initConfig method
    :param str dataset_config_str: dataset via init_dataset_via_str()
    :return: train data from the config, or a dataset built from the string
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if not config_filename:
        log.initialize(verbosity=[5])
    else:
        rnn.initConfig(config_filename, cmd_line_opts)
        rnn.initLog()
    print("Returnn hdf_dump starting up.", file=log.v3)
    rnn.initFaulthandler()
    if not config_filename:
        assert dataset_config_str
        dataset = init_dataset_via_str(dataset_config_str)
        print("Source dataset:", dataset.len_info(), file=log.v3)
        return dataset
    rnn.initData()
    rnn.printTaskProperties()
    assert isinstance(rnn.train_data, Dataset)
    return rnn.train_data
def demo():
  """
  Demo run: given a config with an existing learning-rate file, replay how the
  learning rate would have been chosen for every epoch and print the result.
  Reads config/options from ``sys.argv``.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001" % __file__)
  rnn.initConfig(commandLineOptions=sys.argv[1:])
  rnn.config._hack_value_reading_debug()
  from Pretrain import pretrainFromConfig
  pretrain = pretrainFromConfig(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = loadLearningRateControlFromConfig(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epochData:
    print("No epoch data so far.")
    return
  firstEpoch = min(control.epochData.keys())
  if firstEpoch != 1:
    print("Strange, first epoch from epoch data is %i." % firstEpoch)
  print("Error key: %s from %r" % (control.getErrorKey(epoch=firstEpoch), control.epochData[firstEpoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.defaultLearningRate)
  maxEpoch = max(control.epochData.keys())
  for epoch in range(1, maxEpoch + 2):  # all epochs [1..maxEpoch+1]
    oldLearningRate = None
    if epoch in control.epochData:
      oldLearningRate = control.epochData[epoch].learningRate
    if epoch < first_non_pretrain_epoch:
      learningRate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    elif first_non_pretrain_epoch > 1 and epoch == first_non_pretrain_epoch:
      learningRate = control.defaultLearningRate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (epoch, learningRate, oldLearningRate)
    else:
      # NOTE(review): "calcNewLearnignRateForEpoch" (sic) appears to be the
      # actual, typo'd method name on the control object -- confirm against
      # LearningRateControl before renaming anything here.
      learningRate = control.calcNewLearnignRateForEpoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    if learningRate < control.minLearningRate:
      learningRate = control.minLearningRate
      s += ", clipped to %s" % learningRate
    s += ", previous relative error: %s" % control.calcRelativeError(epoch - 2, epoch - 1)
    if hasattr(control, "_calcRecentMeanRelativeError"):
      s += ", previous mean relative error: %s" % control._calcRecentMeanRelativeError(epoch)
    print(s)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epochData:
      control.epochData[epoch].learningRate = learningRate
    else:
      control.epochData[epoch] = control.EpochData(learningRate=learningRate)
  print("Finished, last stored epoch was %i." % maxEpoch)
def main():
  """
  Benchmark the configured LSTM cell types on CPU and/or GPU and print a
  timing summary sorted by measured time.  ``base_settings`` provides the
  defaults, overridable via ``opt=value`` command-line arguments.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  for opt in args.cfg:
    key, value = opt.split("=", 1)
    assert key in base_settings
    # Coerce the string override to the type of the existing default value.
    value_type = type(base_settings[key])
    base_settings[key] = value_type(value)
  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_crnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if not args.no_setup_tf_thread_pools:
    setup_tf_thread_pools(log_file=log.v2)
  else:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for lstm_unit in LstmCellTypes:
      benchmarks["GPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=True)
  if not args.no_cpu:
    for lstm_unit in LstmCellTypes:
      # Some cell types only exist as GPU kernels; skip them on the CPU pass.
      if lstm_unit in GpuOnlyCellTypes:
        continue
      benchmarks["CPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  # Re-sort (time, unit) so the fastest variants are printed first.
  for t, lstm_unit in sorted([(t, lstm_unit) for (lstm_unit, t) in sorted(benchmarks.items())]):
    print("  %s: %s" % (lstm_unit, hms_fraction(t)))
  print("Done.")
def main():
  """
  Benchmark the configured LSTM cell types on CPU and/or GPU and print a
  timing summary sorted by measured time.  ``base_settings`` provides the
  defaults, overridable via ``opt=value`` command-line arguments.
  """
  global LstmCellTypes
  print("Benchmarking LSTMs.")
  better_exchook.install()
  print("Args:", " ".join(sys.argv))
  arg_parser = ArgumentParser()
  arg_parser.add_argument("cfg", nargs="*", help="opt=value, opt in %r" % sorted(base_settings.keys()))
  arg_parser.add_argument("--no-cpu", action="store_true")
  arg_parser.add_argument("--no-gpu", action="store_true")
  arg_parser.add_argument("--selected", help="comma-separated list from %r" % LstmCellTypes)
  arg_parser.add_argument("--no-setup-tf-thread-pools", action="store_true")
  args = arg_parser.parse_args()
  for opt in args.cfg:
    key, value = opt.split("=", 1)
    assert key in base_settings
    # Coerce the string override to the type of the existing default value.
    value_type = type(base_settings[key])
    base_settings[key] = value_type(value)
  print("Settings:")
  pprint(base_settings)

  log.initialize(verbosity=[4])
  print("Returnn:", describe_returnn_version(), file=log.v3)
  print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
  print("Python:", sys.version.replace("\n", ""), sys.platform)
  if not args.no_setup_tf_thread_pools:
    setup_tf_thread_pools(log_file=log.v2)
  else:
    print("Not setting up the TF thread pools. Will be done automatically by TF to number of CPU cores.")
  if args.no_gpu:
    print("GPU will not be used.")
  else:
    print("GPU available: %r" % is_gpu_available())
  print_available_devices()

  if args.selected:
    LstmCellTypes = args.selected.split(",")
  benchmarks = {}
  if not args.no_gpu and is_gpu_available():
    for lstm_unit in LstmCellTypes:
      benchmarks["GPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=True)
  if not args.no_cpu:
    for lstm_unit in LstmCellTypes:
      # Some cell types only exist as GPU kernels; skip them on the CPU pass.
      if lstm_unit in GpuOnlyCellTypes:
        continue
      benchmarks["CPU:" + lstm_unit] = benchmark(lstm_unit=lstm_unit, use_gpu=False)

  print("-" * 20)
  print("Settings:")
  pprint(base_settings)
  print("Final results:")
  # Re-sort (time, unit) so the fastest variants are printed first.
  for t, lstm_unit in sorted([(t, lstm_unit) for (lstm_unit, t) in sorted(benchmarks.items())]):
    print("  %s: %s" % (lstm_unit, hms_fraction(t)))
  print("Done.")
Example #9
0
def _init():
    """
    One-time setup: apply optional ``settings.py`` overrides to ``Settings``,
    put the RETURNN checkout on ``sys.path`` and initialize its log.
    """
    override_file = base_dir + "/settings.py"
    if os.path.exists(override_file):
        from lib.utils import load_config_py, ObjAsDict
        load_config_py(override_file, ObjAsDict(Settings))
    assert os.path.exists(Settings.returnn_path())
    sys.path.insert(0, Settings.returnn_path())  # so that we can import Config
    try:
        import returnn  # new-style RETURNN import
    except ImportError:
        pass
    from Log import log
    log.initialize()  # default verbosity (3)
Example #10
0
def init(configFilename=None):
  """
  Initialize RETURNN for the collect-words tool.

  :param str|None configFilename: if given, config/log/network/data are
    initialized from it; otherwise only a default log is set up
  """
  rnn.init_better_exchook()
  rnn.init_thread_join_hack()
  if not configFilename:
    log.initialize()
  else:
    rnn.init_config(configFilename, command_line_options=[])
    rnn.init_log()
  print("Returnn collect-words starting up.", file=log.v3)
  rnn.init_faulthandler()
  if configFilename:
    rnn.init_config_json_network()
    rnn.init_data()
    rnn.print_task_properties()
def init(configFilename=None):
  """
  Initialize CRNN for the collect-orth-symbols tool.

  :param str|None configFilename: if given, config/log/network/data are
    initialized from it; otherwise only a default log is set up
  """
  rnn.initBetterExchook()
  rnn.initThreadJoinHack()
  if configFilename:
    rnn.initConfig(configFilename, commandLineOptions=[])
    rnn.initLog()
  else:
    log.initialize()
  # Fixed: was a Python-2-only "print >> log.v3, ..." statement, which is a
  # syntax error on Python 3.  Same output, via the print function.
  print("CRNN collect-orth-symbols starting up.", file=log.v3)
  rnn.initFaulthandler()
  if configFilename:
    rnn.initConfigJsonNetwork()
    rnn.initData()
    rnn.printTaskProperties()
Example #12
0
def init(configFilename=None):
    """
    Initialize RETURNN for the collect-words tool.

    :param str|None configFilename: if given, config/log/network/data are
      initialized from it; otherwise only a default log is set up
    """
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if configFilename:
        rnn.init_config(configFilename, command_line_options=[])
        rnn.init_log()
    else:
        log.initialize()
    print("Returnn collect-words starting up.", file=log.v3)
    rnn.init_faulthandler()
    if configFilename:
        rnn.init_config_json_network()
        rnn.init_data()
        rnn.print_task_properties()
Example #13
0
  def load_state_tying(self, state_tying_name='state-tying.txt.gz'):
    """
    Read a state-tying map from ``state_tying_name`` and store it on
    ``self.state_tying`` (its ``allo_map`` attribute is the relevant part).

    :param str state_tying_name: holds the path and name of the state tying file
    """
    from os.path import isfile
    from Log import log
    from LmDataset import StateTying

    log.initialize(verbosity=[5])
    tying_path = state_tying_name
    assert isfile(tying_path), "State tying file does not exist"
    self.state_tying = StateTying(tying_path)
def init(configFilename=None):
    """
    Initialize CRNN for the collect-orth-symbols tool.

    :param str|None configFilename: if given, config/log/network/data are
      initialized from it; otherwise only a default log is set up
    """
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if configFilename:
        rnn.initConfig(configFilename, commandLineOptions=[])
        rnn.initLog()
    else:
        log.initialize()
    print("CRNN collect-orth-symbols starting up.", file=log.v3)
    rnn.initFaulthandler()
    if configFilename:
        rnn.initConfigJsonNetwork()
        rnn.initData()
        rnn.printTaskProperties()
Example #15
0
  def _load_lexicon(self):
    """
    Parse the XML lexicon at ``self.lexicon_name`` into ``self.lexicon``;
    ``lemmas`` and ``phonemes`` are the relevant attributes of the result.
    """
    from LmDataset import Lexicon
    from Log import log
    from os.path import isfile

    lexicon_path = self.lexicon_name
    assert isfile(lexicon_path), "Lexicon does not exists"

    log.initialize(verbosity=[5])
    self.lexicon = Lexicon(lexicon_path)
Example #16
0
def demo():
  """
  Demo run: given a config with an existing learning-rate file, replay how the
  learning rate would have been chosen for every epoch and print the result.
  Reads config/options from ``sys.argv``.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options]" % __file__)
    print("example usage: python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001" % __file__)
  rnn.initConfig(commandLineOptions=sys.argv[1:])
  from Pretrain import pretrainFromConfig
  pretrain = pretrainFromConfig(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  rnn.config._hack_value_reading_debug()
  log.initialize(verbosity=[5])
  control = loadLearningRateControlFromConfig(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epochData:
    print("No epoch data so far.")
    return
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.defaultLearningRate)
  maxEpoch = max(control.epochData.keys())
  for epoch in range(1, maxEpoch + 2):  # all epochs [1..maxEpoch+1]
    oldLearningRate = None
    if epoch in control.epochData:
      oldLearningRate = control.epochData[epoch].learningRate
    if epoch < first_non_pretrain_epoch:
      learningRate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    elif first_non_pretrain_epoch > 1 and epoch == first_non_pretrain_epoch:
      learningRate = control.defaultLearningRate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (epoch, learningRate, oldLearningRate)
    else:
      # NOTE(review): other variants of this demo call
      # calcNewLearnignRateForEpoch (sic); confirm which method name the
      # control object in this revision actually provides.
      learningRate = control.calcLearningRateForEpoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learningRate, oldLearningRate)
    if learningRate < control.minLearningRate:
      learningRate = control.minLearningRate
      s += ", clipped to %s" % learningRate
    s += ", previous relative error: %s" % control.calcRelativeError(epoch - 2, epoch - 1)
    print(s)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epochData:
      control.epochData[epoch].learningRate = learningRate
    else:
      control.epochData[epoch] = control.EpochData(learningRate=learningRate)
  print("Finished, last stored epoch was %i." % maxEpoch)
Example #17
0
  def load_lexicon(self, lexicon_name='recog.150k.final.lex.gz'):
    """
    Parse the XML lexicon file into ``self.lexicon``; ``lemmas`` and
    ``phonemes`` are the relevant attributes of the result.

    :param str lexicon_name: holds the path and name of the lexicon file
    """
    from os.path import isfile
    from Log import log
    from LmDataset import Lexicon

    log.initialize(verbosity=[5])
    lexicon_path = lexicon_name
    assert isfile(lexicon_path), "Lexicon file does not exist"
    self.lexicon = Lexicon(lexicon_path)
Example #18
0
def _main():
  """
  CLI entry point: either dump a corpus file (optionally through a
  post-processor chain), or run an LmDataset iteration demo on a kwargs dict
  given as a Python literal on the command line.
  """
  import better_exchook
  better_exchook.install()
  from argparse import ArgumentParser
  arg_parser = ArgumentParser()
  arg_parser.add_argument(
    "lm_dataset", help="Python eval string, should eval to dict" +
                       ", or otherwise filename, and will just dump")
  arg_parser.add_argument("--post_processor", nargs="*")
  args = arg_parser.parse_args()
  if not args.lm_dataset.startswith("{") and os.path.isfile(args.lm_dataset):
    # Plain corpus file: just dump it (through the post-processors, if any).
    callback = print
    if args.post_processor:
      pp = get_post_processor_function(args.post_processor)
      callback = lambda text: print(pp(text))
    iter_corpus(args.lm_dataset, callback)
    sys.exit(0)

  log.initialize(verbosity=[5])
  print("LmDataset demo startup")
  # NOTE(review): eval() of a command-line argument -- acceptable for a
  # trusted local demo tool, but never expose this to untrusted input.
  kwargs = eval(args.lm_dataset)
  assert isinstance(kwargs, dict), "arg should be str of dict: %s" % args.lm_dataset
  print("Creating LmDataset with kwargs=%r ..." % kwargs)
  dataset = LmDataset(**kwargs)
  print("init_seq_order ...")
  dataset.init_seq_order(epoch=1)

  seq_idx = 0
  last_log_time = time.time()
  print("start iterating through seqs ...")
  while dataset.is_less_than_num_seqs(seq_idx):
    if seq_idx == 0:
      print("load_seqs with seq_idx=%i ...." % seq_idx)
    dataset.load_seqs(seq_idx, seq_idx + 1)

    # Throttled progress report (at most about every two seconds).
    if time.time() - last_log_time > 2.0:
      last_log_time = time.time()
      print("Loading %s progress, %i/%i (%.0f%%) seqs loaded (%.0f%% skipped), (%.0f%% unknown) total syms %i ..." % (
            dataset.__class__.__name__, dataset.next_orth_idx, dataset.estimated_num_seqs,
            100.0 * dataset.next_orth_idx / dataset.estimated_num_seqs,
            100.0 * dataset.num_skipped / (dataset.next_orth_idx or 1),
            100.0 * dataset.num_unknown / dataset._num_timesteps_accumulated["data"],
            dataset._num_timesteps_accumulated["data"]))

    seq_idx += 1

  print("finished iterating, num seqs: %i" % seq_idx)
  print("dataset len:", dataset.len_info())
Example #19
0
def __load_lexicon(lexFile):
    """
    Parse an XML lexicon file and return it as a Lexicon object.

    :param str lexFile: lexicon file with xml structure
    :return: parsed lexicon; ``lemmas`` and ``phonemes`` are the relevant attributes
    :rtype: Lexicon
    """
    from os.path import isfile
    from Log import log
    from LmDataset import Lexicon

    assert isfile(lexFile), "Lexicon does not exists"

    log.initialize(verbosity=[5])
    return Lexicon(lexFile)
Example #20
0
    def _load_lexicon(self, reload=False):
        """
        Ensure ``self.lexicon`` holds a parsed :class:`Lexicon`.

        The XML file at ``self.lexicon_name`` is (re)parsed when ``reload``
        is true or ``self.lexicon`` is not a Lexicon yet; ``lemmas`` and
        ``phonemes`` are the relevant attributes of the result.

        :param bool reload: should lexicon be reloaded
        """
        from LmDataset import Lexicon
        needs_load = reload or not isinstance(self.lexicon, Lexicon)
        if not needs_load:
            return

        from os.path import isfile
        from Log import log

        assert isfile(self.lexicon_name), "Lexicon does not exists"

        log.initialize(verbosity=[5])
        self.lexicon = Lexicon(self.lexicon_name)
Example #21
0
def _main(argv):
  """
  Demo: build an LmDataset from ``eval(argv[0])``, iterate through all of its
  sequences, and log progress about every two seconds.

  :param list[str] argv: argv[0] is a Python dict literal of LmDataset kwargs
  """
  import better_exchook
  better_exchook.install()
  log.initialize(verbosity=[5])
  dataset = LmDataset(**eval(argv[0]))
  dataset.init_seq_order(epoch=1)

  seq_idx = 0
  last_log_time = time.time()
  while dataset.is_less_than_num_seqs(seq_idx):
    dataset.load_seqs(seq_idx, seq_idx + 1)

    if time.time() - last_log_time > 2.0:
      last_log_time = time.time()
      # Fixed: was a Python-2-only "print >> log.v5, ..." statement, which is
      # a syntax error on Python 3.  Same output, via the print function.
      print("Loading %s progress, %i/%i (%.0f%%) seqs loaded (%.0f%% skipped), total syms %i ..." % (
            dataset.__class__.__name__, dataset.next_orth_idx, dataset.estimated_num_seqs,
            100.0 * dataset.next_orth_idx / dataset.estimated_num_seqs,
            100.0 * dataset.num_skipped / (dataset.next_orth_idx or 1),
            dataset._num_timesteps_accumulated["data"]), file=log.v5)

    seq_idx += 1

  print("dataset len:", dataset.len_info(), file=log.v3)
# Theano-era NativeOp test setup: imports plus global traceback/log init.
import NativeOp
import numpy as np
from numpy.testing.utils import assert_almost_equal
import theano.tensor as T
import TheanoUtil
f32 = "float32"  # dtype string shared by the test fixtures below

import better_exchook
from Log import log

better_exchook.replace_traceback_format_tb()
log.initialize()  # some code might need it


def test_sparse_to_dense():
    """
    Fixture for NativeOp sparse-to-dense: sparse index pairs ``s0``/``s1``
    with weights ``w`` and validity mask ``m`` (7 entries x 2 batches), and
    ``W``, presumably the expected dense result of shape
    (n_time, n_batch, n_dim) -- confirm against the NativeOp under test.
    """
    n_time = 3
    n_batch = 2
    n_dim = 5
    s0 = np.array([[0, 0], [0, 1], [1, 1], [1, 2], [1, 2], [2, 2], [2, 2]],
                  dtype=f32)
    s1 = np.array([[1, 2], [2, 3], [1, 1], [2, 0], [4, 1], [3, 3], [4, 4]],
                  dtype=f32)
    w = np.array([[1, 2], [2, 1], [1, 2], [3, 4], [5, 6], [7, 8], [9, 9]],
                 dtype=f32)
    m = np.array([[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0]],
                 dtype=f32)
    W = np.array([[[0, 1, 2, 0, 0], [0, 0, 2, 0, 0]],
                  [[0, 1, 3, 0, 5], [0, 2, 0, 1, 0]],
                  [[0, 0, 0, 7, 9], [4, 6, 0, 8, 0]]],
                 dtype=f32)
    assert W.shape == (n_time, n_batch, n_dim)
# Theano-era chunking test setup: imports plus global traceback/log init.
# NOTE(review): sys is used on the first line but imported only a few lines
# below -- presumably it was imported earlier in the original file; confirm.
sys.path += ["."]  # Python 3 hack

import NativeOp
import numpy
from numpy.testing.utils import assert_almost_equal
import theano.tensor as T
import TheanoUtil
import sys
f32 = "float32"


import better_exchook
from Log import log

better_exchook.replace_traceback_format_tb()
log.initialize()  # some code might need it
TheanoUtil.monkey_patches()


# Short aliases for the NativeOp chunking helpers used by the tests below.
chunk = NativeOp.chunk
unchunk = NativeOp.unchunk
naive_chunk_start_frames = NativeOp.Chunking.naive_chunk_start_frames


def get_num_chunks(n_time, chunk_size, chunk_step):
  """Return how many chunks the naive chunking yields for ``n_time`` frames."""
  start_frames = naive_chunk_start_frames(n_time, chunk_size, chunk_step)
  return len(start_frames)


def naive_chunk(x, chunk_size, chunk_step):
  if x.ndim == 3:  # (time,batch,dim)
    if x.shape[1] == 1:
Example #24
0
def demo():
    """
    Demo / sanity tool for SprintCacheDataset: read the dataset described by a
    config and a SprintCacheDataset kwargs dict, then either compare both
    sequence-by-sequence ("compare") or time the iteration ("benchmark").
    """
    print("SprintDataset demo.")
    from argparse import ArgumentParser
    from Util import hms, progress_bar_with_time
    from Log import log
    from Config import Config
    from Dataset import init_dataset
    arg_parser = ArgumentParser()
    arg_parser.add_argument("--config",
                            help="config with ExternSprintDataset",
                            required=True)
    arg_parser.add_argument("--sprint_cache_dataset",
                            help="kwargs dict for SprintCacheDataset",
                            required=True)
    # NOTE(review): sys.maxint exists on Python 2 only (sys.maxsize on 3) --
    # confirm the intended Python version before changing.
    arg_parser.add_argument("--max_num_seqs", default=sys.maxint, type=int)
    arg_parser.add_argument("--action",
                            default="compare",
                            help="compare or benchmark")
    args = arg_parser.parse_args()
    log.initialize(verbosity=[4])
    sprint_cache_dataset_kwargs = eval(args.sprint_cache_dataset)
    assert isinstance(sprint_cache_dataset_kwargs, dict)
    sprint_cache_dataset = SprintCacheDataset(**sprint_cache_dataset_kwargs)
    print("SprintCacheDataset: %r" % sprint_cache_dataset)
    config = Config()
    config.load_file(args.config)
    dataset = init_dataset(config.typed_value("train"))
    print("Dataset via config: %r" % dataset)
    # Both datasets must agree on the input/output dims to be comparable.
    assert sprint_cache_dataset.num_inputs == dataset.num_inputs
    assert tuple(sprint_cache_dataset.num_outputs["classes"]) == tuple(
        dataset.num_outputs["classes"])
    sprint_cache_dataset.init_seq_order(epoch=1)

    if args.action == "compare":
        print("Iterating through dataset...")
        seq_idx = 0
        dataset.init_seq_order(epoch=1)
        while seq_idx < args.max_num_seqs:
            if not dataset.is_less_than_num_seqs(seq_idx):
                break
            dataset.load_seqs(seq_idx, seq_idx + 1)
            tag = dataset.get_tag(seq_idx)
            assert not tag.startswith(
                "seq-"), "dataset does not provide tag-names for seqs"
            dataset_seq = sprint_cache_dataset.get_dataset_seq_for_name(tag)
            data = dataset.get_data(seq_idx, "data")
            targets = dataset.get_data(seq_idx, "classes")
            # Features and targets must match exactly between both datasets.
            assert data.shape == dataset_seq.features.shape
            assert targets.shape == dataset_seq.targets["classes"].shape
            assert numpy.allclose(data, dataset_seq.features)
            assert numpy.allclose(targets, dataset_seq.targets["classes"])
            seq_idx += 1
            progress_bar_with_time(dataset.get_complete_frac(seq_idx))

        print("Finished through dataset. Num seqs: %i" % seq_idx)
        print("SprintCacheDataset has num seqs: %i." %
              sprint_cache_dataset.num_seqs)

    elif args.action == "benchmark":
        print("Iterating through dataset...")
        start_time = time.time()
        seq_tags = []
        seq_idx = 0
        dataset.init_seq_order(epoch=1)
        while seq_idx < args.max_num_seqs:
            if not dataset.is_less_than_num_seqs(seq_idx):
                break
            dataset.load_seqs(seq_idx, seq_idx + 1)
            tag = dataset.get_tag(seq_idx)
            assert not tag.startswith(
                "seq-"), "dataset does not provide tag-names for seqs"
            seq_tags.append(tag)
            dataset.get_data(seq_idx, "data")
            dataset.get_data(seq_idx, "classes")
            seq_idx += 1
            progress_bar_with_time(dataset.get_complete_frac(seq_idx))
        print("Finished through dataset. Num seqs: %i, time: %f" %
              (seq_idx, time.time() - start_time))
        print("SprintCacheDataset has num seqs: %i." %
              sprint_cache_dataset.num_seqs)
        if hasattr(dataset, "exit_handler"):
            dataset.exit_handler()
        else:
            print("No way to stop any background tasks.")
        del dataset

        # Second pass: time the cache lookups by the tags collected above.
        start_time = time.time()
        print("Iterating through SprintCacheDataset...")
        for i, tag in enumerate(seq_tags):
            sprint_cache_dataset.get_dataset_seq_for_name(tag)
            progress_bar_with_time(float(i) / len(seq_tags))
        print("Finished through SprintCacheDataset. time: %f" %
              (time.time() - start_time, ))

    else:
        raise Exception("invalid action: %r" % args.action)
Example #25
0
def init_log(config):
  """
  Initialize the global log to write to ``<log_dir><model>.log``.

  :param config: config object providing ``dir``/``unicode``/``int`` accessors
  """
  out_dir = config.dir("log_dir", "logs")
  model_name = config.unicode("model")
  log_file = out_dir + model_name + ".log"
  log.initialize([log_file], [config.int("log_verbosity", 3)], [])
def demo():
  """
  Demo run. Given some learning rate file (with scores / existing lrs), will calculate how lrs would have been set,
  given some config.
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options] [++check_learning_rates 1]" % __file__)
    print(
      ("example usage: "
       "python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001") % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  rnn.config.update({"log": []})
  rnn.init_log()
  rnn.init_backend_engine()
  # With ++check_learning_rates 1, exit non-zero if a replayed lr differs
  # from the stored one (see the check below).
  check_lr = rnn.config.bool("check_learning_rates", False)
  from Pretrain import pretrain_from_config
  pretrain = pretrain_from_config(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = load_learning_rate_control_from_config(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epoch_data:
    print("No epoch data so far.")
    return
  first_epoch = min(control.epoch_data.keys())
  if first_epoch != 1:
    print("Strange, first epoch from epoch data is %i." % first_epoch)
  print("Error key: %s from %r" % (control.get_error_key(epoch=first_epoch), control.epoch_data[first_epoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.default_learning_rate)
  max_epoch = max(control.epoch_data.keys())
  for epoch in range(1, max_epoch + 2):  # all epochs [1..max_epoch+1]
    old_learning_rate = None
    if epoch in control.epoch_data:
      old_learning_rate = control.epoch_data[epoch].learning_rate
    if epoch < first_non_pretrain_epoch:
      learning_rate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    # Chained comparison: pretrain was used (>1) and this is exactly the
    # first epoch after it.
    elif 1 < first_non_pretrain_epoch == epoch:
      learning_rate = control.default_learning_rate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
        epoch, learning_rate, old_learning_rate)
    else:
      learning_rate = control.calc_new_learning_rate_for_epoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    if learning_rate < control.min_learning_rate:
      learning_rate = control.min_learning_rate
      s += ", clipped to %s" % learning_rate
    s += ", previous relative error: %s" % control.calc_relative_error(epoch - 2, epoch - 1)
    if hasattr(control, "_calc_recent_mean_relative_error"):
      # noinspection PyProtectedMember
      s += ", previous mean relative error: %s" % control._calc_recent_mean_relative_error(epoch)
    print(s)
    if check_lr and old_learning_rate is not None:
      if old_learning_rate != learning_rate:
        print("Learning rate is different in epoch %i!" % epoch)
        sys.exit(1)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epoch_data:
      control.epoch_data[epoch].learning_rate = learning_rate
    else:
      # NOTE(review): EpochData apparently still takes the historical
      # camelCase "learningRate" ctor kwarg -- confirm before renaming.
      control.epoch_data[epoch] = control.EpochData(learningRate=learning_rate)
  print("Finished, last stored epoch was %i." % max_epoch)
Example #27
0
# TF network test setup: project imports plus global traceback/log init.
import contextlib
import unittest
import numpy.testing
from pprint import pprint
import better_exchook
better_exchook.replace_traceback_format_tb()

from Config import Config
from TFNetwork import *
from TFNetworkLayer import *
from TFEngine import *
from Log import log
import TFUtil
TFUtil.debug_register_better_repr()

# Verbose log shared by all tests in this module.
log.initialize(verbosity=[5])

@contextlib.contextmanager
def make_scope():
  """
  Create a fresh TF graph plus a session operating on it, and yield the session.
  Graph and session are both cleaned up when the ``with`` block exits.

  :return: yields the session
  """
  graph = tf.Graph()
  with graph.as_default(), tf.Session(graph=graph) as session:
    yield session

# Global network definition dict; presumably filled in by build_resnet() below -- confirm.
network = {}
# Name of the layer most recently added to `network` (new layers connect from it) -- assumption, verify.
_last = "data"

def build_resnet(conv_time_dim):
  # network
  # (also defined by num_inputs & num_outputs)
  dropout = 0
  L2 = 0.1
Exemple #28
0
def initLog():
    """
    Initialize the global logging (:mod:`Log`) from the global config:
    reads the "log", "log_verbosity" and "log_format" config options.
    """
    log.initialize(
        logs=config.list('log', []),
        verbosity=config.int_list('log_verbosity', []),
        formatter=config.list('log_format', []))
Exemple #29
0
import sys
sys.path += ["."]  # Python 3 hack
from TFEngine import *
import Util
import TFUtil
TFUtil.debugRegisterBetterRepr()
from Config import Config
from nose.tools import assert_equal, assert_is_instance
import numpy
import numpy.testing
import os
from pprint import pprint
import better_exchook
better_exchook.replace_traceback_format_tb()
from Log import log
log.initialize(verbosity=[5])

session = tf.InteractiveSession()


def test_DataProvider():
    """
  :param Dataset.Dataset dataset:
  :param int seq_idx:
  :param str|None output_layer_name: e.g. "output". if not set, will read from config "forward_output_layer"
  :return: numpy array, output in time major format (time,batch,dim)
  :rtype: numpy.ndarray
  """
    from GeneratingDataset import DummyDataset
    seq_len = 5
    n_data_dim = 2
Exemple #30
0
import theano.printing
from pprint import pprint
from GeneratingDataset import Task12AXDataset
from Updater import Updater
from Device import Device
from Util import NumbersDict
from Config import Config
from NetworkHiddenLayer import DumpLayer
import rnn
import EngineUtil
import Network
import better_exchook
from Log import log

better_exchook.replace_traceback_format_tb()
log.initialize()  # some code needs it

# Some code uses get_global_config().
# Not sure about the most clean solution.
rnn.config = Config()


class DummyDevice:
  """
  Behave like Device.
  Only needed for assign_dev_data.
  """
  # NOTE(review): attribute semantics mirror the real Device class -- confirm there.
  # Presumably means synchronous (non-async) operation.
  blocking = True
  # Data keys that assign_dev_data is expected to transfer.
  used_data_keys = ("data", "classes")
  # Placeholders; presumably set by assign_dev_data -- verify.
  targets = None
  output_index = None
Exemple #31
0
def initLog():
  """
  Initialize the global logging (:mod:`Log`) from the global config:
  reads the "log", "log_verbosity" and "log_format" config options.
  """
  logs = config.list('log', [])
  log_verbosity = config.int_list('log_verbosity', [])
  log_format = config.list('log_format', [])
  # PEP8 (E251): no spaces around '=' for keyword arguments;
  # also makes this copy consistent with the other initLog definition in this file.
  log.initialize(logs=logs, verbosity=log_verbosity, formatter=log_format)
Exemple #32
0
from nose.tools import assert_equal, assert_is_instance, assert_in, assert_not_in, assert_true, assert_false
from GeneratingDataset import GeneratingDataset, DummyDataset
from EngineBatch import Batch
from Dataset import DatasetSeq
from Log import log
import numpy as np

log.initialize()


def test_generate_batches():
  """
  Iterate through all batches of a small dummy dataset (non-recurrent case);
  only checks that batch generation runs through without error.
  """
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=20)
  dataset.init_seq_order(1)
  gen = dataset.generate_batches(recurrent_net=False, max_seqs=2, batch_size=5)
  while True:
    if not gen.has_more():
      break
    gen.peek_next_n(1)
    gen.advance(1)

def test_generate_batches_recurrent():
  """
  Iterate through all batches of a small dummy dataset (recurrent case);
  only checks that batch generation runs through without error.
  """
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=20)
  dataset.init_seq_order(1)
  gen = dataset.generate_batches(recurrent_net=True, max_seqs=2, batch_size=5)
  while True:
    if not gen.has_more():
      break
    gen.peek_next_n(1)
    gen.advance(1)

def test_iterate_seqs_no_chunking_1():
  """
  Exercise Dataset._iterate_seqs with chunking disabled (chunk_size=0, chunk_step=0).
  """
  dataset = DummyDataset(input_dim=2, output_dim=3, num_seqs=2, seq_len=11)
  dataset.init_seq_order(1)
  # noinspection PyProtectedMember
  seqs = list(dataset._iterate_seqs(chunk_size=0, chunk_step=0))
Exemple #33
0
def demo():
  """
  Demo / debug tool for :class:`SprintCacheDataset`.

  Command-line options (see the ArgumentParser below):
  --config: config file providing the "train" dataset (e.g. ExternSprintDataset).
  --sprint_cache_dataset: Python dict literal with kwargs for SprintCacheDataset.
  --max_num_seqs: limit on the number of seqs to iterate.
  --action: "compare" (check both datasets yield identical data per seq tag)
    or "benchmark" (time iterating through both datasets).
  """
  print("SprintDataset demo.")
  from argparse import ArgumentParser
  from Util import progress_bar_with_time
  from Log import log
  from Config import Config
  from Dataset import init_dataset
  arg_parser = ArgumentParser()
  arg_parser.add_argument("--config", help="config with ExternSprintDataset", required=True)
  arg_parser.add_argument("--sprint_cache_dataset", help="kwargs dict for SprintCacheDataset", required=True)
  arg_parser.add_argument("--max_num_seqs", default=sys.maxsize, type=int)
  arg_parser.add_argument("--action", default="compare", help="compare or benchmark")
  args = arg_parser.parse_args()
  log.initialize(verbosity=[4])
  # SECURITY NOTE: eval() on a command-line string. Acceptable only because this
  # is a local demo tool run by the user themselves; never do this on untrusted input.
  sprint_cache_dataset_kwargs = eval(args.sprint_cache_dataset)
  assert isinstance(sprint_cache_dataset_kwargs, dict)
  sprint_cache_dataset = SprintCacheDataset(**sprint_cache_dataset_kwargs)
  print("SprintCacheDataset: %r" % sprint_cache_dataset)
  config = Config()
  config.load_file(args.config)
  dataset = init_dataset(config.typed_value("train"))
  print("Dataset via config: %r" % dataset)
  # Both datasets must agree on input/output dims, otherwise comparing makes no sense.
  assert sprint_cache_dataset.num_inputs == dataset.num_inputs
  assert tuple(sprint_cache_dataset.num_outputs["classes"]) == tuple(dataset.num_outputs["classes"])
  sprint_cache_dataset.init_seq_order(epoch=1)

  if args.action == "compare":
    # Compare the data of every seq (looked up by its tag) between both datasets.
    print("Iterating through dataset...")
    seq_idx = 0
    dataset.init_seq_order(epoch=1)
    while seq_idx < args.max_num_seqs:
      if not dataset.is_less_than_num_seqs(seq_idx):
        break
      dataset.load_seqs(seq_idx, seq_idx + 1)
      tag = dataset.get_tag(seq_idx)
      # "seq-" prefixed tags are generic auto-generated names; we need real tags
      # to be able to look up the corresponding seq in the Sprint cache.
      assert not tag.startswith("seq-"), "dataset does not provide tag-names for seqs"
      dataset_seq = sprint_cache_dataset.get_dataset_seq_for_name(tag)
      data = dataset.get_data(seq_idx, "data")
      targets = dataset.get_data(seq_idx, "classes")
      assert data.shape == dataset_seq.features["data"].shape
      assert targets.shape == dataset_seq.features["classes"].shape
      # allclose (not exact equality): feature values may differ by float rounding.
      assert numpy.allclose(data, dataset_seq.features["data"])
      assert numpy.allclose(targets, dataset_seq.features["classes"])
      seq_idx += 1
      progress_bar_with_time(dataset.get_complete_frac(seq_idx))

    print("Finished through dataset. Num seqs: %i" % seq_idx)
    print("SprintCacheDataset has num seqs: %i." % sprint_cache_dataset.num_seqs)

  elif args.action == "benchmark":
    # First pass: time iterating the config dataset, remembering the seq tags.
    print("Iterating through dataset...")
    start_time = time.time()
    seq_tags = []
    seq_idx = 0
    dataset.init_seq_order(epoch=1)
    while seq_idx < args.max_num_seqs:
      if not dataset.is_less_than_num_seqs(seq_idx):
        break
      dataset.load_seqs(seq_idx, seq_idx + 1)
      tag = dataset.get_tag(seq_idx)
      assert not tag.startswith("seq-"), "dataset does not provide tag-names for seqs"
      seq_tags.append(tag)
      dataset.get_data(seq_idx, "data")
      dataset.get_data(seq_idx, "classes")
      seq_idx += 1
      progress_bar_with_time(dataset.get_complete_frac(seq_idx))
    print("Finished through dataset. Num seqs: %i, time: %f" % (seq_idx, time.time() - start_time))
    print("SprintCacheDataset has num seqs: %i." % sprint_cache_dataset.num_seqs)
    # Stop possible background workers of the dataset before timing the cache,
    # so they do not distort the second measurement.
    if hasattr(dataset, "exit_handler"):
      dataset.exit_handler()
    else:
      print("No way to stop any background tasks.")
    del dataset

    # Second pass: time looking up the same seqs directly in the Sprint cache.
    start_time = time.time()
    print("Iterating through SprintCacheDataset...")
    for i, tag in enumerate(seq_tags):
      sprint_cache_dataset.get_dataset_seq_for_name(tag)
      progress_bar_with_time(float(i) / len(seq_tags))
    print("Finished through SprintCacheDataset. time: %f" % (time.time() - start_time,))

  else:
    raise Exception("invalid action: %r" % args.action)
Exemple #34
0
import theano.printing
from pprint import pprint
from GeneratingDataset import Task12AXDataset
from Updater import Updater
from Device import Device
from Util import NumbersDict
from Config import Config
from NetworkHiddenLayer import DumpLayer
import rnn
import EngineUtil
import Network
import better_exchook
from Log import log

better_exchook.replace_traceback_format_tb()
log.initialize()  # some code needs it

# Some code uses get_global_config().
# Not sure about the most clean solution.
rnn.config = Config()


class DummyDevice:
    """
  Behave like Device.
  Only needed for assign_dev_data.
  """
    # NOTE(review): attribute semantics mirror the real Device class -- confirm there.
    # Presumably means synchronous (non-async) operation.
    blocking = True
    # Data keys that assign_dev_data is expected to transfer.
    used_data_keys = ("data", "classes")
    # Placeholders; presumably set by assign_dev_data -- verify.
    targets = None
    output_index = None
def demo():
  """
  Demo run. Given some learning rate file (with scores / existing lrs), will calculate how lrs would have been set,
  given some config.

  With ``++check_learning_rates 1``, additionally verifies that the recalculated
  learning rates match the ones stored in the learning rate file (exits with
  code 1 on the first mismatch).
  """
  import better_exchook
  better_exchook.install()
  import rnn
  import sys
  if len(sys.argv) <= 1:
    print("usage: python %s [config] [other options] [++check_learning_rates 1]" % __file__)
    print(
      ("example usage: "
       "python %s ++learning_rate_control newbob ++learning_rate_file newbob.data ++learning_rate 0.001") % __file__)
  rnn.init_config(command_line_options=sys.argv[1:])
  # noinspection PyProtectedMember
  rnn.config._hack_value_reading_debug()
  # Disable log files; we only print to stdout here.
  rnn.config.update({"log": []})
  rnn.init_log()
  rnn.init_backend_engine()
  check_lr = rnn.config.bool("check_learning_rates", False)
  from Pretrain import pretrain_from_config
  pretrain = pretrain_from_config(rnn.config)
  first_non_pretrain_epoch = 1
  pretrain_learning_rate = None
  if pretrain:
    first_non_pretrain_epoch = pretrain.get_train_num_epochs() + 1
  log.initialize(verbosity=[5])
  control = load_learning_rate_control_from_config(rnn.config)
  print("LearningRateControl: %r" % control)
  if not control.epoch_data:
    print("No epoch data so far.")
    return
  first_epoch = min(control.epoch_data.keys())
  if first_epoch != 1:
    print("Strange, first epoch from epoch data is %i." % first_epoch)
  print("Error key: %s from %r" % (control.get_error_key(epoch=first_epoch), control.epoch_data[first_epoch].error))
  if pretrain:
    pretrain_learning_rate = rnn.config.float('pretrain_learning_rate', control.default_learning_rate)
  max_epoch = max(control.epoch_data.keys())
  for epoch in range(1, max_epoch + 2):  # all epochs [1..max_epoch+1]
    # Learning rate stored in the file for this epoch, if any (None for max_epoch+1).
    old_learning_rate = None
    if epoch in control.epoch_data:
      old_learning_rate = control.epoch_data[epoch].learning_rate
    if epoch < first_non_pretrain_epoch:
      # During pretrain, the lr is fixed (pretrain_learning_rate from the config).
      learning_rate = pretrain_learning_rate
      s = "Pretrain epoch %i, fixed learning rate: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    elif 1 < first_non_pretrain_epoch == epoch:
      # First epoch right after pretrain: reset to the default lr.
      learning_rate = control.default_learning_rate
      s = "First epoch after pretrain, epoch %i, fixed learning rate: %s (was %s)" % (
        epoch, learning_rate, old_learning_rate)
    else:
      # Normal case: let the control scheme (e.g. Newbob) calculate the lr.
      learning_rate = control.calc_new_learning_rate_for_epoch(epoch)
      s = "Calculated learning rate for epoch %i: %s (was: %s)" % (epoch, learning_rate, old_learning_rate)
    if learning_rate < control.min_learning_rate:
      learning_rate = control.min_learning_rate
      s += ", clipped to %s" % learning_rate
    s += ", previous relative error: %s" % control.calc_relative_error(epoch - 2, epoch - 1)
    if hasattr(control, "_calc_recent_mean_relative_error"):
      # noinspection PyProtectedMember
      s += ", previous mean relative error: %s" % control._calc_recent_mean_relative_error(epoch)
    print(s)
    if check_lr and old_learning_rate is not None:
      if old_learning_rate != learning_rate:
        print("Learning rate is different in epoch %i!" % epoch)
        sys.exit(1)
    # Overwrite new learning rate so that the calculation for further learning rates stays consistent.
    if epoch in control.epoch_data:
      control.epoch_data[epoch].learning_rate = learning_rate
    else:
      control.epoch_data[epoch] = control.EpochData(learningRate=learning_rate)
  print("Finished, last stored epoch was %i." % max_epoch)