Example #1
  def perform(self, node, inputs, output_storage):
    start_time = time.time()
    log_posteriors, seq_lengths = inputs

    if numpy.isnan(log_posteriors).any():
      print >> log.v1, 'SprintErrorSigOp: log_posteriors contain NaN!'
    if numpy.isinf(log_posteriors).any():
      print >> log.v1, 'SprintErrorSigOp: log_posteriors contain Inf!'
      #numpy.set_printoptions(threshold=numpy.nan)
      print >> log.v1, 'SprintErrorSigOp: log_posteriors:', log_posteriors

    if self.sprint_instance_pool is None:
      print >> log.v3, "SprintErrorSigOp: Starting Sprint %r" % self.sprint_opts
      self.sprint_instance_pool = SprintInstancePool.get_global_instance(sprint_opts=self.sprint_opts)

    loss, errsig = self.sprint_instance_pool.get_batch_loss_and_error_signal(log_posteriors, seq_lengths)
    #print >> log.v4, 'loss:', loss, 'errsig:', errsig
    output_storage[0][0] = loss
    output_storage[1][0] = errsig

    print >> log.v5, 'SprintErrorSigOp: avg frame loss for segments:', loss.sum() / seq_lengths.sum()
    end_time = time.time()
    if self.debug_perform_time is None:
      from Config import get_global_config
      config = get_global_config()
      self.debug_perform_time = config.bool("debug_SprintErrorSigOp_perform_time", False)
    if self.debug_perform_time:
      print >> log.v1, "SprintErrorSigOp perform time:", end_time - start_time
      from Device import deviceInstance
      assert deviceInstance.is_device_proc()
      forward_time = start_time - deviceInstance.compute_start_time
      print >> log.v1, "SprintErrorSigOp forward time:", forward_time
Example #2
  def perform(self, node, inputs, output_storage, params=None):
    start_time = time.time()
    log_posteriors, seq_lengths = inputs

    if numpy.isnan(log_posteriors).any():
      print('SprintErrorSigOp: log_posteriors contain NaN!', file=log.v1)
    if numpy.isinf(log_posteriors).any():
      print('SprintErrorSigOp: log_posteriors contain Inf!', file=log.v1)
      print('SprintErrorSigOp: log_posteriors:', log_posteriors, file=log.v1)

    if self.sprint_instance_pool is None:
      print("SprintErrorSigOp: Starting Sprint %r" % self.sprint_opts, file=log.v3)
      self.sprint_instance_pool = SprintInstancePool.get_global_instance(sprint_opts=self.sprint_opts)

    assert isinstance(self.sprint_instance_pool, SprintInstancePool)  # PyCharm confused otherwise
    loss, errsig = self.sprint_instance_pool.get_batch_loss_and_error_signal(log_posteriors, seq_lengths)
    output_storage[0][0] = loss
    output_storage[1][0] = errsig

    print('SprintErrorSigOp: avg frame loss for segments:', loss.sum() / seq_lengths.sum(), file=log.v5)
    end_time = time.time()
    if self.debug_perform_time is None:
      from Config import get_global_config
      config = get_global_config()
      self.debug_perform_time = config.bool("debug_SprintErrorSigOp_perform_time", False)
    if self.debug_perform_time:
      print("SprintErrorSigOp perform time:", end_time - start_time, file=log.v1)
      from Device import deviceInstance
      assert deviceInstance.is_device_proc()
      forward_time = start_time - deviceInstance.compute_start_time
      print("SprintErrorSigOp forward time:", forward_time, file=log.v1)
Example #3
 def __init__(self, sprintExecPath, minPythonControlVersion=2, sprintConfigStr="", sprintControlConfig=None, usePythonSegmentOrder=True):
   """
   :param str sprintExecPath: this executable will be called for the sub proc.
   :param int minPythonControlVersion: will be checked in the subprocess, via Sprint PythonControl.
   :param str sprintConfigStr: passed to Sprint as command line args.
     can have "config:" prefix - in that case, looked up in config.
     handled via eval_shell_str(), can thus have lazy content (if it is callable, will be called).
   :param dict[str]|None sprintControlConfig: passed to SprintControl.init().
   """
   assert os.path.exists(sprintExecPath)
   self.sprintExecPath = sprintExecPath
   self.minPythonControlVersion = minPythonControlVersion
   if sprintConfigStr.startswith("config:"):
     from Config import get_global_config
     config = get_global_config()
     assert config
     sprintConfigStr = config.typed_dict[sprintConfigStr[len("config:"):]]
   self.sprintConfig = eval_shell_str(sprintConfigStr)
   self.sprintControlConfig = sprintControlConfig
   self.usePythonSegmentOrder = usePythonSegmentOrder
   self.child_pid = None
   self.parent_pid = os.getpid()
   # There is no generic way to see whether Python is exiting.
   # This is our workaround. We check for it in self.run_inner().
   self.python_exit = False
   atexit.register(self.exit_handler)
   self._cur_seg_name = None
   self._cur_posteriors_shape = None
   self.is_calculating = False
   self.init()
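
The "config:" prefix handled above is a recurring pattern in this codebase: everything after the prefix names a key in the global config. A minimal, self-contained sketch of just that pattern (the key "sprint_args" and its value are hypothetical):

from Config import get_global_config

s = "config:sprint_args"
if s.startswith("config:"):
    config = get_global_config()
    # the remainder of the string is a key into the typed config dict
    s = config.typed_dict[s[len("config:"):]]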
Example #4
def init_dataset(kwargs):
    """
  :param dict[str]|str|(()->dict[str]) kwargs:
  :rtype: Dataset
  """
    assert kwargs
    if callable(kwargs):
        return init_dataset(kwargs())
    if isinstance(kwargs, (str, unicode)):
        if kwargs.startswith("config:"):
            from Config import get_global_config
            config = get_global_config()
            assert config
            return init_dataset(config.opt_typed_value(
                kwargs[len("config:"):]))
        return init_dataset_via_str(config_str=kwargs)
    kwargs = kwargs.copy()
    assert "class" in kwargs
    clazz_name = kwargs.pop("class")
    clazz = get_dataset_class(clazz_name)
    if not clazz:
        raise Exception("Dataset class %r not found" % clazz_name)
    files = kwargs.pop("files", [])
    obj = clazz(**kwargs)
    assert isinstance(obj, Dataset)
    if files:
        from HDFDataset import HDFDataset, NextGenHDFDataset
        assert isinstance(obj, (HDFDataset, NextGenHDFDataset))
        for f in files:
            obj.add_file(f)
    obj.initialize()
    return obj
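
A hedged usage sketch for init_dataset: a dict with a "class" key goes through get_dataset_class(), and a "files" list is only accepted for HDF datasets. "demo.hdf" is a placeholder path, not a file from the project:

dataset = init_dataset({"class": "HDFDataset", "files": ["demo.hdf"]})
# A string with the "config:" prefix would instead be resolved through the
# global config, e.g. init_dataset("config:train").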
Example #5
 def __init__(self, config=None, extern_data=None, rnd_seed=42, train_flag=False, parent=None):
   """
   :param Config.Config config: only needed to init extern_data if not specified explicitly
   :param ExternData|None extern_data:
   :param int rnd_seed:
   :param bool|tf.Tensor train_flag: True if we want to use this model in training, False if in eval, or dynamic
   :param TFNetworkLayer.LayerBase|None parent:
   """
   if extern_data is None:
     extern_data = ExternData()
     if not config:
       from Config import get_global_config
       config = get_global_config()
     extern_data.init_from_config(config)
   self.extern_data = extern_data
   self.used_data_keys = set()
   self.rnd_seed = rnd_seed
   self.random = numpy.random.RandomState(rnd_seed)
   self.train_flag = train_flag
   self.parent = parent
   self._selected_train_layers = None
   self.layers_desc = {}  # type: dict[str,dict[str]]
   self.layers = {}  # type: dict[str,LayerBase]
   self.loss_by_layer = {}  # type: dict[str,tf.Tensor]
   self.error_by_layer = {}  # type: dict[str,tf.Tensor]
   self.total_loss = None  # type: tf.Tensor
   self.total_constraints = None  # type: tf.Tensor
   self.total_objective = None  # type: tf.Tensor
   self.global_train_step = tf.Variable(
     name="global_step", initial_value=0, dtype="int64", collections=[tf.GraphKeys.GLOBAL_STEP], trainable=False)
   self.saver = None  # type: tf.train.Saver
   self.recurrent = False
   self._assigner_cache = {}  # type: dict[tf.Variable,VariableAssigner]
   self.concat_sources_dropout_cache = {}  # type: dict[(tuple[LayerBase],float),Data]
Example #6
 def __init__(self, config=None, extern_data=None, rnd_seed=42,
              train_flag=False, search_flag=False,
              parent_layer=None, parent_net=None,
              name=None):
   """
   :param Config.Config config: only needed to init extern_data if not specified explicitly
   :param ExternData|None extern_data:
   :param int rnd_seed:
   :param bool|tf.Tensor train_flag: True if we want to use this model in training, False if in eval, or dynamic
   :param TFNetworkLayer.LayerBase|None parent_layer:
   :param TFNetwork parent_net:
   :param str name: only for debugging
   """
   if not name:
     from Util import try_get_caller_name
     name = "<network via %s>" % try_get_caller_name(fallback="<unknown>")
   self.name = name
   if extern_data is None:
     extern_data = ExternData()
     if not config:
       from Config import get_global_config
       config = get_global_config()
     extern_data.init_from_config(config)
   self.extern_data = extern_data
   self._config = config
   self.used_data_keys = set()
   self.rnd_seed = rnd_seed
   self.random = numpy.random.RandomState(rnd_seed)
   assert isinstance(train_flag, (bool, tf.Tensor))
   self.train_flag = train_flag
   self.search_flag = search_flag
   self.parent_layer = parent_layer
   if not parent_net and parent_layer:
     parent_net = parent_layer.network
   self.parent_net = parent_net
   self._selected_train_layers = None
   self.layers_desc = {}  # type: dict[str,dict[str]]
   self.layers = {}  # type: dict[str,LayerBase]
   self.loss_by_layer = {}  # type: dict[str,tf.Tensor]
   self.error_by_layer = {}  # type: dict[str,tf.Tensor]
   self.total_loss = None  # type: tf.Tensor
   self.total_constraints = None  # type: tf.Tensor
   self.total_objective = None  # type: tf.Tensor
   if parent_net:
     self.global_train_step = parent_net.global_train_step
   else:
     self.global_train_step = tf.Variable(
       name="global_step", initial_value=0, dtype="int64", collections=[tf.GraphKeys.GLOBAL_STEP], trainable=False)
   self.saver = None  # type: tf.train.Saver
   self.recurrent = False
   self._assigner_cache = {}  # type: dict[tf.Variable,VariableAssigner]
   self.concat_sources_dropout_cache = {}  # type: dict[(tuple[LayerBase],float),Data]
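
A minimal construction sketch, assuming TFNetwork and ExternData are importable from the TFNetwork module as in RETURNN. With an explicit extern_data, the config is not consulted:

from TFNetwork import TFNetwork, ExternData

net = TFNetwork(extern_data=ExternData(), rnd_seed=1, train_flag=True, name="demo_net")
assert net.parent_net is None and not net.layers  # fresh root network, no layers yet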
Example #7
def init_dataset_via_str(config_str,
                         config=None,
                         cache_byte_size=None,
                         **kwargs):
    """
  :param str config_str: hdf-files, or "LmDataset:..." or so
  :param Config.Config|None config: optional, only for "sprint:..."
  :param int|None cache_byte_size: optional, only for HDFDataset
  :rtype: Dataset
  """
    kwargs = kwargs.copy()
    if 'window' not in kwargs and config and config.has('window'):
        kwargs['window'] = config.int('window', 1)
    from HDFDataset import HDFDataset
    if config_str.startswith("sprint:"):
        kwargs["sprintConfigStr"] = config_str[len("sprint:"):]
        assert config, "need config for dataset in 'sprint:...' format. or use 'ExternSprintDataset:...' instead"
        sprintTrainerExecPath = config.value("sprint_trainer_exec_path", None)
        assert sprintTrainerExecPath, "specify sprint_trainer_exec_path in config"
        kwargs["sprintTrainerExecPath"] = sprintTrainerExecPath
        from SprintDataset import ExternSprintDataset
        cls = ExternSprintDataset
    elif config_str.startswith("config:"):
        from Config import get_global_config
        if not config:
            config = get_global_config()
        data = eval(config_str[len("config:"):], config.typed_dict,
                    config.typed_dict)
        return init_dataset(data)
    elif ":" in config_str:
        kwargs.update(eval("dict(%s)" % config_str[config_str.find(":") + 1:]))
        class_name = config_str[:config_str.find(":")]
        cls = get_dataset_class(class_name)
    else:
        if cache_byte_size is not None:
            kwargs["cache_byte_size"] = cache_byte_size
        cls = HDFDataset
    if config:
        data = cls.from_config(config, **kwargs)
    else:
        data = cls(**kwargs)
    if isinstance(data, HDFDataset):
        for f in config_str.split(","):
            if f:
                assert os.path.exists(f)
                data.add_file(f)
    data.initialize()
    return data
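
A hedged usage sketch for the plain branch of init_dataset_via_str: a comma-separated list of HDF paths (placeholders here; the asserts require the files to exist) selects HDFDataset, and each file is registered via add_file():

data = init_dataset_via_str("train1.hdf,train2.hdf", cache_byte_size=0)
# The "<Class>:key=value,..." form instead evaluates everything after the
# first colon as dict(...) kwargs for that dataset class, e.g.:
#   init_dataset_via_str('LmDataset:corpus_file="corpus.txt.gz"')  # illustrative kwargs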
Example #8
 def select_engine(cls, engine=None, config=None):
     """
 :param int engine:
 :param Config.Config config:
 """
     assert cls.selectedEngine is None, "already set"
     if engine is None:
         if config is None:
             from Config import get_global_config
             config = get_global_config()
         engine = cls.Default
         if config.bool("use_theano", False):
             engine = cls.Theano
         if config.bool("use_tensorflow", False):
             engine = cls.TensorFlow
     cls.selectedEngine = engine
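
The enclosing class is not shown in this snippet; in RETURNN this classmethod lives on BackendEngine in Util.py, which the following hedged sketch assumes. Note the assert above: select_engine() may only be called once per process.

from Config import Config
from Util import BackendEngine

config = Config()
config.update({"use_tensorflow": True})
BackendEngine.select_engine(config=config)
assert BackendEngine.selectedEngine == BackendEngine.TensorFlow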
Example #9
  def perform(self, node, inputs, output_storage, params=None):
      start_time = time.time()
      log_posteriors, seq_lengths = inputs

      if numpy.isnan(log_posteriors).any():
          print('SprintErrorSigOp: log_posteriors contain NaN!', file=log.v1)
      if numpy.isinf(log_posteriors).any():
          print('SprintErrorSigOp: log_posteriors contain Inf!', file=log.v1)
          print('SprintErrorSigOp: log_posteriors:', log_posteriors, file=log.v1)

      if self.sprint_instance_pool is None:
          print("SprintErrorSigOp: Starting Sprint %r" % self.sprint_opts, file=log.v3)
          self.sprint_instance_pool = SprintInstancePool.get_global_instance(
              sprint_opts=self.sprint_opts)

      assert isinstance(self.sprint_instance_pool, SprintInstancePool)  # PyCharm confused otherwise
      loss, errsig = self.sprint_instance_pool.get_batch_loss_and_error_signal(
          log_posteriors, seq_lengths)
      output_storage[0][0] = loss
      output_storage[1][0] = errsig

      print('SprintErrorSigOp: avg frame loss for segments:',
            loss.sum() / seq_lengths.sum(), file=log.v5)
      end_time = time.time()
      if self.debug_perform_time is None:
          from Config import get_global_config
          config = get_global_config()
          self.debug_perform_time = config.bool("debug_SprintErrorSigOp_perform_time", False)
      if self.debug_perform_time:
          print("SprintErrorSigOp perform time:", end_time - start_time, file=log.v1)
          from Device import deviceInstance
          assert deviceInstance.is_device_proc()
          forward_time = start_time - deviceInstance.compute_start_time
          print("SprintErrorSigOp forward time:", forward_time, file=log.v1)
Example #10
 def __init__(self, config=None):
     """
 :param Config.Config|None config:
 """
     if config is None:
         from Config import get_global_config
         config = get_global_config()
     self.config = config
     self.devices_config = self._get_devices_config()
     self._check_devices()
     self.tf_session = None  # type: tf.Session
     self.updater = None  # type: Updater
     self._checked_uninitialized_vars = False
     self._merge_all_summaries = None
     self.dataset_batches = {}  # type: dict[str,BatchSetGenerator]
     self.train_data = None
     " :type: Dataset.Dataset "
     self.start_epoch = None
Example #11
def init_dataset_via_str(config_str, config=None, cache_byte_size=None, **kwargs):
  """
  :param str config_str: hdf-files, or "LmDataset:..." or so
  :param Config.Config|None config: optional, only for "sprint:..."
  :param int|None cache_byte_size: optional, only for HDFDataset
  :rtype: Dataset
  """
  kwargs = kwargs.copy()
  if 'window' not in kwargs and config and config.has('window'):
    kwargs['window'] = config.int('window', 1)
  from HDFDataset import HDFDataset
  if config_str.startswith("sprint:"):
    kwargs["sprintConfigStr"] = config_str[len("sprint:"):]
    assert config, "need config for dataset in 'sprint:...' format. or use 'ExternSprintDataset:...' instead"
    sprint_trainer_exec_path = config.value("sprint_trainer_exec_path", None)
    assert sprint_trainer_exec_path, "specify sprint_trainer_exec_path in config"
    kwargs["sprintTrainerExecPath"] = sprint_trainer_exec_path
    from SprintDataset import ExternSprintDataset
    cls = ExternSprintDataset
  elif config_str.startswith("config:"):
    from Config import get_global_config
    if not config:
      config = get_global_config()
    data = eval(config_str[len("config:"):], config.typed_dict, config.typed_dict)
    return init_dataset(data, extra_kwargs=kwargs)
  elif ":" in config_str:
    kwargs.update(eval("dict(%s)" % config_str[config_str.find(":") + 1:]))
    class_name = config_str[:config_str.find(":")]
    cls = get_dataset_class(class_name)
  else:
    if cache_byte_size is not None:
      kwargs["cache_byte_size"] = cache_byte_size
    cls = HDFDataset
  if config:
    data = cls.from_config(config, **kwargs)
  else:
    data = cls(**kwargs)
  if isinstance(data, HDFDataset):
    for f in config_str.split(","):
      if f:
        assert os.path.exists(f)
        data.add_file(f)
  data.initialize()
  return data
Example #12
 def __init__(self, config=None):
   """
   :param Config.Config|None config:
   """
   if config is None:
     from Config import get_global_config
     config = get_global_config()
   self.config = config
   self.devices_config = self._get_devices_config()
   self._check_devices()
   self.tf_session = None  # type: tf.Session
   self.network = None  # type: TFNetwork
   self.updater = None  # type: Updater
   self._checked_uninitialized_vars = False
   self._merge_all_summaries = None
   self.dataset_batches = {}  # type: dict[str,BatchSetGenerator]
   self.train_data = None  # type: Dataset
   self.start_epoch = None
   self.use_dynamic_train_flag = False
   self.use_search_flag = False
   self._const_cache = {}  # type: dict[str,tf.Tensor]
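
A hedged construction sketch, assuming this is TFEngine.Engine from RETURNN; _check_devices() runs in __init__, so the minimal config below carries a "device" entry as an assumption about what that check needs:

from Config import Config
from TFEngine import Engine

config = Config()
config.update({"device": "cpu"})
engine = Engine(config=config)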
Example #13
  def perform(self, node, inputs, output_storage):
      start_time = time.time()
      log_posteriors, seq_lengths = inputs

      if numpy.isnan(log_posteriors).any():
          print >> log.v1, 'SprintErrorSigOp: log_posteriors contain NaN!'
      if numpy.isinf(log_posteriors).any():
          print >> log.v1, 'SprintErrorSigOp: log_posteriors contain Inf!'
          #numpy.set_printoptions(threshold=numpy.nan)
          print >> log.v1, 'SprintErrorSigOp: log_posteriors:', log_posteriors

      if self.sprint_instance_pool is None:
          print >> log.v3, "SprintErrorSigOp: Starting Sprint %r" % self.sprint_opts
          self.sprint_instance_pool = SprintInstancePool.get_global_instance(
              sprint_opts=self.sprint_opts)

      loss, errsig = self.sprint_instance_pool.get_batch_loss_and_error_signal(
          log_posteriors, seq_lengths)
      #print >> log.v4, 'loss:', loss, 'errsig:', errsig
      output_storage[0][0] = loss
      output_storage[1][0] = errsig

      print >> log.v5, 'SprintErrorSigOp: avg frame loss for segments:', loss.sum() / seq_lengths.sum()
      end_time = time.time()
      if self.debug_perform_time is None:
          from Config import get_global_config
          config = get_global_config()
          self.debug_perform_time = config.bool("debug_SprintErrorSigOp_perform_time", False)
      if self.debug_perform_time:
          print >> log.v1, "SprintErrorSigOp perform time:", end_time - start_time
          from Device import deviceInstance
          assert deviceInstance.is_device_proc()
          forward_time = start_time - deviceInstance.compute_start_time
          print >> log.v1, "SprintErrorSigOp forward time:", forward_time
Example #14
  def __init__(self, sprintExecPath, minPythonControlVersion=2, sprintConfigStr="",
               sprintControlConfig=None, usePythonSegmentOrder=True):
      """
      :param str sprintExecPath: this executable will be called for the sub proc.
      :param int minPythonControlVersion: will be checked in the subprocess, via Sprint PythonControl.
      :param str sprintConfigStr: passed to Sprint as command line args.
        can have "config:" prefix - in that case, looked up in config.
        handled via eval_shell_str(), can thus have lazy content (if it is callable, will be called).
      :param dict[str]|None sprintControlConfig: passed to SprintControl.init().
      """
      assert os.path.exists(sprintExecPath)
      self.sprintExecPath = sprintExecPath
      self.minPythonControlVersion = minPythonControlVersion
      if sprintConfigStr.startswith("config:"):
          from Config import get_global_config
          config = get_global_config()
          assert config
          sprintConfigStr = config.typed_dict[sprintConfigStr[len("config:"):]]
      self.sprintConfig = eval_shell_str(sprintConfigStr)
      self.sprintControlConfig = sprintControlConfig
      self.usePythonSegmentOrder = usePythonSegmentOrder
      self.child_pid = None
      self.parent_pid = os.getpid()
      # There is no generic way to see whether Python is exiting.
      # This is our workaround. We check for it in self.run_inner().
      self.python_exit = False
      atexit.register(self.exit_handler)
      self._cur_seg_name = None
      self._cur_posteriors_shape = None
      self.is_calculating = False
      self.init()
Example #15
def get_layer_class(name, raise_exception=True):
  """
  :type name: str
  :rtype: type(NetworkHiddenLayer.HiddenLayer)
  """
  if name in LayerClasses:
    return LayerClasses[name]
  if name.startswith("config."):
    from Config import get_global_config
    config = get_global_config()
    cls = config.typed_value(name[len("config."):])
    import inspect
    if not inspect.isclass(cls):
      if raise_exception:
        raise Exception("get_layer_class: %s not found" % name)
      else:
        return None
    if cls.layer_class is None:
      # Will make Layer.save() (to HDF) work correctly.
      cls.layer_class = name
    return cls
  if raise_exception:
    raise Exception("get_layer_class: invalid layer type: %s" % name)
  return None
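
A short usage sketch: the "config." prefix looks the class up as a typed value in the global config (MyCustomLayer is hypothetical), and raise_exception=False turns a miss into None instead of an exception:

cls = get_layer_class("config.MyCustomLayer", raise_exception=False)
if cls is None:
    print("MyCustomLayer is not defined in the global config")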
Example #16
#!/usr/bin/env python3

import os
import sys
sys.path.insert(0, os.path.dirname(__file__) + "/..")  # parent dir for Returnn

import better_exchook
better_exchook.install()

from pprint import pprint
from TFEngine import Engine
from Dataset import init_dataset
from Config import get_global_config
from Util import get_login_username

config = get_global_config(auto_create=True)
config.update(dict(
  batching="random",
  batch_size=5000,
  max_seqs=10,
  chunking="0",

  network={
    "fw0": {"class": "rec", "unit": "NativeLstm2", "dropout": 0.1, "n_out": 10},
    "output": {"class": "softmax", "loss": "ce", "from": ["fw0"]}
  },

  # training
  nadam=True,
  learning_rate=0.01,
  num_epochs=100,