def perform(self, node, inputs, output_storage):
  start_time = time.time()
  log_posteriors, seq_lengths = inputs

  if numpy.isnan(log_posteriors).any():
    print >> log.v1, 'SprintErrorSigOp: log_posteriors contain NaN!'
  if numpy.isinf(log_posteriors).any():
    print >> log.v1, 'SprintErrorSigOp: log_posteriors contain Inf!'
    #numpy.set_printoptions(threshold=numpy.nan)
    print >> log.v1, 'SprintErrorSigOp: log_posteriors:', log_posteriors

  if self.sprint_instance_pool is None:
    print >> log.v3, "SprintErrorSigOp: Starting Sprint %r" % self.sprint_opts
    self.sprint_instance_pool = SprintInstancePool.get_global_instance(sprint_opts=self.sprint_opts)

  loss, errsig = self.sprint_instance_pool.get_batch_loss_and_error_signal(log_posteriors, seq_lengths)
  #print >> log.v4, 'loss:', loss, 'errsig:', errsig
  output_storage[0][0] = loss
  output_storage[1][0] = errsig

  print >> log.v5, 'SprintErrorSigOp: avg frame loss for segments:', loss.sum() / seq_lengths.sum()

  end_time = time.time()
  if self.debug_perform_time is None:
    from Config import get_global_config
    config = get_global_config()
    self.debug_perform_time = config.bool("debug_SprintErrorSigOp_perform_time", False)
  if self.debug_perform_time:
    print >> log.v1, "SprintErrorSigOp perform time:", end_time - start_time
    from Device import deviceInstance
    assert deviceInstance.is_device_proc()
    forward_time = start_time - deviceInstance.compute_start_time
    print >> log.v1, "SprintErrorSigOp forward time:", forward_time
def perform(self, node, inputs, output_storage, params=None):
  start_time = time.time()
  log_posteriors, seq_lengths = inputs

  if numpy.isnan(log_posteriors).any():
    print('SprintErrorSigOp: log_posteriors contain NaN!', file=log.v1)
  if numpy.isinf(log_posteriors).any():
    print('SprintErrorSigOp: log_posteriors contain Inf!', file=log.v1)
    print('SprintErrorSigOp: log_posteriors:', log_posteriors, file=log.v1)

  if self.sprint_instance_pool is None:
    print("SprintErrorSigOp: Starting Sprint %r" % self.sprint_opts, file=log.v3)
    self.sprint_instance_pool = SprintInstancePool.get_global_instance(sprint_opts=self.sprint_opts)

  assert isinstance(self.sprint_instance_pool, SprintInstancePool)  # PyCharm confused otherwise
  loss, errsig = self.sprint_instance_pool.get_batch_loss_and_error_signal(log_posteriors, seq_lengths)
  output_storage[0][0] = loss
  output_storage[1][0] = errsig

  print('SprintErrorSigOp: avg frame loss for segments:', loss.sum() / seq_lengths.sum(), file=log.v5)

  end_time = time.time()
  if self.debug_perform_time is None:
    from Config import get_global_config
    config = get_global_config()
    self.debug_perform_time = config.bool("debug_SprintErrorSigOp_perform_time", False)
  if self.debug_perform_time:
    print("SprintErrorSigOp perform time:", end_time - start_time, file=log.v1)
    from Device import deviceInstance
    assert deviceInstance.is_device_proc()
    forward_time = start_time - deviceInstance.compute_start_time
    print("SprintErrorSigOp forward time:", forward_time, file=log.v1)
def __init__(self, sprintExecPath, minPythonControlVersion=2, sprintConfigStr="",
             sprintControlConfig=None, usePythonSegmentOrder=True):
  """
  :param str sprintExecPath: this executable will be called for the sub proc.
  :param int minPythonControlVersion: will be checked in the subprocess. via Sprint PythonControl
  :param str sprintConfigStr: passed to Sprint as command line args.
    can have "config:" prefix - in that case, looked up in config.
    handled via eval_shell_str(), can thus have lazy content (if it is callable, will be called).
  :param dict[str]|None sprintControlConfig: passed to SprintControl.init().
  """
  assert os.path.exists(sprintExecPath)
  self.sprintExecPath = sprintExecPath
  self.minPythonControlVersion = minPythonControlVersion
  if sprintConfigStr.startswith("config:"):
    from Config import get_global_config
    config = get_global_config()
    assert config
    sprintConfigStr = config.typed_dict[sprintConfigStr[len("config:"):]]
  self.sprintConfig = eval_shell_str(sprintConfigStr)
  self.sprintControlConfig = sprintControlConfig
  self.usePythonSegmentOrder = usePythonSegmentOrder
  self.child_pid = None
  self.parent_pid = os.getpid()
  # There is no generic way to see whether Python is exiting.
  # This is our workaround. We check for it in self.run_inner().
  self.python_exit = False
  atexit.register(self.exit_handler)
  self._cur_seg_name = None
  self._cur_posteriors_shape = None
  self.is_calculating = False
  self.init()
def init_dataset(kwargs):
  """
  :param dict[str]|str|(()->dict[str]) kwargs:
  :rtype: Dataset
  """
  assert kwargs
  if callable(kwargs):
    return init_dataset(kwargs())
  if isinstance(kwargs, (str, unicode)):
    if kwargs.startswith("config:"):
      from Config import get_global_config
      config = get_global_config()
      assert config
      return init_dataset(config.opt_typed_value(kwargs[len("config:"):]))
    return init_dataset_via_str(config_str=kwargs)
  kwargs = kwargs.copy()
  assert "class" in kwargs
  clazz_name = kwargs.pop("class")
  clazz = get_dataset_class(clazz_name)
  if not clazz:
    raise Exception("Dataset class %r not found" % clazz_name)
  files = kwargs.pop("files", [])
  obj = clazz(**kwargs)
  assert isinstance(obj, Dataset)
  if files:
    from HDFDataset import HDFDataset, NextGenHDFDataset
    assert isinstance(obj, (HDFDataset, NextGenHDFDataset))
    for f in files:
      obj.add_file(f)
  obj.initialize()
  return obj
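# Hedged usage sketch for init_dataset above, written only against the code as
# shown; the dataset class name "HDFDataset", the file "train.hdf" and the
# config key "train" are illustrative assumptions, not taken from the source.
from Dataset import init_dataset

# dict form: "class" selects the dataset class, "files" are passed to add_file()
train_data = init_dataset({"class": "HDFDataset", "files": ["train.hdf"]})
# callable form: called first, the returned dict is fed back into init_dataset()
train_data = init_dataset(lambda: {"class": "HDFDataset", "files": ["train.hdf"]})
# string form with "config:" prefix: looked up via config.opt_typed_value() in the global config
train_data = init_dataset("config:train")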
def __init__(self, config=None, extern_data=None, rnd_seed=42, train_flag=False, parent=None):
  """
  :param Config.Config config: only needed to init extern_data if not specified explicitly
  :param ExternData|None extern_data:
  :param int rnd_seed:
  :param bool|tf.Tensor train_flag: True if we want to use this model in training, False if in eval, or dynamic
  :param TFNetworkLayer.LayerBase|None parent:
  """
  if extern_data is None:
    extern_data = ExternData()
    if not config:
      from Config import get_global_config
      config = get_global_config()
    extern_data.init_from_config(config)
  self.extern_data = extern_data
  self.used_data_keys = set()
  self.rnd_seed = rnd_seed
  self.random = numpy.random.RandomState(rnd_seed)
  self.train_flag = train_flag
  self.parent = parent
  self._selected_train_layers = None
  self.layers_desc = {}  # type: dict[str,dict[str]]
  self.layers = {}  # type: dict[str,LayerBase]
  self.loss_by_layer = {}  # type: dict[str,tf.Tensor]
  self.error_by_layer = {}  # type: dict[str,tf.Tensor]
  self.total_loss = None  # type: tf.Tensor
  self.total_constraints = None  # type: tf.Tensor
  self.total_objective = None  # type: tf.Tensor
  self.global_train_step = tf.Variable(
    name="global_step", initial_value=0, dtype="int64", collections=[tf.GraphKeys.GLOBAL_STEP], trainable=False)
  self.saver = None  # type: tf.train.Saver
  self.recurrent = False
  self._assigner_cache = {}  # type: dict[tf.Variable,VariableAssigner]
  self.concat_sources_dropout_cache = {}  # type: dict[(tuple[LayerBase],float),Data]
def __init__(self, config=None, extern_data=None, rnd_seed=42,
             train_flag=False, search_flag=False,
             parent_layer=None, parent_net=None,
             name=None):
  """
  :param Config.Config config: only needed to init extern_data if not specified explicitly
  :param ExternData|None extern_data:
  :param int rnd_seed:
  :param bool|tf.Tensor train_flag: True if we want to use this model in training, False if in eval, or dynamic
  :param TFNetworkLayer.LayerBase|None parent_layer:
  :param TFNetwork parent_net:
  :param str name: only for debugging
  """
  if not name:
    from Util import try_get_caller_name
    name = "<network via %s>" % try_get_caller_name(fallback="<unknown>")
  self.name = name
  if extern_data is None:
    extern_data = ExternData()
    if not config:
      from Config import get_global_config
      config = get_global_config()
    extern_data.init_from_config(config)
  self.extern_data = extern_data
  self._config = config
  self.used_data_keys = set()
  self.rnd_seed = rnd_seed
  self.random = numpy.random.RandomState(rnd_seed)
  assert isinstance(train_flag, (bool, tf.Tensor))
  self.train_flag = train_flag
  self.search_flag = search_flag
  self.parent_layer = parent_layer
  if not parent_net and parent_layer:
    parent_net = parent_layer.network
  self.parent_net = parent_net
  self._selected_train_layers = None
  self.layers_desc = {}  # type: dict[str,dict[str]]
  self.layers = {}  # type: dict[str,LayerBase]
  self.loss_by_layer = {}  # type: dict[str,tf.Tensor]
  self.error_by_layer = {}  # type: dict[str,tf.Tensor]
  self.total_loss = None  # type: tf.Tensor
  self.total_constraints = None  # type: tf.Tensor
  self.total_objective = None  # type: tf.Tensor
  if parent_net:
    self.global_train_step = parent_net.global_train_step
  else:
    self.global_train_step = tf.Variable(
      name="global_step", initial_value=0, dtype="int64", collections=[tf.GraphKeys.GLOBAL_STEP], trainable=False)
  self.saver = None  # type: tf.train.Saver
  self.recurrent = False
  self._assigner_cache = {}  # type: dict[tf.Variable,VariableAssigner]
  self.concat_sources_dropout_cache = {}  # type: dict[(tuple[LayerBase],float),Data]
def init_dataset_via_str(config_str, config=None, cache_byte_size=None, **kwargs):
  """
  :param str config_str: hdf-files, or "LmDataset:..." or so
  :param Config.Config|None config: optional, only for "sprint:..."
  :param int|None cache_byte_size: optional, only for HDFDataset
  :rtype: Dataset
  """
  kwargs = kwargs.copy()
  if 'window' not in kwargs and config and config.has('window'):
    kwargs['window'] = config.int('window', 1)
  from HDFDataset import HDFDataset
  if config_str.startswith("sprint:"):
    kwargs["sprintConfigStr"] = config_str[len("sprint:"):]
    assert config, "need config for dataset in 'sprint:...' format. or use 'ExternSprintDataset:...' instead"
    sprintTrainerExecPath = config.value("sprint_trainer_exec_path", None)
    assert sprintTrainerExecPath, "specify sprint_trainer_exec_path in config"
    kwargs["sprintTrainerExecPath"] = sprintTrainerExecPath
    from SprintDataset import ExternSprintDataset
    cls = ExternSprintDataset
  elif config_str.startswith("config:"):
    from Config import get_global_config
    if not config:
      config = get_global_config()
    data = eval(config_str[len("config:"):], config.typed_dict, config.typed_dict)
    return init_dataset(data)
  elif ":" in config_str:
    kwargs.update(eval("dict(%s)" % config_str[config_str.find(":") + 1:]))
    class_name = config_str[:config_str.find(":")]
    cls = get_dataset_class(class_name)
  else:
    if cache_byte_size is not None:
      kwargs["cache_byte_size"] = cache_byte_size
    cls = HDFDataset
  if config:
    data = cls.from_config(config, **kwargs)
  else:
    data = cls(**kwargs)
  if isinstance(data, HDFDataset):
    for f in config_str.split(","):
      if f:
        assert os.path.exists(f)
        data.add_file(f)
  data.initialize()
  return data
def select_engine(cls, engine=None, config=None):
  """
  :param int engine:
  :param Config.Config config:
  """
  assert cls.selectedEngine is None, "already set"
  if engine is None:
    if config is None:
      from Config import get_global_config
      config = get_global_config()
    engine = cls.Default
    if config.bool("use_theano", False):
      engine = cls.Theano
    if config.bool("use_tensorflow", False):
      engine = cls.TensorFlow
  cls.selectedEngine = engine
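# Hedged usage sketch for select_engine above; that the enclosing class is
# BackendEngine in Util (as in RETURNN) is an assumption.
from Util import BackendEngine
from Config import get_global_config

config = get_global_config()
# with engine=None, the backend is picked from the "use_theano"/"use_tensorflow"
# config flags, falling back to cls.Default; it can only be selected once
BackendEngine.select_engine(config=config)
# or force a backend explicitly, bypassing the config lookup:
# BackendEngine.select_engine(engine=BackendEngine.TensorFlow)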
def __init__(self, config=None):
  """
  :param Config.Config|None config:
  """
  if config is None:
    from Config import get_global_config
    config = get_global_config()
  self.config = config
  self.devices_config = self._get_devices_config()
  self._check_devices()
  self.tf_session = None  # type: tf.Session
  self.updater = None  # type: Updater
  self._checked_uninitialized_vars = False
  self._merge_all_summaries = None
  self.dataset_batches = {}  # type: dict[str,BatchSetGenerator]
  self.train_data = None
  " :type: Dataset.Dataset "
  self.start_epoch = None
def init_dataset_via_str(config_str, config=None, cache_byte_size=None, **kwargs):
  """
  :param str config_str: hdf-files, or "LmDataset:..." or so
  :param Config.Config|None config: optional, only for "sprint:..."
  :param int|None cache_byte_size: optional, only for HDFDataset
  :rtype: Dataset
  """
  kwargs = kwargs.copy()
  if 'window' not in kwargs and config and config.has('window'):
    kwargs['window'] = config.int('window', 1)
  from HDFDataset import HDFDataset
  if config_str.startswith("sprint:"):
    kwargs["sprintConfigStr"] = config_str[len("sprint:"):]
    assert config, "need config for dataset in 'sprint:...' format. or use 'ExternSprintDataset:...' instead"
    sprint_trainer_exec_path = config.value("sprint_trainer_exec_path", None)
    assert sprint_trainer_exec_path, "specify sprint_trainer_exec_path in config"
    kwargs["sprintTrainerExecPath"] = sprint_trainer_exec_path
    from SprintDataset import ExternSprintDataset
    cls = ExternSprintDataset
  elif config_str.startswith("config:"):
    from Config import get_global_config
    if not config:
      config = get_global_config()
    data = eval(config_str[len("config:"):], config.typed_dict, config.typed_dict)
    return init_dataset(data, extra_kwargs=kwargs)
  elif ":" in config_str:
    kwargs.update(eval("dict(%s)" % config_str[config_str.find(":") + 1:]))
    class_name = config_str[:config_str.find(":")]
    cls = get_dataset_class(class_name)
  else:
    if cache_byte_size is not None:
      kwargs["cache_byte_size"] = cache_byte_size
    cls = HDFDataset
  if config:
    data = cls.from_config(config, **kwargs)
  else:
    data = cls(**kwargs)
  if isinstance(data, HDFDataset):
    for f in config_str.split(","):
      if f:
        assert os.path.exists(f)
        data.add_file(f)
  data.initialize()
  return data
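# Hedged usage sketch for init_dataset_via_str above; the file names and the
# LmDataset options are illustrative assumptions, not taken from the source.
from Dataset import init_dataset_via_str

# comma-separated HDF files -> HDFDataset, each existing file added via add_file()
data = init_dataset_via_str("train-1.hdf,train-2.hdf", cache_byte_size=0)
# "ClassName:opts" form; everything after the first ":" is parsed via eval("dict(...)")
lm_data = init_dataset_via_str("LmDataset:corpus_file='corpus.txt.gz',orth_symbols_file='orth.syms'")
# the "sprint:..." and "config:..." forms additionally need a Config instance
# (or the global config) to resolve sprint_trainer_exec_path / typed values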
def __init__(self, config=None):
  """
  :param Config.Config|None config:
  """
  if config is None:
    from Config import get_global_config
    config = get_global_config()
  self.config = config
  self.devices_config = self._get_devices_config()
  self._check_devices()
  self.tf_session = None  # type: tf.Session
  self.network = None  # type: TFNetwork
  self.updater = None  # type: Updater
  self._checked_uninitialized_vars = False
  self._merge_all_summaries = None
  self.dataset_batches = {}  # type: dict[str,BatchSetGenerator]
  self.train_data = None  # type: Dataset
  self.start_epoch = None
  self.use_dynamic_train_flag = False
  self.use_search_flag = False
  self._const_cache = {}  # type: dict[str,tf.Tensor]
def get_layer_class(name, raise_exception=True):
  """
  :type name: str
  :rtype: type(NetworkHiddenLayer.HiddenLayer)
  """
  if name in LayerClasses:
    return LayerClasses[name]
  if name.startswith("config."):
    from Config import get_global_config
    config = get_global_config()
    cls = config.typed_value(name[len("config."):])
    import inspect
    if not inspect.isclass(cls):
      if raise_exception:
        raise Exception("get_layer_class: %s not found" % name)
      else:
        return None
    if cls.layer_class is None:
      # Will make Layer.save() (to HDF) work correctly.
      cls.layer_class = name
    return cls
  if raise_exception:
    raise Exception("get_layer_class: invalid layer type: %s" % name)
  return None
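# Hedged sketch of the "config." lookup path in get_layer_class above;
# CustomLayer and its options are illustrative assumptions. A layer class
# defined as a typed value in the config can be referenced by the "config." prefix:
#
#   # in the config (typed value):
#   class CustomLayer(HiddenLayer):
#     ...
#
#   # in the network dict:
#   network = {"out": {"class": "config.CustomLayer", "n_out": 100}}
#
# get_layer_class("config.CustomLayer") then resolves CustomLayer via
# config.typed_value("CustomLayer") and sets CustomLayer.layer_class to
# "config.CustomLayer" if it was None, so Layer.save() stores the right name.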
#!/usr/bin/env python3

import os
import sys
sys.path.insert(0, os.path.dirname(__file__) + "/..")  # parent dir for Returnn

import better_exchook
better_exchook.install()

from pprint import pprint
from TFEngine import Engine
from Dataset import init_dataset
from Config import get_global_config
from Util import get_login_username

config = get_global_config(auto_create=True)
config.update(dict(
  batching="random",
  batch_size=5000,
  max_seqs=10,
  chunking="0",
  network={
    "fw0": {"class": "rec", "unit": "NativeLstm2", "dropout": 0.1, "n_out": 10},
    "output": {"class": "softmax", "loss": "ce", "from": ["fw0"]}
  },
  # training
  nadam=True,
  learning_rate=0.01,
  num_epochs=100,