def forward(segmentName, features):
  """
  :param numpy.ndarray features: format (input-feature,time) (via Sprint)
  :return: format (output-dim,time)
  :rtype: numpy.ndarray
  """
  print("Sprint forward", segmentName, features.shape)
  start_time = time.time()
  assert engine is not None, "not initialized"
  assert sprintDataset

  # Features are in Sprint format (feature,time).
  T = features.shape[1]
  assert features.shape == (InputDim, T)

  # Fill the data for the current segment.
  sprintDataset.shuffle_frames_of_nseqs = 0  # We must not shuffle.
  sprintDataset.initSprintEpoch(None)  # Reset cache. We don't need old seqs anymore.
  sprintDataset.init_seq_order()
  seq = sprintDataset.addNewData(features, segmentName=segmentName)

  if BackendEngine.is_theano_selected():
    # Prepare data for device.
    device = engine.devices[0]
    success = assign_dev_data_single_seq(device, sprintDataset, seq)
    assert success, "failed to allocate & assign data for seq %i, %s" % (seq, segmentName)

    # Do the actual forwarding and collect result.
    device.run("extract")
    result, _ = device.result()
    assert result is not None, "Device crashed."
    assert len(result) == 1
    posteriors = result[0]
  elif BackendEngine.is_tensorflow_selected():
    posteriors = engine.forward_single(dataset=sprintDataset, seq_idx=seq)
  else:
    raise NotImplementedError("unknown backend engine")

  # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
  if posteriors.ndim == 3:
    assert posteriors.shape == (T, 1, OutputDim)
    posteriors = posteriors[:, 0]

  # Posteriors are in format (time,emission).
  assert posteriors.shape == (T, OutputDim)
  # Reformat to Sprint expected format (emission,time).
  posteriors = posteriors.transpose()
  assert posteriors.shape == (OutputDim, T)
  stats = (numpy.min(posteriors), numpy.max(posteriors),
           numpy.mean(posteriors), numpy.std(posteriors))
  print("posteriors min/max/mean/std:", stats, "time:", time.time() - start_time)
  if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
    print("posteriors:", posteriors)
    debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
    debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
    numpy.savetxt(debug_feat_fn, features)
    numpy.savetxt(debug_post_fn, posteriors)
    print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
    assert False, "Error, posteriors contain invalid numbers."
  return posteriors

def initDevices():
  """
  :rtype: list[Device]|None
  """
  oldDeviceConfig = ",".join(config.list('device', ['default']))
  if BackendEngine.is_tensorflow_selected():
    if os.environ.get("TF_DEVICE"):
      config.set("device", os.environ.get("TF_DEVICE"))
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), oldDeviceConfig), file=log.v4)
  if not BackendEngine.is_theano_selected():
    return None
  if config.value("task", "train") == "nop":
    return []
  if "device" in TheanoFlags:
    # This is important because Theano likely already has initialized that device.
    config.set("device", TheanoFlags["device"])
    print("Devices: Use %s via THEANO_FLAGS instead of %s." % (
      TheanoFlags["device"], oldDeviceConfig), file=log.v4)
  devArgs = getDevicesInitArgs(config)
  assert len(devArgs) > 0
  devices = [Device(**kwargs) for kwargs in devArgs]
  for device in devices:
    while not device.initialized:
      time.sleep(0.25)
  if devices[0].blocking:
    print("Devices: Used in blocking / single proc mode.", file=log.v4)
  else:
    print("Devices: Used in multiprocessing mode.", file=log.v4)
  return devices

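# Illustrative shell invocations that exercise the device overrides above;
# TF_DEVICE and THEANO_FLAGS are the env vars this function reads, the config
# filename is a placeholder:
#
#   TF_DEVICE=gpu python rnn.py my.config             # TF backend: override 'device'
#   THEANO_FLAGS=device=gpu0 python rnn.py my.config  # Theano already picked this device
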
def initBackendEngine():
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
    import TheanoUtil
    TheanoUtil.monkey_patches()
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    if os.environ.get("TF_DEVICE"):
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
      config.set("device", os.environ.get("TF_DEVICE"))
    if config.is_true("use_horovod"):
      import socket
      import horovod.tensorflow as hvd
      from TFUtil import init_horovod
      init_horovod()  # make sure it is initialized
      if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
        # We assume that we want to use a GPU.
        gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault("gpu_options", {})
        assert "visible_device_list" not in gpu_opts
        gpu_opts["visible_device_list"] = str(hvd.local_rank())
        print("Horovod: Hostname %s, pid %i, using GPU %s." % (
          socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
      else:
        if hvd.rank() == 0:  # Don't spam in all ranks.
          print("Horovod: Not using GPU.", file=log.v3)
      horovod_reduce_type = config.value("horovod_reduce_type", "")
      if horovod_reduce_type == "":
        horovod_reduce_type = "grad"
        config.set("horovod_reduce_type", horovod_reduce_type)
      else:
        assert horovod_reduce_type in ["grad", "param"], "config option 'horovod_reduce_type' invalid"
      if hvd.rank() == 0:  # Don't spam in all ranks.
        print("Horovod: Reduce type:", horovod_reduce_type, file=log.v3)
    from TFUtil import debugRegisterBetterRepr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    debugRegisterBetterRepr()
  else:
    raise NotImplementedError

def _forward(segment_name, features):
  """
  :param numpy.ndarray features: format (input-feature,time) (via Sprint)
  :return: format (output-dim,time)
  :rtype: numpy.ndarray
  """
  print("Sprint forward", segment_name, features.shape)
  start_time = time.time()
  assert engine is not None, "not initialized"
  assert sprintDataset

  # Features are in Sprint format (feature,time).
  num_time = features.shape[1]
  assert features.shape == (InputDim, num_time)
  dataset, seq_idx = features_to_dataset(features=features, segment_name=segment_name)

  if BackendEngine.is_theano_selected():
    # Prepare data for device.
    device = engine.devices[0]
    success = assign_dev_data_single_seq(device, dataset=dataset, seq=seq_idx)
    assert success, "failed to allocate & assign data for seq %i, %s" % (seq_idx, segment_name)

    # Do the actual forwarding and collect result.
    device.run("extract")
    result, _ = device.result()
    assert result is not None, "Device crashed."
    assert len(result) == 1
    posteriors = result[0]
  elif BackendEngine.is_tensorflow_selected():
    posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)
  else:
    raise NotImplementedError("unknown backend engine")

  # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
  if posteriors.ndim == 3:
    assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
    posteriors = posteriors[:, 0]

  # Posteriors are in format (time,emission).
  assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
  # Reformat to Sprint expected format (emission,time).
  posteriors = posteriors.transpose()
  assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)
  stats = (numpy.min(posteriors), numpy.max(posteriors),
           numpy.mean(posteriors), numpy.std(posteriors))
  print("posteriors min/max/mean/std:", stats, "time:", time.time() - start_time)
  if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
    print("posteriors:", posteriors)
    debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
    debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
    numpy.savetxt(debug_feat_fn, features)
    numpy.savetxt(debug_post_fn, posteriors)
    print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
    assert False, "Error, posteriors contain invalid numbers."
  return posteriors

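# A minimal usage sketch for _forward (not part of the module): it assumes the
# module was initialized for forwarding (see _init_base below), which sets the
# globals engine, sprintDataset, InputDim, OutputDim and MaxSegmentLength; the
# random features and the segment name are stand-ins for what Sprint passes.
#
#   import numpy
#   feat = numpy.random.randn(InputDim, 50).astype("float32")  # (input-feature, time)
#   post = _forward("corpus/rec1/seq-0001", feat)
#   assert post.shape == (OutputDim * MaxSegmentLength, 50)    # (emission, time)
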
def finalize():
  global quit
  quit = True
  sys.exited = True
  if BackendEngine.is_theano_selected():
    if engine:
      for device in engine.devices:
        device.terminate()
  elif BackendEngine.is_tensorflow_selected():
    if engine:
      engine.finalize()

def finalize():
  print("Quitting", file=getattr(log, "v4", sys.stderr))
  global quit
  quit = True
  sys.exited = True
  if BackendEngine.is_theano_selected():
    if engine:
      for device in engine.devices:
        device.terminate()
  elif BackendEngine.is_tensorflow_selected():
    if engine:
      engine.finalize()

def num_inputs_outputs_from_config(cls, config):
  """
  :type config: Config.Config
  :returns (num_inputs, num_outputs),
     where num_inputs is like num_outputs["data"][0],
     and num_outputs is a dict of data_key -> (dim, ndim),
       where data_key is e.g. "classes" or "data",
       dim is the feature dimension or the number of classes,
       and ndim is the ndim counted without batch-dim,
       i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
  :rtype: (int,dict[str,(int,int)])
  """
  num_inputs = config.int('num_inputs', 0)
  target = config.value('target', 'classes')
  if config.is_typed('num_outputs'):
    num_outputs = config.typed_value('num_outputs')
    if not isinstance(num_outputs, dict):
      num_outputs = {target: num_outputs}
    num_outputs = num_outputs.copy()
    from Dataset import convert_data_dims
    from Util import BackendEngine
    num_outputs = convert_data_dims(num_outputs, leave_dict_as_is=BackendEngine.is_tensorflow_selected())
    if "data" in num_outputs:
      num_inputs = num_outputs["data"][0]
  elif config.has('num_outputs'):
    num_outputs = {target: [config.int('num_outputs', 0), 1]}
  else:
    num_outputs = None
  dataset = None
  if config.list('train') and ":" not in config.value('train', ''):
    dataset = config.list('train')[0]
  if not config.is_typed('num_outputs') and dataset:
    try:
      _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
    except Exception:
      _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
    try:
      _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
    except Exception:
      _num_outputs = hdf5_group(dataset, 'targets/size')
      for k in _num_outputs:
        _num_outputs[k] = [_num_outputs[k], len(hdf5_shape(dataset, 'targets/data/' + k))]
    if num_inputs:
      assert num_inputs == _num_inputs
    if num_outputs:
      assert num_outputs == _num_outputs
    num_inputs = _num_inputs
    num_outputs = _num_outputs
  if not num_inputs and not num_outputs and config.has("load"):
    from Network import LayerNetwork
    import h5py
    model = h5py.File(config.value("load", ""), "r")
    num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)
  assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
  return num_inputs, num_outputs

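# For illustration, the config shapes this function accepts; the dims are
# made-up example values, the keys are the ones read above:
#
#   num_inputs = 40                         # dense input feature dim
#   num_outputs = 4501                      # shorthand for {"classes": [4501, 1]}
#   num_outputs = {"classes": [4501, 1],    # (dim, ndim); ndim=1 -> sparse
#                  "data": [40, 2]}         # ndim=2 -> dense; implies num_inputs=40
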
def initBackendEngine():
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    from TFUtil import debugRegisterBetterRepr
    debugRegisterBetterRepr()
  else:
    raise NotImplementedError

def initBackendEngine():
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    from Util import to_bool
    from TFUtil import debugRegisterBetterRepr
    if os.environ.get("DEBUG_TF_BETTER_REPR") and to_bool(os.environ.get("DEBUG_TF_BETTER_REPR")):
      debugRegisterBetterRepr()
  else:
    raise NotImplementedError

def initEngine(devices):
  """
  Initializes global engine.

  :type devices: list[Device]
  """
  global engine
  if BackendEngine.is_theano_selected():
    engine = Engine(devices)
  elif BackendEngine.is_tensorflow_selected():
    import TFEngine
    engine = TFEngine.Engine(config=config)
  else:
    raise NotImplementedError

def init_backend_engine():
  """
  Selects the backend engine (TensorFlow or Theano) via the config
  and does the related setup (device overrides, Horovod, TF thread pools).
  """
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
    import TheanoUtil
    TheanoUtil.monkey_patches()
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    if os.environ.get("TF_DEVICE"):
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
      config.set("device", os.environ.get("TF_DEVICE"))
    if config.is_true("use_horovod"):
      import socket
      # noinspection PyPackageRequirements,PyUnresolvedReferences
      import horovod.tensorflow as hvd
      from TFUtil import init_horovod
      init_horovod()  # make sure it is initialized
      if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
        # We assume that we want to use a GPU.
        gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault("gpu_options", {})
        assert "visible_device_list" not in gpu_opts
        gpu_opts["visible_device_list"] = str(hvd.local_rank())
        print("Horovod: Hostname %s, pid %i, using GPU %s." % (
          socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
      else:
        if hvd.rank() == 0:  # Don't spam in all ranks.
          print("Horovod: Not using GPU.", file=log.v3)
      horovod_reduce_type = config.value("horovod_reduce_type", "")
      if horovod_reduce_type == "":
        horovod_reduce_type = "grad"
        config.set("horovod_reduce_type", horovod_reduce_type)
      else:
        assert horovod_reduce_type in ["grad", "param"], "config option 'horovod_reduce_type' invalid"
      if hvd.rank() == 0:  # Don't spam in all ranks.
        print("Horovod: Reduce type:", horovod_reduce_type, file=log.v3)
    from TFUtil import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    debug_register_better_repr()
  else:
    raise NotImplementedError

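# Example config snippet that reaches the Horovod branch above; the option
# names (use_horovod, horovod_reduce_type, tf_session_opts, device) are the
# ones read by this function, the values are only an illustration:
#
#   use_horovod = True
#   horovod_reduce_type = "grad"   # or "param"
#   device = "gpu"                 # triggers visible_device_list = hvd.local_rank()
#   tf_session_opts = {"gpu_options": {"allow_growth": True}}
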
def finalize():
  """
  Cleanup at the end.
  """
  print("Quitting", file=getattr(log, "v4", sys.stderr))
  global quit_returnn
  quit_returnn = True
  sys.exited = True
  if engine:
    if BackendEngine.is_theano_selected():
      for device in engine.devices:
        device.terminate()
    elif BackendEngine.is_tensorflow_selected():
      engine.finalize()

def init_engine(devices):
  """
  Initializes global engine.

  :type devices: list[Device.Device]|None
  """
  global engine
  if BackendEngine.is_theano_selected():
    import Engine
    engine = Engine.Engine(devices)
  elif BackendEngine.is_tensorflow_selected():
    import TFEngine
    engine = TFEngine.Engine(config=config)
  else:
    raise NotImplementedError

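# Sketch of the startup order these helpers are used in; this mirrors what
# rnn.init() and _init_base() below do, and assumes `config` is already set:
#
#   init_backend_engine()            # select Theano or TF
#   devices = init_theano_devices()  # None when TF is selected
#   init_engine(devices)             # sets the global `engine`
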
def finalize():
  """
  Cleanup at the end.
  """
  print("Quitting", file=getattr(log, "v4", sys.stderr))
  global quit_returnn
  quit_returnn = True
  sys.exited = True
  if BackendEngine.is_theano_selected():
    if engine:
      for device in engine.devices:
        device.terminate()
  elif BackendEngine.is_tensorflow_selected():
    if engine:
      engine.finalize()

def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("extern_data", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_inputs", dataset.num_inputs)
    config.set("num_outputs", dataset.num_outputs)

def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  config.set("num_inputs", dataset.num_inputs)
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("num_outputs", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_outputs", dataset.num_outputs)

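# Usage sketch, assuming an already constructed Dataset instance (e.g. the
# train set from init_dataset); afterwards the network can be built from the
# config alone:
#
#   set_config_num_inputs_outputs_from_dataset(config=config, dataset=train_data)
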
def get_existing_models(cls, config):
  model_filename = config.value('model', '')
  if not model_filename:
    return []
  # Automatically search the filesystem for existing models.
  file_list = []
  for epoch in range(1, cls.config_get_final_epoch(config) + 1):
    for is_pretrain in [False, True]:
      fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
      if os.path.exists(fn):
        file_list += [(epoch, fn)]  # epoch, fn
        break
      if BackendEngine.is_tensorflow_selected():
        if os.path.exists(fn + ".index"):
          file_list += [(epoch, fn)]  # epoch, fn
          break
  file_list.sort()
  return file_list

def get_existing_models(cls, config):
  """
  :param Config.Config config:
  :return: dict epoch -> model filename
  :rtype: dict[int,str]
  """
  model_filename = config.value('model', '')
  if not model_filename:
    return {}
  # Automatically search the filesystem for existing models.
  file_list = {}
  for epoch in range(1, cls.config_get_final_epoch(config) + 1):
    for is_pretrain in [False, True]:
      fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
      if os.path.exists(fn):
        file_list[epoch] = fn
        break
      if BackendEngine.is_tensorflow_selected():
        if os.path.exists(fn + ".index"):
          file_list[epoch] = fn
          break
  return file_list

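# Usage sketch for resuming training; EngineBase as the owning class is an
# assumption taken from the callers below, and the model path is a placeholder:
#
#   config.set("model", "/path/to/net-model/network")
#   existing = EngineBase.get_existing_models(config)  # e.g. {1: ".../network.001", ...}
#   if existing:
#     print("can resume from epoch", max(existing))
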
def get_epoch_model(cls, config):
  """
  :type config: Config.Config
  :returns (epoch, modelFilename)
  :rtype: (int|None, str|None)
  """
  # XXX: We cache it, although this is wrong if we have changed the config.
  if cls._epoch_model:
    return cls._epoch_model

  start_epoch_mode = config.value('start_epoch', 'auto')
  if start_epoch_mode == 'auto':
    start_epoch = None
  else:
    start_epoch = int(start_epoch_mode)
    assert start_epoch >= 1

  load_model_epoch_filename = config.value('load', '')
  if load_model_epoch_filename:
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix = ".meta"
    assert os.path.exists(load_model_epoch_filename + fn_postfix)

  import_model_train_epoch1 = config.value('import_model_train_epoch1', '')
  if import_model_train_epoch1:
    assert os.path.exists(import_model_train_epoch1)

  existing_models = cls.get_existing_models(config)

  # Only use this when we don't train.
  # For training, we first consider existing models before we take the 'load' into account when in auto epoch mode.
  # In all other cases, we use the model specified by 'load'.
  if load_model_epoch_filename and (config.value('task', 'train') != 'train' or start_epoch is not None):
    epoch = model_epoch_from_filename(load_model_epoch_filename)
    if config.value('task', 'train') == 'train' and start_epoch is not None:
      # Ignore the epoch. To keep it consistent with the case below.
      epoch = None
    epoch_model = (epoch, load_model_epoch_filename)

  # In case of training, always first consider existing models.
  # This is because we reran CRNN training, we usually don't want to train from scratch
  # but resume where we stopped last time.
  elif existing_models:
    epoch_model = existing_models[-1]
    if load_model_epoch_filename:
      print("note: there is a 'load' which we ignore because of existing model", file=log.v4)

  elif config.value('task', 'train') == 'train' and import_model_train_epoch1 and start_epoch in [None, 1]:
    epoch_model = (0, import_model_train_epoch1)

  # Now, consider this also in the case when we train, as an initial model import.
  elif load_model_epoch_filename:
    # Don't use the model epoch as the start epoch in training.
    # We use this as an import for training.
    epoch_model = (model_epoch_from_filename(load_model_epoch_filename), load_model_epoch_filename)

  else:
    epoch_model = (None, None)

  if start_epoch == 1:
    if epoch_model[0]:  # existing model
      print("warning: there is an existing model: %s" % (epoch_model,), file=log.v4)
      epoch_model = (None, None)
  elif (start_epoch or 0) > 1:
    if epoch_model[0]:
      if epoch_model[0] != start_epoch - 1:
        print("warning: start_epoch %i but there is %s" % (start_epoch, epoch_model), file=log.v4)
      epoch_model = existing_models[start_epoch - 1]

  cls._epoch_model = epoch_model
  return epoch_model

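# The resolution order implemented above, summarized, with a usage sketch
# (EngineBase as the owning class is an assumption from the callers below):
#   1. 'load' wins when not training, or when start_epoch is set explicitly.
#   2. When training, existing on-disk models win (resume where we stopped).
#   3. import_model_train_epoch1 seeds a fresh training run as epoch 0.
#   4. Otherwise 'load' acts as an initial import; else (None, None).
#
#   epoch, model_fn = EngineBase.get_epoch_model(config)
#   if model_fn:
#     print("starting from epoch", epoch, "with model", model_fn)
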
def _forward(segment_name, features):
  """
  :param numpy.ndarray features: format (input-feature,time) (via Sprint)
  :return: format (output-dim,time)
  :rtype: numpy.ndarray
  """
  print("Sprint forward", segment_name, features.shape)
  start_time = time.time()
  assert engine is not None, "not initialized"
  assert sprintDataset

  # Features are in Sprint format (feature,time).
  num_time = features.shape[1]
  assert features.shape == (InputDim, num_time)
  time_a = time.time()
  dataset, seq_idx = features_to_dataset(features=features, segment_name=segment_name)
  time_b = time.time()
  time_c = time_b  # overwritten below in the Theano branch, after device assignment

  if BackendEngine.is_theano_selected():
    # Prepare data for device.
    device = engine.devices[0]
    success = assign_dev_data_single_seq(device, dataset=dataset, seq=seq_idx)
    assert success, "failed to allocate & assign data for seq %i, %s" % (seq_idx, segment_name)
    time_c = time.time()

    # Do the actual forwarding and collect result.
    device.run("extract")
    result, _ = device.result()
    assert result is not None, "Device crashed."
    assert len(result) == 1
    posteriors = result[0]
  elif BackendEngine.is_tensorflow_selected():
    posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)
  else:
    raise NotImplementedError("unknown backend engine")
  time_d = time.time()

  # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
  if posteriors.ndim == 3:
    assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
    posteriors = posteriors[:, 0]

  # Posteriors are in format (time,emission).
  assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
  # Reformat to Sprint expected format (emission,time).
  posteriors = posteriors.transpose()
  assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)
  stats = (numpy.min(posteriors), numpy.max(posteriors),
           numpy.mean(posteriors), numpy.std(posteriors))
  print("posteriors min/max/mean/std:", stats,
        "time:", time.time() - start_time,
        time.time() - time_a, time.time() - time_b,
        time.time() - time_c, time.time() - time_d)
  if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
    print("posteriors:", posteriors)
    debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
    debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
    numpy.savetxt(debug_feat_fn, features)
    numpy.savetxt(debug_post_fn, posteriors)
    print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
    assert False, "Error, posteriors contain invalid numbers."
  return posteriors

def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
  """
  :param str|None configfile: filename, via init(), this is set
  :param str|None target_mode: "forward" or so. via init(), this is set
  :param int|None epoch: via init(), this is set
  :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
  """
  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set targetMode.

  if not getattr(sys, "argv", None):
    # Set some dummy. Some code might want this (e.g. TensorFlow).
    sys.argv = [__file__]

  global config
  if not config:
    # Some subset of what we do in rnn.init().
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)
    rnn.init_config(config_filename=configfile, extra_updates={"task": target_mode})
    config = rnn.config
    if sprint_opts is not None:
      config.update(sprint_opts)
    rnn.init_log()
    rnn.returnn_greeting(config_filename=configfile)
    rnn.init_backend_engine()
    rnn.init_faulthandler(sigusr1_chain=True)
    rnn.init_config_json_network()

    global Engine
    if BackendEngine.is_tensorflow_selected():
      # Use TFEngine.Engine class instead of Engine.Engine.
      from TFEngine import Engine
    elif BackendEngine.is_theano_selected():
      from Engine import Engine

    import atexit
    atexit.register(_at_exit_handler)

  if target_mode:
    set_target_mode(target_mode)

  _init_dataset()

  if target_mode and target_mode == "forward" and epoch:
    model_filename = config.value('model', '')
    fns = [EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
           for is_pretrain in [False, True]]
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix += ".meta"
    fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
      "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

  global engine
  if not engine:
    devices = rnn.init_theano_devices()
    rnn.print_task_properties(devices)
    rnn.init_engine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)

def model_filename_postfix(cls):
  """
  :rtype: str
  """
  fn_postfix = ""
  if BackendEngine.is_tensorflow_selected():
    fn_postfix = ".meta"
  return fn_postfix

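# Usage sketch: TF writes several files per checkpoint, so existence checks
# combine the base filename with this postfix (cf. get_epoch_model above):
#
#   fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain=False)
#   if os.path.exists(fn + cls.model_filename_postfix()):
#     ...  # checkpoint for this epoch exists
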
def __init__(self,
             corpus_file,
             orth_symbols_file=None,
             orth_symbols_map_file=None,
             orth_replace_map_file=None,
             word_based=False,
             seq_end_symbol="[END]",
             unknown_symbol="[UNKNOWN]",
             parse_orth_opts=None,
             phone_info=None,
             add_random_phone_seqs=0,
             partition_epoch=1,
             auto_replace_unknown_symbol=False,
             log_auto_replace_unknown_symbols=10,
             log_skipped_seqs=10,
             error_on_invalid_seq=True,
             add_delayed_seq_data=False,
             delayed_seq_data_start_symbol="[START]",
             **kwargs):
  """
  :param str|()->str corpus_file: Bliss XML or line-based txt. optionally can be gzip.
  :param dict|None phone_info: if you want to get phone seqs, dict with lexicon_file etc. see PhoneSeqGenerator
  :param str|()->str|None orth_symbols_file: list of orthography symbols, if you want to get orth symbol seqs
  :param str|()->str|None orth_symbols_map_file: list of orth symbols, each line: "symbol index"
  :param str|()->str|None orth_replace_map_file: JSON file with replacement dict for orth symbols
  :param bool word_based: whether to parse single words, or otherwise will be char-based
  :param str|None seq_end_symbol: what to add at the end, if given.
    will be set as postfix=[seq_end_symbol] or postfix=[] for parse_orth_opts.
  :param dict[str]|None parse_orth_opts: kwargs for parse_orthography()
  :param int add_random_phone_seqs: will add random seqs with the same len as the real seq as additional data
  :param bool|int log_auto_replace_unknown_symbols: write about auto-replacements with unknown symbol.
    if this is an int, it will only log the first N replacements, and then keep quiet.
  :param bool|int log_skipped_seqs: write about skipped seqs to logging, due to missing lexicon entry or so.
    if this is an int, it will only log the first N entries, and then keep quiet.
  :param bool error_on_invalid_seq: if there is a seq we would have to skip, error
  :param bool add_delayed_seq_data: will add another data-key "delayed" which will have the sequence
    delayed_seq_data_start_symbol + original_sequence[:-1]
  :param str delayed_seq_data_start_symbol: used for add_delayed_seq_data
  :param int partition_epoch: whether to partition the epochs into multiple parts. like epoch_split
  """
  super(LmDataset, self).__init__(**kwargs)

  if callable(corpus_file):
    corpus_file = corpus_file()
  if callable(orth_symbols_file):
    orth_symbols_file = orth_symbols_file()
  if callable(orth_symbols_map_file):
    orth_symbols_map_file = orth_symbols_map_file()
  if callable(orth_replace_map_file):
    orth_replace_map_file = orth_replace_map_file()

  print("LmDataset, loading file", corpus_file, file=log.v4)

  self.word_based = word_based
  self.seq_end_symbol = seq_end_symbol
  self.unknown_symbol = unknown_symbol
  self.parse_orth_opts = parse_orth_opts or {}
  self.parse_orth_opts.setdefault("word_based", self.word_based)
  self.parse_orth_opts.setdefault("postfix", [self.seq_end_symbol] if self.seq_end_symbol is not None else [])

  if orth_symbols_file:
    assert not phone_info
    assert not orth_symbols_map_file
    orth_symbols = open(orth_symbols_file).read().splitlines()
    self.orth_symbols_map = {sym: i for (i, sym) in enumerate(orth_symbols)}
    self.orth_symbols = orth_symbols
    self.labels["data"] = orth_symbols
    self.seq_gen = None
  elif orth_symbols_map_file:
    assert not phone_info
    orth_symbols_imap_list = [
      (int(b), a) for (a, b) in [
        line.split(None, 1) for line in open(orth_symbols_map_file).read().splitlines()]]
    orth_symbols_imap_list.sort()
    assert orth_symbols_imap_list[0][0] == 0
    assert orth_symbols_imap_list[-1][0] == len(orth_symbols_imap_list) - 1
    self.orth_symbols_map = {sym: i for (i, sym) in orth_symbols_imap_list}
    self.orth_symbols = [sym for (i, sym) in orth_symbols_imap_list]
    self.labels["data"] = self.orth_symbols
    self.seq_gen = None
  else:
    assert not orth_symbols_file
    assert isinstance(phone_info, dict)
    self.seq_gen = PhoneSeqGenerator(**phone_info)
    self.orth_symbols = None
    self.labels["data"] = self.seq_gen.get_class_labels()

  if orth_replace_map_file:
    orth_replace_map = load_json(filename=orth_replace_map_file)
    assert isinstance(orth_replace_map, dict)
    self.orth_replace_map = {
      key: parse_orthography_into_symbols(v, word_based=self.word_based)
      for (key, v) in orth_replace_map.items()}
    if self.orth_replace_map:
      if len(self.orth_replace_map) <= 5:
        print(" orth_replace_map: %r" % self.orth_replace_map, file=log.v5)
      else:
        print(" orth_replace_map: %i entries" % len(self.orth_replace_map), file=log.v5)
  else:
    self.orth_replace_map = {}

  num_labels = len(self.labels["data"])
  use_uint_types = False
  if BackendEngine.is_tensorflow_selected():
    use_uint_types = True
  if num_labels <= 2 ** 7:
    self.dtype = "int8"
  elif num_labels <= 2 ** 8 and use_uint_types:
    self.dtype = "uint8"
  elif num_labels <= 2 ** 31:
    self.dtype = "int32"
  elif num_labels <= 2 ** 32 and use_uint_types:
    self.dtype = "uint32"
  elif num_labels <= 2 ** 61:
    self.dtype = "int64"
  elif num_labels <= 2 ** 62 and use_uint_types:
    self.dtype = "uint64"
  else:
    raise Exception("cannot handle so many labels: %i" % num_labels)

  self.num_outputs = {"data": [len(self.labels["data"]), 1]}
  self.num_inputs = self.num_outputs["data"][0]
  self.seq_order = None
  self.auto_replace_unknown_symbol = auto_replace_unknown_symbol
  self.log_auto_replace_unknown_symbols = log_auto_replace_unknown_symbols
  self.log_skipped_seqs = log_skipped_seqs
  self.error_on_invalid_seq = error_on_invalid_seq
  self.partition_epoch = partition_epoch
  self.add_random_phone_seqs = add_random_phone_seqs
  for i in range(add_random_phone_seqs):
    self.num_outputs["random%i" % i] = self.num_outputs["data"]
  self.add_delayed_seq_data = add_delayed_seq_data
  self.delayed_seq_data_start_symbol = delayed_seq_data_start_symbol
  if add_delayed_seq_data:
    self.num_outputs["delayed"] = self.num_outputs["data"]

  if _is_bliss(corpus_file):
    iter_f = _iter_bliss
  else:
    iter_f = _iter_txt
  self.orths = []
  iter_f(corpus_file, self.orths.append)
  # It's only estimated because we might filter some out or so.
  self._estimated_num_seqs = len(self.orths) // self.partition_epoch
  print(" done, loaded %i sequences" % len(self.orths), file=log.v4)

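# Construction sketch; the file paths are placeholders, the keyword arguments
# come from the signature above, and init_seq_order is the generic Dataset API:
#
#   dataset = LmDataset(
#     corpus_file="/path/to/corpus.txt.gz",        # line-based text, gzip ok
#     orth_symbols_map_file="/path/to/vocab.txt",  # lines of "symbol index"
#     word_based=True,
#     auto_replace_unknown_symbol=True)
#   dataset.init_seq_order(epoch=1)
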
def initBase(configfile=None, targetMode=None, epoch=None):
  """
  :param str|None configfile: filename, via init(), this is set
  :param str|None targetMode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  """
  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set targetMode.

  if not getattr(sys, "argv", None):
    # Set some dummy. Some code might want this (e.g. TensorFlow).
    sys.argv = [__file__]

  global config
  if not config:
    # Some subset of what we do in rnn.init().
    rnn.initBetterExchook()
    rnn.initThreadJoinHack()
    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)
    rnn.initConfig(configFilename=configfile)
    config = rnn.config
    rnn.initLog()
    rnn.returnnGreeting(configFilename=configfile)
    rnn.initBackendEngine()
    rnn.initFaulthandler(sigusr1_chain=True)
    rnn.initConfigJsonNetwork()

    if BackendEngine.is_tensorflow_selected():
      # Use TFEngine.Engine class instead of Engine.Engine.
      import TFEngine
      global Engine
      Engine = TFEngine.Engine

    import atexit
    atexit.register(_at_exit_handler)

  if targetMode:
    setTargetMode(targetMode)

  initDataset()

  if targetMode and targetMode == "forward" and epoch:
    model_filename = config.value('model', '')
    fns = [Engine.epoch_model_filename(model_filename, epoch, is_pretrain)
           for is_pretrain in [False, True]]
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix += ".meta"
    fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert Engine.get_epoch_model(config)[1] == model_epoch_filename, (
      "%r != %r" % (Engine.get_epoch_model(config), model_epoch_filename))

  global engine
  if not engine:
    devices = rnn.initDevices()
    rnn.printTaskProperties(devices)
    rnn.initEngine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)

def num_inputs_outputs_from_config(cls, config):
  """
  :type config: Config.Config
  :returns (num_inputs, num_outputs),
     where num_inputs is like num_outputs["data"][0],
     and num_outputs is a dict of data_key -> (dim, ndim),
       where data_key is e.g. "classes" or "data",
       dim is the feature dimension or the number of classes,
       and ndim is the ndim counted without batch-dim,
       i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
  :rtype: (int,dict[str,(int,int)])
  """
  from Util import BackendEngine
  num_inputs = config.int('num_inputs', 0)
  target = config.value('target', 'classes')
  if config.is_typed('num_outputs'):
    num_outputs = config.typed_value('num_outputs')
    if not isinstance(num_outputs, dict):
      num_outputs = {target: num_outputs}
    num_outputs = num_outputs.copy()
    from Dataset import convert_data_dims
    num_outputs = convert_data_dims(num_outputs, leave_dict_as_is=BackendEngine.is_tensorflow_selected())
    if "data" in num_outputs:
      num_inputs = num_outputs["data"]
      if isinstance(num_inputs, (list, tuple)):
        num_inputs = num_inputs[0]
      elif isinstance(num_inputs, dict):
        if "dim" in num_inputs:
          num_inputs = num_inputs["dim"]
        else:
          num_inputs = num_inputs["shape"][-1]
      else:
        raise TypeError("data key %r" % num_inputs)
  elif config.has('num_outputs'):
    num_outputs = {target: [config.int('num_outputs', 0), 1]}
  else:
    num_outputs = None
  dataset = None
  if config.list('train') and ":" not in config.value('train', ''):
    dataset = config.list('train')[0]
  if not config.is_typed('num_outputs') and dataset:
    # noinspection PyBroadException
    try:
      _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
    except Exception:
      _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
    # noinspection PyBroadException
    try:
      _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
    except Exception:
      _num_outputs = hdf5_group(dataset, 'targets/size')
      for k in _num_outputs:
        _num_outputs[k] = [_num_outputs[k], len(hdf5_shape(dataset, 'targets/data/' + k))]
    if num_inputs:
      assert num_inputs == _num_inputs
    if num_outputs:
      assert num_outputs == _num_outputs
    num_inputs = _num_inputs
    num_outputs = _num_outputs
  if not num_inputs and not num_outputs and config.has("load") and BackendEngine.is_theano_selected():
    from Network import LayerNetwork
    import h5py
    model = h5py.File(config.value("load", ""), "r")
    # noinspection PyProtectedMember
    num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)
  assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
  return num_inputs, num_outputs

def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
  """
  :param str|None configfile: filename, via init(), this is set
  :param str|None target_mode: "forward" or so. via init(), this is set
  :param int|None epoch: via init(), this is set
  :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
  """
  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set targetMode.

  if not getattr(sys, "argv", None):
    # Set some dummy. Some code might want this (e.g. TensorFlow).
    sys.argv = [__file__]

  global config
  if not config:
    # Some subset of what we do in rnn.init().
    rnn.init_better_exchook()
    rnn.init_thread_join_hack()
    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)
    rnn.init_config(config_filename=configfile, extra_updates={"task": target_mode})
    assert rnn.config
    config = rnn.config
    if sprint_opts is not None:
      config.update(sprint_opts)
    rnn.init_log()
    rnn.returnn_greeting(config_filename=configfile)
    rnn.init_backend_engine()
    rnn.init_faulthandler(sigusr1_chain=True)
    rnn.init_config_json_network()

    global Engine
    if BackendEngine.is_tensorflow_selected():
      # Use TFEngine.Engine class instead of Engine.Engine.
      from TFEngine import Engine
    elif BackendEngine.is_theano_selected():
      from Engine import Engine

    import atexit
    atexit.register(_at_exit_handler)

  if target_mode:
    set_target_mode(target_mode)

  _init_dataset()

  if target_mode and target_mode == "forward" and epoch:
    model_filename = config.value('model', '')
    fns = [EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
           for is_pretrain in [False, True]]
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix += ".meta"
    fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
      "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

  global engine
  if not engine:
    devices = rnn.init_theano_devices()
    rnn.print_task_properties(devices)
    rnn.init_engine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)

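# Usage sketch, roughly as Sprint drives this module; the config filename,
# segment name and epoch are placeholders:
#
#   _init_base(configfile="crnn.config", target_mode="forward", epoch=80)
#   posteriors = _forward("corpus/rec1/seq-0001", features)  # (emission, time)
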