def initDevices():
    """
    Create the Theano devices and wait until every one of them is initialized.

    When TensorFlow is the selected backend and the ``TF_DEVICE`` env var is set,
    the ``device`` config option is overwritten with its value first.

    :return: None if Theano is not the selected backend, an empty list if the
      task is "nop", otherwise the list of created devices
    :rtype: list[Device]|None
    """
    prev_device_opt = ",".join(config.list('device', ['default']))
    if BackendEngine.is_tensorflow_selected():
        tf_device = os.environ.get("TF_DEVICE")
        if tf_device:
            config.set("device", tf_device)
            print("Devices: Use %s via TF_DEVICE instead of %s." % (tf_device, prev_device_opt), file=log.v4)
    if not BackendEngine.is_theano_selected():
        return None
    if config.value("task", "train") == "nop":
        return []
    if "device" in TheanoFlags:
        # Theano has likely initialized that device already, so the config has to follow it.
        theano_device = TheanoFlags["device"]
        config.set("device", theano_device)
        print("Devices: Use %s via THEANO_FLAGS instead of %s." % (theano_device, prev_device_opt), file=log.v4)
    init_args = getDevicesInitArgs(config)
    assert len(init_args) > 0
    devices = [Device(**dev_kwargs) for dev_kwargs in init_args]
    for dev in devices:
        # Poll until the device reports that it is ready.
        while not dev.initialized:
            time.sleep(0.25)
    mode_msg = (
        "Devices: Used in blocking / single proc mode."
        if devices[0].blocking
        else "Devices: Used in multiprocessing mode.")
    print(mode_msg, file=log.v4)
    return devices
def forward(segmentName, features):
    """
    Forward a single segment through the network and return its posteriors.

    :param str segmentName: name of the segment, passed on to the dataset
    :param numpy.ndarray features: format (input-feature,time) (via Sprint)
    :return: posteriors, format (output-dim,time)
    :rtype: numpy.ndarray
    """
    print("Sprint forward", segmentName, features.shape)
    start_time = time.time()
    assert engine is not None, "not initialized"
    assert sprintDataset
    # Sprint hands over the features as (feature,time).
    num_frames = features.shape[1]
    assert features.shape == (InputDim, num_frames)

    # Put the current segment into the dataset.
    sprintDataset.shuffle_frames_of_nseqs = 0  # We must not shuffle.
    sprintDataset.initSprintEpoch(None)  # Reset cache. We don't need old seqs anymore.
    sprintDataset.init_seq_order()
    seq = sprintDataset.addNewData(features, segmentName=segmentName)

    if BackendEngine.is_theano_selected():
        # Move the data onto the first device, then run the extraction there.
        dev = engine.devices[0]
        assigned = assign_dev_data_single_seq(dev, sprintDataset, seq)
        assert assigned, "failed to allocate & assign data for seq %i, %s" % (seq, segmentName)
        dev.run("extract")
        dev_result, _ = dev.result()
        assert dev_result is not None, "Device crashed."
        assert len(dev_result) == 1
        posteriors = dev_result[0]
    elif BackendEngine.is_tensorflow_selected():
        posteriors = engine.forward_single(dataset=sprintDataset, seq_idx=seq)
    else:
        raise NotImplementedError("unknown backend engine")

    # With a sequence training criterion, posteriors might be (time,seq|batch,emission).
    if posteriors.ndim == 3:
        assert posteriors.shape == (num_frames, 1, OutputDim)
        posteriors = posteriors[:, 0]
    # Now (time,emission); Sprint expects (emission,time).
    assert posteriors.shape == (num_frames, OutputDim)
    posteriors = posteriors.transpose()
    assert posteriors.shape == (OutputDim, num_frames)

    stats = tuple(
        reduce_func(posteriors)
        for reduce_func in (numpy.min, numpy.max, numpy.mean, numpy.std))
    print("posteriors min/max/mean/std:", stats, "time:", time.time() - start_time)
    if not numpy.isfinite(posteriors).all():
        # Dump inputs and outputs for later debugging before failing.
        print("posteriors:", posteriors)
        pid = os.getpid()
        debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % pid
        debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % pid
        print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
        numpy.savetxt(debug_feat_fn, features)
        numpy.savetxt(debug_post_fn, posteriors)
        assert False, "Error, posteriors contain invalid numbers."
    return posteriors
def init_theano_devices():
    """
    Only for Theano. Create the devices and wait until all are initialized.

    :return: None if Theano is not the selected backend, an empty list if the
      task is "nop", otherwise the list of created devices
    :rtype: list[Device.Device]|None
    """
    if not BackendEngine.is_theano_selected():
        return None
    from Util import TheanoFlags
    from Config import get_devices_init_args
    from Device import Device
    prev_device_opt = ",".join(config.list('device', ['default']))
    if config.value("task", "train") == "nop":
        return []
    if "device" in TheanoFlags:
        # Theano has likely initialized that device already, so the config has to follow it.
        theano_device = TheanoFlags["device"]
        config.set("device", theano_device)
        print("Devices: Use %s via THEANO_FLAGS instead of %s." % (theano_device, prev_device_opt), file=log.v4)
    init_args = get_devices_init_args(config)
    assert len(init_args) > 0
    devices = [Device(**dev_kwargs) for dev_kwargs in init_args]
    for dev in devices:
        # Poll until the device reports that it is ready.
        while not dev.initialized:
            time.sleep(0.25)
    mode_msg = (
        "Devices: Used in blocking / single proc mode."
        if devices[0].blocking
        else "Devices: Used in multiprocessing mode.")
    print(mode_msg, file=log.v4)
    return devices
def initBackendEngine():
    """
    Select the backend engine from the config and do the backend specific setup.

    Theano: prints the version and applies ``TheanoUtil.monkey_patches()``.
    TensorFlow: prints the version, lets the ``TF_DEVICE`` env var override the
    ``device`` config option, optionally sets up Horovod (config option
    ``use_horovod``), then sets up the TF thread pools and prints the available devices.

    :raises NotImplementedError: if neither Theano nor TensorFlow is selected
    """
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
        import TheanoUtil
        TheanoUtil.monkey_patches()
    elif BackendEngine.is_tensorflow_selected():
        print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
        if get_tensorflow_version_tuple()[0] == 0:
            print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
        if os.environ.get("TF_DEVICE"):
            # The env var takes precedence over whatever the config says.
            print("Devices: Use %s via TF_DEVICE instead of %s." % (os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
            config.set("device", os.environ.get("TF_DEVICE"))
        if config.is_true("use_horovod"):
            import socket
            import horovod.tensorflow as hvd
            from TFUtil import init_horovod
            init_horovod()  # make sure it is initialized
            if "gpu" in config.value("device", "") or os.environ.get(
                    "CUDA_VISIBLE_DEVICES", ""):
                # We assume that we want to use a GPU.
                # The visible device list is set to the Horovod local rank of this process.
                gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault(
                    "gpu_options", {})
                assert "visible_device_list" not in gpu_opts
                gpu_opts["visible_device_list"] = str(hvd.local_rank())
                print("Horovod: Hostname %s, pid %i, using GPU %s." % (socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
            else:
                if hvd.rank() == 0:  # Don't spam in all ranks.
                    print("Horovod: Not using GPU.", file=log.v3)
            horovod_reduce_type = config.value("horovod_reduce_type", "")
            if horovod_reduce_type == "":
                # Not set by the user: default to "grad" and store that in the config.
                horovod_reduce_type = "grad"
                config.set("horovod_reduce_type", horovod_reduce_type)
            else:
                assert horovod_reduce_type in [
                    "grad", "param"
                ], "config option 'horovod_reduce_type' invalid"
            if hvd.rank() == 0:  # Don't spam in all ranks.
                print("Horovod: Reduce type:", horovod_reduce_type, file=log.v3)
        from TFUtil import debugRegisterBetterRepr, setup_tf_thread_pools, print_available_devices
        tf_session_opts = config.typed_value("tf_session_opts", {})
        assert isinstance(tf_session_opts, dict)
        # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
        setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
        # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
        print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
        debugRegisterBetterRepr()
    else:
        raise NotImplementedError
def _forward(segment_name, features):
    """
    Forward a single segment through the network and return its posteriors.

    :param str segment_name: name of the segment, passed on to the dataset
    :param numpy.ndarray features: format (input-feature,time) (via Sprint)
    :return: format (output-dim,time)
    :rtype: numpy.ndarray
    """
    print("Sprint forward", segment_name, features.shape)
    start_time = time.time()
    assert engine is not None, "not initialized"
    assert sprintDataset
    # Sprint hands over the features as (feature,time).
    num_time = features.shape[1]
    assert features.shape == (InputDim, num_time)
    dataset, seq_idx = features_to_dataset(features=features, segment_name=segment_name)

    if BackendEngine.is_theano_selected():
        # Move the data onto the first device, then run the extraction there.
        dev = engine.devices[0]
        assigned = assign_dev_data_single_seq(dev, dataset=dataset, seq=seq_idx)
        assert assigned, "failed to allocate & assign data for seq %i, %s" % (seq_idx, segment_name)
        dev.run("extract")
        dev_result, _ = dev.result()
        assert dev_result is not None, "Device crashed."
        assert len(dev_result) == 1
        posteriors = dev_result[0]
    elif BackendEngine.is_tensorflow_selected():
        posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)
    else:
        raise NotImplementedError("unknown backend engine")

    # With a sequence training criterion, posteriors might be (time,seq|batch,emission).
    if posteriors.ndim == 3:
        assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
        posteriors = posteriors[:, 0]
    # Now (time,emission); Sprint expects (emission,time).
    assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
    posteriors = posteriors.transpose()
    assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)

    stats = tuple(
        reduce_func(posteriors)
        for reduce_func in (numpy.min, numpy.max, numpy.mean, numpy.std))
    print("posteriors min/max/mean/std:", stats, "time:", time.time() - start_time)
    if not numpy.isfinite(posteriors).all():
        # Dump inputs and outputs for later debugging before failing.
        print("posteriors:", posteriors)
        pid = os.getpid()
        debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % pid
        debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % pid
        print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
        numpy.savetxt(debug_feat_fn, features)
        numpy.savetxt(debug_post_fn, posteriors)
        assert False, "Error, posteriors contain invalid numbers."
    return posteriors
def finalize():
    """
    Mark the process as quitting and shut down the engine, if there is one.

    Theano: terminates every device of the engine. TensorFlow: finalizes the engine.
    """
    global quit
    quit = True
    sys.exited = True
    if BackendEngine.is_theano_selected():
        for dev in (engine.devices if engine else ()):
            dev.terminate()
    elif BackendEngine.is_tensorflow_selected() and engine:
        engine.finalize()
def finalize():
    """
    Log that we are quitting, mark the process as quitting and shut down the engine, if there is one.

    Theano: terminates every device of the engine. TensorFlow: finalizes the engine.
    """
    global quit
    # The log might not be set up with a "v4" stream; fall back to stderr then.
    print("Quitting", file=getattr(log, "v4", sys.stderr))
    quit = True
    sys.exited = True
    if BackendEngine.is_theano_selected():
        for dev in (engine.devices if engine else ()):
            dev.terminate()
    elif BackendEngine.is_tensorflow_selected() and engine:
        engine.finalize()
def dumpFlags():
    """
    Print the CUDA related env vars and, for the Theano backend, some GPU/CUDA information.
    """
    environ = os.environ
    print("CUDA_VISIBLE_DEVICES:", environ.get("CUDA_VISIBLE_DEVICES"))
    print("CUDA_LAUNCH_BLOCKING:", environ.get("CUDA_LAUNCH_BLOCKING"))
    if not BackendEngine.is_theano_selected():
        return
    print("available GPUs:", get_gpu_names())
    from theano.sandbox import cuda as theano_cuda
    print("CUDA via", theano_cuda.__file__)
    print("CUDA available:", theano_cuda.cuda_available)
    print("THEANO_FLAGS:", rnn.TheanoFlags)
def initBackendEngine():
    """
    Select the backend engine from the config and print its version.

    For TensorFlow, this also warns about TF <1.0 and calls ``debugRegisterBetterRepr()``.

    :raises NotImplementedError: if neither Theano nor TensorFlow is selected
    """
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
        return
    if not BackendEngine.is_tensorflow_selected():
        raise NotImplementedError
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    tf_major_version = get_tensorflow_version_tuple()[0]
    if tf_major_version == 0:
        print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    from TFUtil import debugRegisterBetterRepr
    debugRegisterBetterRepr()
def prepareForwarding():
    """
    Load the network from the config and, for Theano, copy it over to the first device.

    Requires that the global engine and config exist and that the config has ``extract = posteriors``.
    """
    assert engine
    assert config
    # Should already be set via setTargetMode().
    extract_opt = config.list('extract')
    assert extract_opt == ["posteriors"], (
        "You need to have extract = posteriors in your CRNN config. " +
        "You have: %s" % extract_opt)
    # Load network.
    engine.init_network_from_config(config)
    if not BackendEngine.is_theano_selected():
        return
    # Copy over net params.
    first_device = engine.devices[0]
    first_device.prepare(engine.network)
def initBackendEngine():
    """
    Select the backend engine from the config and print its version.

    For TensorFlow, ``debugRegisterBetterRepr()`` is called if the env var
    ``DEBUG_TF_BETTER_REPR`` is set to a value which ``to_bool`` accepts as true.

    :raises NotImplementedError: if neither Theano nor TensorFlow is selected
    """
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        # Use the print function (like the rest of this file), not the Python 2
        # ``print >> stream`` statement, which fails once print is a function.
        print("Theano:", describe_theano_version(), file=log.v3)
    elif BackendEngine.is_tensorflow_selected():
        print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
        from Util import to_bool
        from TFUtil import debugRegisterBetterRepr
        better_repr_flag = os.environ.get("DEBUG_TF_BETTER_REPR")
        if better_repr_flag and to_bool(better_repr_flag):
            debugRegisterBetterRepr()
    else:
        raise NotImplementedError
def _prepare_forwarding():
    """
    Load the network from the config and, for Theano, copy it over to the first device.

    Requires that the global engine and config exist and that the config has ``extract = posteriors``.
    """
    assert engine
    assert config
    # Should already be set via setTargetMode().
    extract_opt = config.list('extract')
    assert extract_opt == ["posteriors"], (
        "You need to have extract = posteriors in your CRNN config. " +
        "You have: %s" % extract_opt)
    # Load network.
    engine.init_network_from_config(config)
    if not BackendEngine.is_theano_selected():
        return
    # Copy over net params.
    first_device = engine.devices[0]
    first_device.prepare(engine.network)
def initEngine(devices):
    """
    Initializes global engine.

    :type devices: list[Device]
    :raises NotImplementedError: if neither Theano nor TensorFlow is selected
    """
    global engine
    if BackendEngine.is_theano_selected():
        engine = Engine(devices)
        return
    if not BackendEngine.is_tensorflow_selected():
        raise NotImplementedError
    # The TF engine is constructed from the config; ``devices`` is not passed to it.
    import TFEngine
    engine = TFEngine.Engine(config=config)
def init_backend_engine():
    """
    Selects the backend engine (Theano or TensorFlow) from the config
    and does the backend specific setup (versions, devices, Horovod, TF thread pools).

    :raises NotImplementedError: if neither Theano nor TensorFlow is selected
    """
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
        import TheanoUtil
        TheanoUtil.monkey_patches()
        return
    if not BackendEngine.is_tensorflow_selected():
        raise NotImplementedError

    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
        print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    tf_device = os.environ.get("TF_DEVICE")
    if tf_device:
        # The env var takes precedence over whatever the config says.
        print("Devices: Use %s via TF_DEVICE instead of %s." % (tf_device, config.opt_typed_value("device")), file=log.v4)
        config.set("device", tf_device)

    if config.is_true("use_horovod"):
        import socket
        # noinspection PyPackageRequirements,PyUnresolvedReferences
        import horovod.tensorflow as hvd
        from TFUtil import init_horovod
        init_horovod()  # make sure it is initialized
        if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
            # We assume that we want to use a GPU, namely the one matching our local rank.
            session_opts = config.typed_dict.setdefault("tf_session_opts", {})
            gpu_opts = session_opts.setdefault("gpu_options", {})
            assert "visible_device_list" not in gpu_opts
            visible_devices = str(hvd.local_rank())
            gpu_opts["visible_device_list"] = visible_devices
            print("Horovod: Hostname %s, pid %i, using GPU %s." % (socket.gethostname(), os.getpid(), visible_devices), file=log.v3)
        elif hvd.rank() == 0:  # Don't spam in all ranks.
            print("Horovod: Not using GPU.", file=log.v3)
        reduce_type = config.value("horovod_reduce_type", "")
        if reduce_type == "":
            # Not set by the user: default to "grad" and store that in the config.
            reduce_type = "grad"
            config.set("horovod_reduce_type", reduce_type)
        else:
            assert reduce_type in ["grad", "param"], "config option 'horovod_reduce_type' invalid"
        if hvd.rank() == 0:  # Don't spam in all ranks.
            print("Horovod: Reduce type:", reduce_type, file=log.v3)

    from TFUtil import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    debug_register_better_repr()
def finalize():
    """
    Cleanup at the end: flag that we are quitting and shut down the engine, if there is one.
    """
    global quit_returnn
    # The log might not be set up with a "v4" stream; fall back to stderr then.
    print("Quitting", file=getattr(log, "v4", sys.stderr))
    quit_returnn = True
    sys.exited = True
    if not engine:
        return
    if BackendEngine.is_theano_selected():
        for dev in engine.devices:
            dev.terminate()
    elif BackendEngine.is_tensorflow_selected():
        engine.finalize()
def finalize():
    """
    Cleanup at the end: flag that we are quitting and shut down the engine, if there is one.
    """
    global quit_returnn
    # The log might not be set up with a "v4" stream; fall back to stderr then.
    print("Quitting", file=getattr(log, "v4", sys.stderr))
    quit_returnn = True
    sys.exited = True
    if BackendEngine.is_theano_selected():
        for dev in (engine.devices if engine else ()):
            dev.terminate()
    elif BackendEngine.is_tensorflow_selected() and engine:
        engine.finalize()
def init_engine(devices):
    """
    Initializes global engine.

    :type devices: list[Device.Device]|None
    :raises NotImplementedError: if neither Theano nor TensorFlow is selected
    """
    global engine
    if BackendEngine.is_theano_selected():
        import Engine
        engine = Engine.Engine(devices)
        return
    if not BackendEngine.is_tensorflow_selected():
        raise NotImplementedError
    # The TF engine is constructed from the config; ``devices`` is not passed to it.
    import TFEngine
    engine = TFEngine.Engine(config=config)
def dump_flags():
    """
    Dump some relevant env flags, plus GPU/CUDA information when the Theano backend is selected.
    """
    environ = os.environ
    print("CUDA_VISIBLE_DEVICES:", environ.get("CUDA_VISIBLE_DEVICES"))
    print("CUDA_LAUNCH_BLOCKING:", environ.get("CUDA_LAUNCH_BLOCKING"))
    if not BackendEngine.is_theano_selected():
        return
    print("available GPUs:", get_gpu_names())
    # noinspection PyUnresolvedReferences,PyPackageRequirements
    from theano.sandbox import cuda as theano_cuda
    print("CUDA via", theano_cuda.__file__)
    print("CUDA available:", theano_cuda.cuda_available)
    from Util import TheanoFlags
    print("THEANO_FLAGS:", TheanoFlags)
def init(config_filename=None, command_line_options=(), config_updates=None, extra_greeting=None):
    """
    Run the startup sequence: config, log, backend engine, devices, data,
    and finally either the server (task "server") or the global engine.

    :param str|None config_filename:
    :param tuple[str]|list[str]|None command_line_options: e.g. sys.argv[1:]
    :param dict[str]|None config_updates: see :func:`init_config`
    :param str|None extra_greeting: printed to the log before the regular greeting, if given
    """
    global server
    init_better_exchook()
    init_thread_join_hack()
    init_config(
        config_filename=config_filename,
        command_line_options=command_line_options,
        extra_updates=config_updates)
    if config.bool("patch_atfork", False):
        from Util import maybe_restart_returnn_with_atfork_patch
        maybe_restart_returnn_with_atfork_patch()
    init_log()
    if extra_greeting:
        print(extra_greeting, file=log.v1)
    returnn_greeting(config_filename=config_filename, command_line_options=command_line_options)
    init_faulthandler()
    init_backend_engine()

    if BackendEngine.is_theano_selected():
        is_graph_task = config.value('task', 'train') == "theano_graph"
        if is_graph_task:
            config.set("multiprocessing", False)
        if config.bool('multiprocessing', True):
            init_cuda_not_in_main_proc_check()
    if config.bool('ipython', False):
        init_ipython_kernel()
    init_config_json_network()

    theano_devices = init_theano_devices()
    if need_data():
        init_data()
    print_task_properties(theano_devices)

    if config.value('task', 'train') != 'server':
        init_engine(theano_devices)
    else:
        # In server mode, no engine is created here; the server is set up from the config.
        import Server
        server = Server.Server(config)
def init(configFilename=None, commandLineOptions=(), config_updates=None, extra_greeting=None):
    """
    Run the startup sequence: config, log, backend engine, devices, data,
    and finally either the server (task "server") or the global engine.

    :param str|None configFilename:
    :param tuple[str]|list[str]|None commandLineOptions: e.g. sys.argv[1:]
    :param dict[str]|None config_updates: see :func:`initConfig`
    :param str|None extra_greeting: printed to the log before the regular greeting, if given
    """
    initBetterExchook()
    initThreadJoinHack()
    initConfig(configFilename=configFilename, commandLineOptions=commandLineOptions, extra_updates=config_updates)
    if config.bool("patch_atfork", False):
        from Util import maybe_restart_returnn_with_atfork_patch
        maybe_restart_returnn_with_atfork_patch()
    initLog()
    if extra_greeting:
        print(extra_greeting, file=log.v1)
    returnnGreeting(configFilename=configFilename, commandLineOptions=commandLineOptions)
    initFaulthandler()
    initBackendEngine()
    if BackendEngine.is_theano_selected():
        # The "theano_graph" task forces multiprocessing off.
        if config.value('task', 'train') == "theano_graph":
            config.set("multiprocessing", False)
        if config.bool('multiprocessing', True):
            initCudaNotInMainProcCheck()
    if config.bool('ipython', False):
        initIPythonKernel()
    initConfigJsonNetwork()
    devices = initTheanoDevices()
    if needData():
        initData()
    printTaskProperties(devices)
    if config.value('task', 'train') == 'server':
        # In server mode, no engine is created here; the global server is set up from the config.
        import Server
        global server
        server = Server.Server(config)
    else:
        initEngine(devices)
def init(configFilename=None, commandLineOptions=(), config_updates=None, extra_greeting=None):
    """
    Run the startup sequence: config, log, backend engine, devices, data,
    and finally either the server (task "server") or the global engine.

    :param str|None configFilename:
    :param tuple[str]|list[str]|None commandLineOptions:
    :param dict[str]|None config_updates: applied on top of the loaded config, if given
    :param str|None extra_greeting: printed to the log before the regular greeting, if given
    """
    # Without this declaration, the server created below would only be bound to a
    # function-local name and be lost as soon as init() returns.
    global server
    initBetterExchook()
    initThreadJoinHack()
    initConfig(configFilename=configFilename, commandLineOptions=commandLineOptions)
    if config_updates:
        config.update(config_updates)
    initLog()
    if extra_greeting:
        print(extra_greeting, file=log.v1)
    crnnGreeting(configFilename=configFilename, commandLineOptions=commandLineOptions)
    initBackendEngine()
    initFaulthandler()
    if BackendEngine.is_theano_selected():
        # The "theano_graph" task forces multiprocessing off.
        if config.value('task', 'train') == "theano_graph":
            config.set("multiprocessing", False)
        if config.bool('multiprocessing', True):
            initCudaNotInMainProcCheck()
    if config.bool('ipython', False):
        initIPythonKernel()
    initConfigJsonNetwork()
    devices = initDevices()
    if needData():
        initData()
    printTaskProperties(devices)
    if config.value('task', 'train') == 'server':
        # In server mode, no engine is created here; the global server is set up from the config.
        server = Server.Server(config)
    else:
        initEngine(devices)
def init(configFilename=None, commandLineOptions=()):
    """
    Run the startup sequence: config, log, backend engine, devices, data, and the global engine.

    :param str|None configFilename:
    :param tuple[str]|list[str]|None commandLineOptions:
    """
    initBetterExchook()
    initThreadJoinHack()
    initConfig(configFilename=configFilename, commandLineOptions=commandLineOptions)
    initLog()
    crnnGreeting()
    initBackendEngine()
    initFaulthandler()

    if BackendEngine.is_theano_selected():
        is_graph_task = config.value('task', 'train') == "theano_graph"
        if is_graph_task:
            config.set("multiprocessing", False)
        if config.bool('multiprocessing', True):
            initCudaNotInMainProcCheck()
    if config.bool('ipython', False):
        initIPythonKernel()
    initConfigJsonNetwork()

    device_list = initDevices()
    if needData():
        initData()
    printTaskProperties(device_list)
    initEngine(device_list)
def _forward(segment_name, features):
    """
    Forward a single segment through the network and return its posteriors.
    Also prints some timing information for the individual steps.

    :param str segment_name: name of the segment, passed on to the dataset
    :param numpy.ndarray features: format (input-feature,time) (via Sprint)
    :return: format (output-dim,time)
    :rtype: numpy.ndarray
    """
    print("Sprint forward", segment_name, features.shape)
    start_time = time.time()
    assert engine is not None, "not initialized"
    assert sprintDataset
    # Features are in Sprint format (feature,time).
    num_time = features.shape[1]
    assert features.shape == (InputDim, num_time)
    time_a = time.time()
    dataset, seq_idx = features_to_dataset(features=features, segment_name=segment_name)
    time_b = time.time()
    # Only the Theano branch has a separate "assign data to device" step.
    # Give time_c a value for all backends; otherwise the timing print below
    # fails with an UnboundLocalError for TensorFlow.
    time_c = time_b
    if BackendEngine.is_theano_selected():
        # Prepare data for device.
        device = engine.devices[0]
        success = assign_dev_data_single_seq(device, dataset=dataset, seq=seq_idx)
        assert success, "failed to allocate & assign data for seq %i, %s" % (seq_idx, segment_name)
        time_c = time.time()
        # Do the actual forwarding and collect result.
        device.run("extract")
        result, _ = device.result()
        assert result is not None, "Device crashed."
        assert len(result) == 1
        posteriors = result[0]
    elif BackendEngine.is_tensorflow_selected():
        posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)
    else:
        raise NotImplementedError("unknown backend engine")
    time_d = time.time()
    # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
    if posteriors.ndim == 3:
        assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
        posteriors = posteriors[:, 0]
    # Posteriors are in format (time,emission).
    assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
    # Reformat to Sprint expected format (emission,time).
    posteriors = posteriors.transpose()
    assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)
    stats = (numpy.min(posteriors), numpy.max(posteriors), numpy.mean(posteriors), numpy.std(posteriors))
    # Elapsed time since: start, before dataset creation, after dataset creation,
    # after device data assignment, after forwarding.
    print("posteriors min/max/mean/std:", stats, "time:",
          time.time() - start_time, time.time() - time_a, time.time() - time_b,
          time.time() - time_c, time.time() - time_d)
    if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
        # Dump inputs and outputs for later debugging before failing.
        print("posteriors:", posteriors)
        debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
        debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
        print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
        numpy.savetxt(debug_feat_fn, features)
        numpy.savetxt(debug_post_fn, posteriors)
        assert False, "Error, posteriors contain invalid numbers."
    return posteriors
def get_batch_loss_and_error_signal(self, log_posteriors, seq_lengths, tags=None):
    """
    Compute loss and error signal for a whole batch, distributing the
    sequences over up to ``self.max_num_instances`` instances.

    :param numpy.ndarray log_posteriors: 3d (time,batch,label)
    :param numpy.ndarray seq_lengths: 1d (batch)
    :param list[str] tags: seq names, length = batch.
      If None, they are taken from ``Device.get_current_seq_tags()``.
    :rtype (numpy.ndarray, numpy.ndarray)
    :returns (loss, error_signal). error_signal has the same shape as posteriors.
    loss is a 1d-array (batch).

    Note that this accesses some global references, like global current seg info,
    via the current Device instance.
    Thus this is expected to be run from the Device host proc,
      inside from SprintErrorSigOp.perform.
    This also expects that we don't have chunked seqs.
    """
    assert seq_lengths.ndim == 1
    assert log_posteriors.ndim == 3
    n_batch = seq_lengths.shape[0]
    assert n_batch == log_posteriors.shape[1]

    if tags is None:
        import Device
        assert Device.is_device_host_proc()
        tags = Device.get_current_seq_tags()
    assert len(tags) == n_batch

    # Outputs; both are float32. Entries beyond a sequence's length stay zero in the error signal.
    batch_loss = numpy.zeros((n_batch,), dtype="float32")
    batch_error_signal = numpy.zeros_like(log_posteriors, dtype="float32")

    # greedy solution to the scheduling problem
    # Longest sequences first; each one goes to the job list with the smallest total length so far.
    sorted_length = sorted(enumerate(seq_lengths),key=lambda x:x[1],reverse=True)
    jobs = [ [] for i in range(self.max_num_instances) ]
    joblen = [0]*self.max_num_instances
    for i,l in sorted_length:
        j = min(enumerate(joblen),key=lambda x:x[1])[0]
        jobs[j].append(i)
        joblen[j]+=l

    if not BackendEngine.is_theano_selected() and self.max_num_instances > 1:
        # One reader thread per instance, each handling its job list.
        # NOTE(review): no explicit start() here; presumably ReaderThread starts itself
        # in its constructor and writes into batch_loss / batch_error_signal -- confirm.
        threads = [ReaderThread(self._get_instance(i), i, jobs[i], tags, seq_lengths, log_posteriors, batch_loss, batch_error_signal) for i in range(self.max_num_instances)]
        for i,thread in enumerate(threads):
            thread.join()
            # Re-raise the first exception which was recorded by a thread.
            if thread.exception:
                raise thread.exception
    else:
        # Very simple parallelism. We must avoid any form of multi-threading
        # because this can be problematic with Theano.
        # See: https://groups.google.com/forum/#!msg/theano-users/Pu4YKlZKwm4/eNcAegzaNeYJ
        # We also try to keep it simple here.
        # The job lists from above are not used in this branch; sequences are processed
        # in groups of max_num_instances, in batch order.
        for bb in range(0, n_batch, self.max_num_instances):
            # First send one sequence to every instance ...
            for i in range(self.max_num_instances):
                b = bb + i
                if b >= n_batch: break
                instance = self._get_instance(i)
                instance.get_loss_and_error_signal__send(
                    seg_name=tags[b], seg_len=seq_lengths[b], log_posteriors=log_posteriors[:seq_lengths[b], b])
            # ... then collect the results in the same order.
            for i in range(self.max_num_instances):
                b = bb + i
                if b >= n_batch: break
                instance = self._get_instance(i)
                seg_name, loss, error_signal = instance.get_loss_and_error_signal__read()
                assert seg_name == tags[b]
                batch_loss[b] = loss
                batch_error_signal[:seq_lengths[b], b] = error_signal
                numpy_set_unused(error_signal)
    return batch_loss, batch_error_signal
del self.loss_data_queue[:idx] def have_seqs_loss_data(self, start_seq, end_seq): assert start_seq <= end_seq if start_seq == end_seq: return True first_seq, last_seq = start_seq, end_seq - 1 have_first, have_last = False, False for loss_data in self.loss_data_queue: if loss_data.seq_idx == first_seq: have_first = True if loss_data.seq_idx == last_seq: have_last = True if have_last: assert have_first # otherwise, we removed the cache already although we still need it return have_first and have_last if BackendEngine.is_theano_selected(): # noinspection PyPackageRequirements,PyUnresolvedReferences import theano # noinspection PyPackageRequirements,PyUnresolvedReferences import theano.tensor as T class SprintErrorSigOp(theano.Op): """ Op: log_posteriors, seq_lengths -> loss, error_signal (grad w.r.t. z, i.e. before softmax is applied) """ __props__ = ("sprint_opts",) def __init__(self, sprint_opts): super(SprintErrorSigOp, self).__init__()
""" This file is going to be imported by Debug.debug_shell() and available as interactive commands. """ import sys import numpy import h5py from Util import BackendEngine if BackendEngine.is_theano_selected(): # noinspection PyUnresolvedReferences,PyPackageRequirements import theano # noinspection PyUnresolvedReferences,PyPackageRequirements import theano.tensor as tt # noinspection PyUnresolvedReferences,PyPackageRequirements import theano.sandbox.cuda as cuda from TheanoUtil import make_var_tuple from Network import LayerNetwork else: theano = None def find_obj_in_stack(cls, stack=None, all_threads=True): """ :param type cls: :param types.FrameType|traceback.FrameSummary stack: :param bool all_threads: :return: obj """ if all_threads: assert stack is None
def num_inputs_outputs_from_config(cls, config):
    """
    Determine the number of inputs and outputs, from (in this order)
    the config options ``num_inputs`` / ``num_outputs``, the HDF file given as first
    ``train`` entry, or (Theano only) the model file given via ``load``.

    :type config: Config.Config
    :returns (num_inputs, num_outputs),
       where num_inputs is like num_outputs["data"][0],
       and num_outputs is a dict of data_key -> (dim, ndim),
         where data_key is e.g. "classes" or "data",
         dim is the feature dimension or the number of classes,
         and ndim is the ndim counted without batch-dim,
         i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
    :rtype: (int,dict[str,(int,int)])
    """
    from Util import BackendEngine
    num_inputs = config.int('num_inputs', 0)
    target = config.value('target', 'classes')
    if config.is_typed('num_outputs'):
        num_outputs = config.typed_value('num_outputs')
        if not isinstance(num_outputs, dict):
            num_outputs = {target: num_outputs}
        # Work on a copy, so that the value stored in the config is not modified.
        num_outputs = num_outputs.copy()
        from Dataset import convert_data_dims
        num_outputs = convert_data_dims(num_outputs, leave_dict_as_is=BackendEngine.is_tensorflow_selected())
        if "data" in num_outputs:
            # An explicit "data" entry overrides the num_inputs config option.
            num_inputs = num_outputs["data"]
            if isinstance(num_inputs, (list, tuple)):
                num_inputs = num_inputs[0]
            elif isinstance(num_inputs, dict):
                if "dim" in num_inputs:
                    num_inputs = num_inputs["dim"]
                else:
                    num_inputs = num_inputs["shape"][-1]
            else:
                raise TypeError("data key %r" % num_inputs)
    elif config.has('num_outputs'):
        num_outputs = {target: [config.int('num_outputs', 0), 1]}
    else:
        num_outputs = None
    dataset = None
    # Only take the first train entry if the train option has no ":" in it.
    if config.list('train') and ":" not in config.value('train', ''):
        dataset = config.list('train')[0]
    if not config.is_typed('num_outputs') and dataset:
        # Read the dimensions from the dataset file; try 'inputCodeSize' first, then 'inputPattSize'.
        # noinspection PyBroadException
        try:
            _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
        except Exception:
            _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
        # noinspection PyBroadException
        try:
            _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
        except Exception:
            _num_outputs = hdf5_group(dataset, 'targets/size')
            for k in _num_outputs:
                _num_outputs[k] = [_num_outputs[k], len(hdf5_shape(dataset, 'targets/data/' + k))]
        # Anything given in the config must agree with what the dataset says.
        if num_inputs:
            assert num_inputs == _num_inputs
        if num_outputs:
            assert num_outputs == _num_outputs
        num_inputs = _num_inputs
        num_outputs = _num_outputs
    if not num_inputs and not num_outputs and config.has("load") and BackendEngine.is_theano_selected():
        # Last resort (Theano only): take the dimensions from the model file.
        from Network import LayerNetwork
        import h5py
        # NOTE(review): the file is never closed explicitly here -- consider a ``with`` block.
        model = h5py.File(config.value("load", ""), "r")
        # noinspection PyProtectedMember
        num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)
    assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
    return num_inputs, num_outputs
def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
    """
    Common initialization: sets up the global config (only once), the target mode,
    the dataset, the model file to load (for forwarding) and the global engine (only once).

    :param str|None configfile: filename, via init(), this is set
    :param str|None target_mode: "forward" or so. via init(), this is set
    :param int epoch: via init(), this is set
    :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
    """
    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set targetMode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.init_better_exchook()
        rnn.init_thread_join_hack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.init_config(config_filename=configfile, extra_updates={"task": target_mode})
        assert rnn.config
        config = rnn.config
        # Options passed by Sprint take precedence over the config file.
        if sprint_opts is not None:
            config.update(sprint_opts)

        rnn.init_log()
        rnn.returnn_greeting(config_filename=configfile)
        rnn.init_backend_engine()
        rnn.init_faulthandler(sigusr1_chain=True)
        rnn.init_config_json_network()

        # Bind the module-level name Engine to the engine class of the selected backend.
        global Engine
        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            from TFEngine import Engine
        elif BackendEngine.is_theano_selected():
            from Engine import Engine

        import atexit
        atexit.register(_at_exit_handler)

    if target_mode:
        set_target_mode(target_mode)

    _init_dataset()

    if target_mode and target_mode == "forward" and epoch:
        # Find the model file of the requested epoch (regular or pretrain) and set it as "load".
        model_filename = config.value('model', '')
        fns = [
            EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            # For TF, the existence check is done on the ".meta" file.
            fn_postfix += ".meta"
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        # Exactly one of the two candidates must exist.
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
            "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

    global engine
    if not engine:
        devices = rnn.init_theano_devices()
        rnn.print_task_properties(devices)
        rnn.init_engine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)
def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
    """
    Set up the module-level state (config, dataset, engine) used by the Sprint interface.

    The config/logging/backend part only runs while the global ``config`` is still unset.
    The target mode, the dataset, the model selection and the engine are handled on every call.

    :param str|None configfile: filename, via init(), this is set
    :param str|None target_mode: "forward" or so. via init(), this is set
    :param int epoch: via init(), this is set
    :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
    """
    global isInitialized, config, Engine, engine

    # Flag this right away. We run through in any case, maybe just to set the target mode.
    isInitialized = True

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    if not config:
        # Some subset of what we do in rnn.init().
        rnn.init_better_exchook()
        rnn.init_thread_join_hack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.init_config(
            config_filename=configfile,
            extra_updates={"task": target_mode})
        config = rnn.config
        if sprint_opts is not None:
            # Options given by Sprint override the values from the config file.
            config.update(sprint_opts)

        rnn.init_log()
        rnn.returnn_greeting(config_filename=configfile)
        rnn.init_backend_engine()
        rnn.init_faulthandler(sigusr1_chain=True)
        rnn.init_config_json_network()

        # Bind the global ``Engine`` name to the engine class of the selected backend.
        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            from TFEngine import Engine
        elif BackendEngine.is_theano_selected():
            from Engine import Engine

        import atexit
        atexit.register(_at_exit_handler)

    if target_mode:
        set_target_mode(target_mode)

    _init_dataset()

    if target_mode == "forward" and epoch:
        # Select the model file of the requested epoch (either the normal or the pretrain one).
        model_filename = config.value('model', '')
        candidates = [
            EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in (False, True)]
        # With TensorFlow, existence is checked on the filename plus ".meta".
        fn_postfix = ".meta" if BackendEngine.is_tensorflow_selected() else ""
        existing = [fn for fn in candidates if os.path.exists(fn + fn_postfix)]
        assert len(existing) == 1, "%s not found" % candidates
        model_epoch_filename = existing[0]
        config.set('load', model_epoch_filename)
        assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
            "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

    if not engine:
        devices = rnn.init_theano_devices()
        rnn.print_task_properties(devices)
        rnn.init_engine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)
def num_inputs_outputs_from_config(cls, config):
    """
    Determine the input dimension and the output dimensions from the config.

    The sources are tried in this order:

    1. a typed ``num_outputs`` config entry (run through ``convert_data_dims``);
       its ``"data"`` entry, if present, also defines ``num_inputs``,
    2. an untyped ``num_outputs`` entry together with ``num_inputs``,
    3. the first ``train`` file, read via the ``hdf5_*`` helpers (only when
       ``num_outputs`` is not typed); values from the config must match it,
    4. with the Theano backend only: the model file given by ``load``.

    :type config: Config.Config
    :returns (num_inputs, num_outputs),
       where num_inputs is like num_outputs["data"][0],
       and num_outputs is a dict of data_key -> (dim, ndim),
         where data_key is e.g. "classes" or "data",
         dim is the feature dimension or the number of classes,
         and ndim is the ndim counted without batch-dim,
         i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
    :rtype: (int,dict[str,(int,int)])
    :raises TypeError: if the "data" entry of a typed num_outputs is neither list/tuple nor dict
    """
    from Util import BackendEngine
    num_inputs = config.int('num_inputs', 0)
    target = config.value('target', 'classes')
    if config.is_typed('num_outputs'):
        num_outputs = config.typed_value('num_outputs')
        if not isinstance(num_outputs, dict):
            # A non-dict value describes the single target only.
            num_outputs = {target: num_outputs}
        # Work on a copy so that the value stored in the config is not modified.
        num_outputs = num_outputs.copy()
        from Dataset import convert_data_dims
        num_outputs = convert_data_dims(
            num_outputs,
            leave_dict_as_is=BackendEngine.is_tensorflow_selected())
        if "data" in num_outputs:
            num_inputs = num_outputs["data"]
            if isinstance(num_inputs, (list, tuple)):
                num_inputs = num_inputs[0]
            elif isinstance(num_inputs, dict):
                if "dim" in num_inputs:
                    num_inputs = num_inputs["dim"]
                else:
                    num_inputs = num_inputs["shape"][-1]
            else:
                raise TypeError("data key %r" % num_inputs)
    elif config.has('num_outputs'):
        num_outputs = {target: [config.int('num_outputs', 0), 1]}
    else:
        num_outputs = None

    dataset = None
    # NOTE(review): presumably a ":" in the train value marks something other than
    # a plain file name -- confirm against the config format.
    if config.list('train') and ":" not in config.value('train', ''):
        dataset = config.list('train')[0]

    if not config.is_typed('num_outputs') and dataset:
        # Read the dimensions from the dataset file. The second attribute name is
        # the fallback when reading the first one fails for any reason.
        # noinspection PyBroadException
        try:
            _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
        except Exception:
            _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
        # noinspection PyBroadException
        try:
            _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
        except Exception:
            _num_outputs = hdf5_group(dataset, 'targets/size')
            for k in _num_outputs:
                _num_outputs[k] = [
                    _num_outputs[k],
                    len(hdf5_shape(dataset, 'targets/data/' + k))]
        # Anything given in the config must agree with what the dataset says.
        if num_inputs:
            assert num_inputs == _num_inputs
        if num_outputs:
            assert num_outputs == _num_outputs
        num_inputs = _num_inputs
        num_outputs = _num_outputs

    if (not num_inputs and not num_outputs
            and config.has("load") and BackendEngine.is_theano_selected()):
        from Network import LayerNetwork
        import h5py
        # Close the model file again once the dimensions are read; the handle was
        # previously left open.
        # NOTE(review): assumes the returned values do not depend on the file
        # staying open -- confirm in LayerNetwork._n_in_out_from_hdf_model.
        with h5py.File(config.value("load", ""), "r") as model:
            # noinspection PyProtectedMember
            num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)

    assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
    return num_inputs, num_outputs
def get_batch_loss_and_error_signal(self, log_posteriors, seq_lengths, tags=None):
    """
    Compute the loss and the error signal for every sequence of a batch,
    distributing the sequences over the available instances.

    :param numpy.ndarray log_posteriors: 3d (time,batch,label)
    :param numpy.ndarray seq_lengths: 1d (batch)
    :param list[str] tags: seq names, length = batch
    :rtype (numpy.ndarray, numpy.ndarray)
    :returns (loss, error_signal). error_signal has the same shape as posteriors.
    loss is a 1d-array (batch).

    Note that this accesses some global references, like global current seg info,
    via the current Device instance.
    Thus this is expected to be run from the Device host proc,
      inside from SprintErrorSigOp.perform.
    This also expects that we don't have chunked seqs.
    """
    assert seq_lengths.ndim == 1
    assert log_posteriors.ndim == 3
    n_batch = seq_lengths.shape[0]
    assert n_batch == log_posteriors.shape[1]

    if tags is None:
        # No tags given: take the tags of the current seqs from the Device module.
        import Device
        assert Device.is_device_host_proc()
        tags = Device.get_current_seq_tags()
    assert len(tags) == n_batch

    batch_loss = numpy.zeros((n_batch,), dtype="float32")
    batch_error_signal = numpy.zeros_like(log_posteriors, dtype="float32")

    n_instances = self.max_num_instances

    # Greedy solution to the scheduling problem:
    # go through the seqs from longest to shortest and always give the next one
    # to the job with the smallest accumulated length so far.
    longest_first = sorted(
        enumerate(seq_lengths), key=lambda item: item[1], reverse=True)
    jobs = [[] for _ in range(n_instances)]
    joblen = [0] * n_instances
    for seq_idx, seq_len in longest_first:
        job_idx = min(range(n_instances), key=lambda k: joblen[k])
        jobs[job_idx].append(seq_idx)
        joblen[job_idx] += seq_len

    if BackendEngine.is_theano_selected() or n_instances <= 1:
        # Very simple parallelism. We must avoid any form of multi-threading
        # because this can be problematic with Theano.
        # See: https://groups.google.com/forum/#!msg/theano-users/Pu4YKlZKwm4/eNcAegzaNeYJ
        # We also try to keep it simple here.
        for chunk_start in range(0, n_batch, n_instances):
            # Batch indices handled in this round, one per instance.
            chunk = range(chunk_start, min(chunk_start + n_instances, n_batch))
            # First send all requests of this round ...
            for slot, b in enumerate(chunk):
                self._get_instance(slot).get_loss_and_error_signal__send(
                    seg_name=tags[b],
                    seg_len=seq_lengths[b],
                    log_posteriors=log_posteriors[:seq_lengths[b], b])
            # ... and then read the results back in the same order.
            for slot, b in enumerate(chunk):
                reply = self._get_instance(slot).get_loss_and_error_signal__read()
                seg_name, loss, error_signal = reply
                assert seg_name == tags[b]
                batch_loss[b] = loss
                batch_error_signal[:seq_lengths[b], b] = error_signal
                numpy_set_unused(error_signal)
    else:
        # One reader thread per instance; each gets the job list scheduled above
        # and the output arrays.
        # NOTE(review): only join() is called here, so presumably ReaderThread
        # starts itself on construction -- confirm.
        threads = [
            ReaderThread(
                self._get_instance(i), i, jobs[i], tags, seq_lengths,
                log_posteriors, batch_loss, batch_error_signal)
            for i in range(n_instances)]
        for thread in threads:
            thread.join()
            if thread.exception:
                raise thread.exception

    return batch_loss, batch_error_signal