Example #1
def forward(segmentName, features):
  """
  :param numpy.ndarray features: format (input-feature,time) (via Sprint)
  :return: format (output-dim,time)
  :rtype: numpy.ndarray
  """
  print("Sprint forward", segmentName, features.shape)
  start_time = time.time()
  assert engine is not None, "not initialized"
  assert sprintDataset

  # Features are in Sprint format (feature,time).
  T = features.shape[1]
  assert features.shape == (InputDim, T)

  # Fill the data for the current segment.
  sprintDataset.shuffle_frames_of_nseqs = 0  # We must not shuffle.
  sprintDataset.initSprintEpoch(None)  # Reset cache. We don't need old seqs anymore.
  sprintDataset.init_seq_order()
  seq = sprintDataset.addNewData(features, segmentName=segmentName)

  if BackendEngine.is_theano_selected():
    # Prepare data for device.
    device = engine.devices[0]
    success = assign_dev_data_single_seq(device, sprintDataset, seq)
    assert success, "failed to allocate & assign data for seq %i, %s" % (seq, segmentName)

    # Do the actual forwarding and collect result.
    device.run("extract")
    result, _ = device.result()
    assert result is not None, "Device crashed."
    assert len(result) == 1
    posteriors = result[0]

  elif BackendEngine.is_tensorflow_selected():
    posteriors = engine.forward_single(dataset=sprintDataset, seq_idx=seq)

  else:
    raise NotImplementedError("unknown backend engine")

  # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
  if posteriors.ndim == 3:
    assert posteriors.shape == (T, 1, OutputDim)
    posteriors = posteriors[:, 0]
  # Posteriors are in format (time,emission).
  assert posteriors.shape == (T, OutputDim)
  # Reformat to Sprint expected format (emission,time).
  posteriors = posteriors.transpose()
  assert posteriors.shape == (OutputDim, T)
  stats = (numpy.min(posteriors), numpy.max(posteriors), numpy.mean(posteriors), numpy.std(posteriors))
  print("posteriors min/max/mean/std:", stats, "time:", time.time() - start_time)
  if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
    print("posteriors:", posteriors)
    debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
    debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
    print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
    numpy.savetxt(debug_feat_fn, features)
    numpy.savetxt(debug_post_fn, posteriors)
    assert False, "Error, posteriors contain invalid numbers."

  return posteriors
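
The shape juggling above is only a transposition between Sprint's (feature,time) layout and the network's time-major (time,emission) layout. A minimal standalone numpy sketch of the same convention, with made-up dimensions and a random array standing in for the network output:

import numpy

InputDim, OutputDim, T = 40, 9001, 123        # arbitrary example sizes
features = numpy.random.rand(InputDim, T)     # Sprint layout: (feature, time)
assert features.shape == (InputDim, T)

# Stand-in for engine.forward_single(...): the network returns time-major posteriors.
posteriors = numpy.random.rand(T, OutputDim)  # (time, emission)

# Sprint expects (emission, time) again, so transpose before returning.
posteriors = posteriors.transpose()
assert posteriors.shape == (OutputDim, T)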
Example #2
def initDevices():
    """
  :rtype: list[Device]|None
  """
    oldDeviceConfig = ",".join(config.list('device', ['default']))
    if BackendEngine.is_tensorflow_selected():
        if os.environ.get("TF_DEVICE"):
            config.set("device", os.environ.get("TF_DEVICE"))
            print("Devices: Use %s via TF_DEVICE instead of %s." %
                  (os.environ.get("TF_DEVICE"), oldDeviceConfig),
                  file=log.v4)
    if not BackendEngine.is_theano_selected():
        return None
    if config.value("task", "train") == "nop":
        return []
    if "device" in TheanoFlags:
        # This is important because Theano likely already has initialized that device.
        config.set("device", TheanoFlags["device"])
        print("Devices: Use %s via THEANO_FLAGS instead of %s." % \
                         (TheanoFlags["device"], oldDeviceConfig), file=log.v4)
    devArgs = getDevicesInitArgs(config)
    assert len(devArgs) > 0
    devices = [Device(**kwargs) for kwargs in devArgs]
    for device in devices:
        while not device.initialized:
            time.sleep(0.25)
    if devices[0].blocking:
        print("Devices: Used in blocking / single proc mode.", file=log.v4)
    else:
        print("Devices: Used in multiprocessing mode.", file=log.v4)
    return devices
Example #3
def initBackendEngine():
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
        import TheanoUtil
        TheanoUtil.monkey_patches()
    elif BackendEngine.is_tensorflow_selected():
        print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
        if get_tensorflow_version_tuple()[0] == 0:
            print("Warning: TF <1.0 is not supported and likely broken.",
                  file=log.v2)
        if os.environ.get("TF_DEVICE"):
            print("Devices: Use %s via TF_DEVICE instead of %s." %
                  (os.environ.get("TF_DEVICE"),
                   config.opt_typed_value("device")),
                  file=log.v4)
            config.set("device", os.environ.get("TF_DEVICE"))
        if config.is_true("use_horovod"):
            import socket
            import horovod.tensorflow as hvd
            from TFUtil import init_horovod
            init_horovod()  # make sure it is initialized
            if "gpu" in config.value("device", "") or os.environ.get(
                    "CUDA_VISIBLE_DEVICES", ""):
                # We assume that we want to use a GPU.
                gpu_opts = config.typed_dict.setdefault("tf_session_opts",
                                                        {}).setdefault(
                                                            "gpu_options", {})
                assert "visible_device_list" not in gpu_opts
                gpu_opts["visible_device_list"] = str(hvd.local_rank())
                print("Horovod: Hostname %s, pid %i, using GPU %s." %
                      (socket.gethostname(), os.getpid(),
                       gpu_opts["visible_device_list"]),
                      file=log.v3)
            else:
                if hvd.rank() == 0:  # Don't spam in all ranks.
                    print("Horovod: Not using GPU.", file=log.v3)
            horovod_reduce_type = config.value("horovod_reduce_type", "")
            if horovod_reduce_type == "":
                horovod_reduce_type = "grad"
                config.set("horovod_reduce_type", horovod_reduce_type)
            else:
                assert horovod_reduce_type in [
                    "grad", "param"
                ], "config option 'horovod_reduce_type' invalid"
            if hvd.rank() == 0:  # Don't spam in all ranks.
                print("Horovod: Reduce type:",
                      horovod_reduce_type,
                      file=log.v3)
        from TFUtil import debugRegisterBetterRepr, setup_tf_thread_pools, print_available_devices
        tf_session_opts = config.typed_value("tf_session_opts", {})
        assert isinstance(tf_session_opts, dict)
        # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
        setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
        # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
        print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
        debugRegisterBetterRepr()
    else:
        raise NotImplementedError
Example #4
def _forward(segment_name, features):
  """
  :param numpy.ndarray features: format (input-feature,time) (via Sprint)
  :return: format (output-dim,time)
  :rtype: numpy.ndarray
  """
  print("Sprint forward", segment_name, features.shape)
  start_time = time.time()
  assert engine is not None, "not initialized"
  assert sprintDataset

  # Features are in Sprint format (feature,time).
  num_time = features.shape[1]
  assert features.shape == (InputDim, num_time)
  dataset, seq_idx = features_to_dataset(features=features, segment_name=segment_name)

  if BackendEngine.is_theano_selected():
    # Prepare data for device.
    device = engine.devices[0]
    success = assign_dev_data_single_seq(device, dataset=dataset, seq=seq_idx)
    assert success, "failed to allocate & assign data for seq %i, %s" % (seq_idx, segment_name)

    # Do the actual forwarding and collect result.
    device.run("extract")
    result, _ = device.result()
    assert result is not None, "Device crashed."
    assert len(result) == 1
    posteriors = result[0]

  elif BackendEngine.is_tensorflow_selected():
    posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)

  else:
    raise NotImplementedError("unknown backend engine")

  # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
  if posteriors.ndim == 3:
    assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
    posteriors = posteriors[:, 0]
  # Posteriors are in format (time,emission).
  assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
  # Reformat to Sprint expected format (emission,time).
  posteriors = posteriors.transpose()
  assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)
  stats = (numpy.min(posteriors), numpy.max(posteriors), numpy.mean(posteriors), numpy.std(posteriors))
  print("posteriors min/max/mean/std:", stats, "time:", time.time() - start_time)
  if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
    print("posteriors:", posteriors)
    debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
    debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
    print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
    numpy.savetxt(debug_feat_fn, features)
    numpy.savetxt(debug_post_fn, posteriors)
    assert False, "Error, posteriors contain invalid numbers."

  return posteriors
Example #5
def finalize():
    global quit
    quit = True
    sys.exited = True
    if BackendEngine.is_theano_selected():
        if engine:
            for device in engine.devices:
                device.terminate()
    elif BackendEngine.is_tensorflow_selected():
        if engine:
            engine.finalize()
Example #6
def finalize():
    print("Quitting", file=getattr(log, "v4", sys.stderr))
    global quit
    quit = True
    sys.exited = True
    if BackendEngine.is_theano_selected():
        if engine:
            for device in engine.devices:
                device.terminate()
    elif BackendEngine.is_tensorflow_selected():
        if engine:
            engine.finalize()
Example #7
 def num_inputs_outputs_from_config(cls, config):
   """
   :type config: Config.Config
   :returns (num_inputs, num_outputs),
      where num_inputs is like num_outputs["data"][0],
      and num_outputs is a dict of data_key -> (dim, ndim),
        where data_key is e.g. "classes" or "data",
        dim is the feature dimension or the number of classes,
        and ndim is the ndim counted without batch-dim,
        i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
   :rtype: (int,dict[str,(int,int)])
   """
   num_inputs = config.int('num_inputs', 0)
   target = config.value('target', 'classes')
   if config.is_typed('num_outputs'):
     num_outputs = config.typed_value('num_outputs')
     if not isinstance(num_outputs, dict):
       num_outputs = {target: num_outputs}
     num_outputs = num_outputs.copy()
     from Dataset import convert_data_dims
     from Util import BackendEngine
     num_outputs = convert_data_dims(num_outputs, leave_dict_as_is=BackendEngine.is_tensorflow_selected())
     if "data" in num_outputs:
       num_inputs = num_outputs["data"][0]
   elif config.has('num_outputs'):
     num_outputs = {target: [config.int('num_outputs', 0), 1]}
   else:
     num_outputs = None
   dataset = None
   if config.list('train') and ":" not in config.value('train', ''):
     dataset = config.list('train')[0]
   if not config.is_typed('num_outputs') and dataset:
     try:
       _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
     except Exception:
       _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
     try:
       _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
     except Exception:
       _num_outputs = hdf5_group(dataset, 'targets/size')
       for k in _num_outputs:
         _num_outputs[k] = [_num_outputs[k], len(hdf5_shape(dataset, 'targets/data/' + k))]
     if num_inputs: assert num_inputs == _num_inputs
     if num_outputs: assert num_outputs == _num_outputs
     num_inputs = _num_inputs
     num_outputs = _num_outputs
   if not num_inputs and not num_outputs and config.has("load"):
     from Network import LayerNetwork
     import h5py
     model = h5py.File(config.value("load", ""), "r")
     num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)
   assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
   return num_inputs, num_outputs
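
For illustration, a hedged example of how the num_outputs config value maps to the return value (all numbers made up; RETURNN configs are Python files, so such an entry might look like this):

# hypothetical config entry
num_outputs = {
  "data": [40, 2],       # dense 40-dim input features (ndim=2, i.e. time x feature)
  "classes": [4498, 1],  # 4498 sparse target classes (ndim=1)
}
# num_inputs_outputs_from_config(config) would then return
#   num_inputs == 40   (taken from num_outputs["data"][0])
#   num_outputs == {"data": [40, 2], "classes": [4498, 1]}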
Example #8
def initBackendEngine():
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
    elif BackendEngine.is_tensorflow_selected():
        print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
        if get_tensorflow_version_tuple()[0] == 0:
            print("Warning: TF <1.0 is not supported and likely broken.",
                  file=log.v2)
        from TFUtil import debugRegisterBetterRepr
        debugRegisterBetterRepr()
    else:
        raise NotImplementedError
Example #9
def initBackendEngine():
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print >> log.v3, "Theano:", describe_theano_version()
    elif BackendEngine.is_tensorflow_selected():
        print >> log.v3, "TensorFlow:", describe_tensorflow_version()
        from Util import to_bool
        from TFUtil import debugRegisterBetterRepr
        if os.environ.get("DEBUG_TF_BETTER_REPR") and to_bool(
                os.environ.get("DEBUG_TF_BETTER_REPR")):
            debugRegisterBetterRepr()
    else:
        raise NotImplementedError
Example #10
def initEngine(devices):
    """
  :type devices: list[Device]
  Initializes global engine.
  """
    global engine
    if BackendEngine.is_theano_selected():
        engine = Engine(devices)
    elif BackendEngine.is_tensorflow_selected():
        import TFEngine
        engine = TFEngine.Engine(config=config)
    else:
        raise NotImplementedError
Example #11
def init_backend_engine():
  """
  Initializes ``engine``, which is either :class:`TFEngine.Engine` or Theano :class:`Engine.Engine`.
  """
  BackendEngine.select_engine(config=config)
  if BackendEngine.is_theano_selected():
    print("Theano:", describe_theano_version(), file=log.v3)
    import TheanoUtil
    TheanoUtil.monkey_patches()
  elif BackendEngine.is_tensorflow_selected():
    print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
    if get_tensorflow_version_tuple()[0] == 0:
      print("Warning: TF <1.0 is not supported and likely broken.", file=log.v2)
    if os.environ.get("TF_DEVICE"):
      print("Devices: Use %s via TF_DEVICE instead of %s." % (
        os.environ.get("TF_DEVICE"), config.opt_typed_value("device")), file=log.v4)
      config.set("device", os.environ.get("TF_DEVICE"))
    if config.is_true("use_horovod"):
      import socket
      # noinspection PyPackageRequirements,PyUnresolvedReferences
      import horovod.tensorflow as hvd
      from TFUtil import init_horovod
      init_horovod()  # make sure it is initialized
      if "gpu" in config.value("device", "") or os.environ.get("CUDA_VISIBLE_DEVICES", ""):
        # We assume that we want to use a GPU.
        gpu_opts = config.typed_dict.setdefault("tf_session_opts", {}).setdefault("gpu_options", {})
        assert "visible_device_list" not in gpu_opts
        gpu_opts["visible_device_list"] = str(hvd.local_rank())
        print("Horovod: Hostname %s, pid %i, using GPU %s." % (
          socket.gethostname(), os.getpid(), gpu_opts["visible_device_list"]), file=log.v3)
      else:
        if hvd.rank() == 0:  # Don't spam in all ranks.
          print("Horovod: Not using GPU.", file=log.v3)
      horovod_reduce_type = config.value("horovod_reduce_type", "")
      if horovod_reduce_type == "":
        horovod_reduce_type = "grad"
        config.set("horovod_reduce_type", horovod_reduce_type)
      else:
        assert horovod_reduce_type in ["grad", "param"], "config option 'horovod_reduce_type' invalid"
      if hvd.rank() == 0:  # Don't spam in all ranks.
        print("Horovod: Reduce type:", horovod_reduce_type, file=log.v3)
    from TFUtil import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
    tf_session_opts = config.typed_value("tf_session_opts", {})
    assert isinstance(tf_session_opts, dict)
    # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
    setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
    # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
    print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
    debug_register_better_repr()
  else:
    raise NotImplementedError
Example #12
def finalize():
    """
  Cleanup at the end.
  """
    print("Quitting", file=getattr(log, "v4", sys.stderr))
    global quit_returnn
    quit_returnn = True
    sys.exited = True
    if engine:
        if BackendEngine.is_theano_selected():
            for device in engine.devices:
                device.terminate()
        elif BackendEngine.is_tensorflow_selected():
            engine.finalize()
Example #13
def init_engine(devices):
  """
  Initializes global engine.

  :type devices: list[Device.Device]|None
  """
  global engine
  if BackendEngine.is_theano_selected():
    import Engine
    engine = Engine.Engine(devices)
  elif BackendEngine.is_tensorflow_selected():
    import TFEngine
    engine = TFEngine.Engine(config=config)
  else:
    raise NotImplementedError
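
The surrounding call order, as it also appears in the _init_base examples further down, is roughly as follows (sketch only, using the rnn module-level helpers shown elsewhere on this page):

rnn.init_backend_engine()             # select Theano or TF
devices = rnn.init_theano_devices()   # list[Device] for Theano, None for TF
rnn.print_task_properties(devices)
rnn.init_engine(devices)              # sets the global engine as above
engine = rnn.engine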
Example #14
def finalize():
  """
  Cleanup at the end.
  """
  print("Quitting", file=getattr(log, "v4", sys.stderr))
  global quit_returnn
  quit_returnn = True
  sys.exited = True
  if BackendEngine.is_theano_selected():
    if engine:
      for device in engine.devices:
        device.terminate()
  elif BackendEngine.is_tensorflow_selected():
    if engine:
      engine.finalize()
Example #15
def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("extern_data", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_inputs", dataset.num_inputs)
    config.set("num_outputs", dataset.num_outputs)
Example #16
def set_config_num_inputs_outputs_from_dataset(config, dataset):
  """
  :param Config.Config config:
  :param Dataset dataset:
  """
  config.set("num_inputs", dataset.num_inputs)
  from Util import BackendEngine
  if BackendEngine.is_tensorflow_selected():
    # TF supports more fine-grained specification,
    # however the dataset does not store that in num_outputs.
    from TFNetwork import ExternData
    config.set("num_outputs", {
      key: ExternData.data_kwargs_from_dataset_key(dataset=dataset, key=key)
      for key in dataset.get_data_keys()})
  else:
    config.set("num_outputs", dataset.num_outputs)
Example #17
 def get_existing_models(cls, config):
   model_filename = config.value('model', '')
   if not model_filename:
     return []
   # Automatically search the filesystem for existing models.
   file_list = []
   for epoch in range(1, cls.config_get_final_epoch(config) + 1):
     for is_pretrain in [False, True]:
       fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
       if os.path.exists(fn):
         file_list += [(epoch, fn)]  # epoch, fn
         break
       if BackendEngine.is_tensorflow_selected():
         if os.path.exists(fn + ".index"):
           file_list += [(epoch, fn)]  # epoch, fn
           break
   file_list.sort()
   return file_list
Example #18
 def get_existing_models(cls, config):
   """
   :param Config.Config config:
   :return: dict epoch -> model filename
   :rtype: dict[int,str]
   """
   model_filename = config.value('model', '')
   if not model_filename:
     return {}
   # Automatically search the filesystem for existing models.
   file_list = {}
   for epoch in range(1, cls.config_get_final_epoch(config) + 1):
     for is_pretrain in [False, True]:
       fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
       if os.path.exists(fn):
         file_list[epoch] = fn
         break
       if BackendEngine.is_tensorflow_selected():
         if os.path.exists(fn + ".index"):
           file_list[epoch] = fn
           break
   return file_list
Example #19
 def get_existing_models(cls, config):
   """
   :param Config.Config config:
   :return: dict epoch -> model filename
   :rtype: dict[int,str]
   """
   model_filename = config.value('model', '')
   if not model_filename:
     return {}
   # Automatically search the filesystem for existing models.
   file_list = {}
   for epoch in range(1, cls.config_get_final_epoch(config) + 1):
     for is_pretrain in [False, True]:
       fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
       if os.path.exists(fn):
         file_list[epoch] = fn
         break
       if BackendEngine.is_tensorflow_selected():
         if os.path.exists(fn + ".index"):
           file_list[epoch] = fn
           break
   return file_list
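
A hedged usage sketch for the dict-returning variants above; the list-returning variant in Example #17 is instead indexed positionally, as get_epoch_model below does with existing_models[-1]:

existing_models = cls.get_existing_models(config)  # {epoch: model filename}
if existing_models:
    last_epoch = max(existing_models)              # highest epoch found on disk
    epoch_model = (last_epoch, existing_models[last_epoch])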
Example #20
    def get_epoch_model(cls, config):
        """
    :type config: Config.Config
    :returns (epoch, modelFilename)
    :rtype: (int|None, str|None)
    """
        # XXX: We cache it, although this is wrong if we have changed the config.
        if cls._epoch_model:
            return cls._epoch_model

        start_epoch_mode = config.value('start_epoch', 'auto')
        if start_epoch_mode == 'auto':
            start_epoch = None
        else:
            start_epoch = int(start_epoch_mode)
            assert start_epoch >= 1

        load_model_epoch_filename = config.value('load', '')
        if load_model_epoch_filename:
            fn_postfix = ""
            if BackendEngine.is_tensorflow_selected():
                fn_postfix = ".meta"
            assert os.path.exists(load_model_epoch_filename + fn_postfix)

        import_model_train_epoch1 = config.value('import_model_train_epoch1',
                                                 '')
        if import_model_train_epoch1:
            assert os.path.exists(import_model_train_epoch1)

        existing_models = cls.get_existing_models(config)

        # Only use this when we don't train.
        # For training, we first consider existing models before we take the 'load' into account when in auto epoch mode.
        # In all other cases, we use the model specified by 'load'.
        if load_model_epoch_filename and (config.value('task', 'train') !=
                                          'train' or start_epoch is not None):
            epoch = model_epoch_from_filename(load_model_epoch_filename)
            if config.value('task',
                            'train') == 'train' and start_epoch is not None:
                # Ignore the epoch. To keep it consistent with the case below.
                epoch = None
            epoch_model = (epoch, load_model_epoch_filename)

        # In case of training, always first consider existing models.
        # This is because when we rerun CRNN training, we usually don't want to train from scratch
        # but to resume where we stopped last time.
        elif existing_models:
            epoch_model = existing_models[-1]
            if load_model_epoch_filename:
                print >> log.v4, "note: there is a 'load' which we ignore because of existing model"

        elif config.value(
                'task', 'train'
        ) == 'train' and import_model_train_epoch1 and start_epoch in [
                None, 1
        ]:
            epoch_model = (0, import_model_train_epoch1)

        # Now, consider this also in the case when we train, as an initial model import.
        elif load_model_epoch_filename:
            # Don't use the model epoch as the start epoch in training.
            # We use this as an import for training.
            epoch_model = (
                model_epoch_from_filename(load_model_epoch_filename),
                load_model_epoch_filename)

        else:
            epoch_model = (None, None)

        if start_epoch == 1:
            if epoch_model[0]:  # existing model
                print >> log.v4, "warning: there is an existing model: %s" % (
                    epoch_model, )
                epoch_model = (None, None)
        elif start_epoch > 1:
            if epoch_model[0]:
                if epoch_model[0] != start_epoch - 1:
                    print >> log.v4, "warning: start_epoch %i but there is %s" % (
                        start_epoch, epoch_model)
                epoch_model = existing_models[start_epoch - 1]

        cls._epoch_model = epoch_model
        return epoch_model
Example #21
def _forward(segment_name, features):
    """
  :param numpy.ndarray features: format (input-feature,time) (via Sprint)
  :return: format (output-dim,time)
  :rtype: numpy.ndarray
  """
    print("Sprint forward", segment_name, features.shape)
    start_time = time.time()
    assert engine is not None, "not initialized"
    assert sprintDataset

    # Features are in Sprint format (feature,time).
    num_time = features.shape[1]
    assert features.shape == (InputDim, num_time)
    time_a = time.time()
    dataset, seq_idx = features_to_dataset(features=features,
                                           segment_name=segment_name)
    time_b = time.time()

    if BackendEngine.is_theano_selected():
        # Prepare data for device.
        device = engine.devices[0]
        success = assign_dev_data_single_seq(device,
                                             dataset=dataset,
                                             seq=seq_idx)
        assert success, "failed to allocate & assign data for seq %i, %s" % (
            seq_idx, segment_name)
        time_c = time.time()

        # Do the actual forwarding and collect result.
        device.run("extract")
        result, _ = device.result()
        assert result is not None, "Device crashed."
        assert len(result) == 1
        posteriors = result[0]

    elif BackendEngine.is_tensorflow_selected():
        time_c = time.time()  # not set elsewhere in this branch, but used in the timing print below
        posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)

    else:
        raise NotImplementedError("unknown backend engine")
    time_d = time.time()
    # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
    if posteriors.ndim == 3:
        assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
        posteriors = posteriors[:, 0]
    # Posteriors are in format (time,emission).
    assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
    # Reformat to Sprint expected format (emission,time).
    posteriors = posteriors.transpose()
    assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)
    stats = (numpy.min(posteriors), numpy.max(posteriors),
             numpy.mean(posteriors), numpy.std(posteriors))
    print("posteriors min/max/mean/std:", stats, "time:",
          time.time() - start_time,
          time.time() - time_a,
          time.time() - time_b,
          time.time() - time_c,
          time.time() - time_d)
    if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
        print("posteriors:", posteriors)
        debug_feat_fn = "/tmp/crnn.pid%i.sprintinterface.debug.features.txt" % os.getpid(
        )
        debug_post_fn = "/tmp/crnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid(
        )
        print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
        numpy.savetxt(debug_feat_fn, features)
        numpy.savetxt(debug_post_fn, posteriors)
        assert False, "Error, posteriors contain invalid numbers."

    return posteriors
Example #22
def _init_base(configfile=None, target_mode=None, epoch=None, sprint_opts=None):
  """
  :param str|None configfile: filename, via init(), this is set
  :param str|None target_mode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
  """

  global isInitialized
  isInitialized = True
  # Run through in any case. Maybe just to set target_mode.

  if not getattr(sys, "argv", None):
    # Set some dummy. Some code might want this (e.g. TensorFlow).
    sys.argv = [__file__]

  global config
  if not config:
    # Some subset of what we do in rnn.init().

    rnn.init_better_exchook()
    rnn.init_thread_join_hack()

    if configfile is None:
      configfile = DefaultSprintCrnnConfig
    assert os.path.exists(configfile)
    rnn.init_config(config_filename=configfile, extra_updates={"task": target_mode})
    config = rnn.config
    if sprint_opts is not None:
      config.update(sprint_opts)

    rnn.init_log()
    rnn.returnn_greeting(config_filename=configfile)
    rnn.init_backend_engine()
    rnn.init_faulthandler(sigusr1_chain=True)
    rnn.init_config_json_network()

    global Engine
    if BackendEngine.is_tensorflow_selected():
      # Use TFEngine.Engine class instead of Engine.Engine.
      from TFEngine import Engine
    elif BackendEngine.is_theano_selected():
      from Engine import Engine

    import atexit
    atexit.register(_at_exit_handler)

  if target_mode:
    set_target_mode(target_mode)

  _init_dataset()

  if target_mode and target_mode == "forward" and epoch:
    model_filename = config.value('model', '')
    fns = [EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain) for is_pretrain in [False, True]]
    fn_postfix = ""
    if BackendEngine.is_tensorflow_selected():
      fn_postfix += ".meta"
    fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
    assert len(fns_existing) == 1, "%s not found" % fns
    model_epoch_filename = fns_existing[0]
    config.set('load', model_epoch_filename)
    assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
      "%r != %r" % (EngineBase.get_epoch_model(config), model_epoch_filename))

  global engine
  if not engine:
    devices = rnn.init_theano_devices()
    rnn.print_task_properties(devices)
    rnn.init_engine(devices)
    engine = rnn.engine
    assert isinstance(engine, Engine)
Example #23
 def model_filename_postfix(cls):
     fn_postfix = ""
     if BackendEngine.is_tensorflow_selected():
         fn_postfix = ".meta"
     return fn_postfix
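
A minimal sketch of how this postfix is used (the same pattern appears inline in get_epoch_model and _init_base elsewhere on this page): TF checkpoints consist of .meta/.index/.data files, so existence checks append the postfix instead of testing the bare filename:

fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
# Theano: checks the model file itself; TF: checks the ".meta" file of the checkpoint.
if os.path.exists(fn + cls.model_filename_postfix()):
    file_list[epoch] = fn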
Example #24
    def __init__(self,
                 corpus_file,
                 orth_symbols_file=None,
                 orth_symbols_map_file=None,
                 orth_replace_map_file=None,
                 word_based=False,
                 seq_end_symbol="[END]",
                 unknown_symbol="[UNKNOWN]",
                 parse_orth_opts=None,
                 phone_info=None,
                 add_random_phone_seqs=0,
                 partition_epoch=1,
                 auto_replace_unknown_symbol=False,
                 log_auto_replace_unknown_symbols=10,
                 log_skipped_seqs=10,
                 error_on_invalid_seq=True,
                 add_delayed_seq_data=False,
                 delayed_seq_data_start_symbol="[START]",
                 **kwargs):
        """
    :param str|()->str corpus_file: Bliss XML or line-based txt. optionally can be gzip.
    :param dict|None phone_info: if you want to get phone seqs, dict with lexicon_file etc. see PhoneSeqGenerator
    :param str|()->str|None orth_symbols_file: list of orthography symbols, if you want to get orth symbol seqs
    :param str|()->str|None orth_symbols_map_file: list of orth symbols, each line: "symbol index"
    :param str|()->str|None orth_replace_map_file: JSON file with replacement dict for orth symbols
    :param bool word_based: whether to parse single words, or otherwise will be char-based
    :param str|None seq_end_symbol: what to add at the end, if given.
      will be set as postfix=[seq_end_symbol] or postfix=[] for parse_orth_opts.
    :param dict[str]|None parse_orth_opts: kwargs for parse_orthography()
    :param int add_random_phone_seqs: will add random seqs with the same len as the real seq as additional data
    :param bool|int log_auto_replace_unknown_symbols: write about auto-replacements with unknown symbol.
      if this is an int, it will only log the first N replacements, and then keep quiet.
    :param bool|int log_skipped_seqs: write about skipped seqs to logging, due to missing lexicon entry or so.
      if this is an int, it will only log the first N entries, and then keep quiet.
    :param bool error_on_invalid_seq: if there is a seq we would have to skip, error
    :param bool add_delayed_seq_data: will add another data-key "delayed" which will have the sequence
      delayed_seq_data_start_symbol + original_sequence[:-1]
    :param str delayed_seq_data_start_symbol: used for add_delayed_seq_data
    :param int partition_epoch: whether to partition the epochs into multiple parts. like epoch_split
    """
        super(LmDataset, self).__init__(**kwargs)

        if callable(corpus_file):
            corpus_file = corpus_file()
        if callable(orth_symbols_file):
            orth_symbols_file = orth_symbols_file()
        if callable(orth_symbols_map_file):
            orth_symbols_map_file = orth_symbols_map_file()
        if callable(orth_replace_map_file):
            orth_replace_map_file = orth_replace_map_file()

        print("LmDataset, loading file", corpus_file, file=log.v4)

        self.word_based = word_based
        self.seq_end_symbol = seq_end_symbol
        self.unknown_symbol = unknown_symbol
        self.parse_orth_opts = parse_orth_opts or {}
        self.parse_orth_opts.setdefault("word_based", self.word_based)
        self.parse_orth_opts.setdefault(
            "postfix",
            [self.seq_end_symbol] if self.seq_end_symbol is not None else [])

        if orth_symbols_file:
            assert not phone_info
            assert not orth_symbols_map_file
            orth_symbols = open(orth_symbols_file).read().splitlines()
            self.orth_symbols_map = {
                sym: i
                for (i, sym) in enumerate(orth_symbols)
            }
            self.orth_symbols = orth_symbols
            self.labels["data"] = orth_symbols
            self.seq_gen = None
        elif orth_symbols_map_file:
            assert not phone_info
            orth_symbols_imap_list = [(int(b), a) for (a, b) in [
                l.split(None, 1)
                for l in open(orth_symbols_map_file).read().splitlines()
            ]]
            orth_symbols_imap_list.sort()
            assert orth_symbols_imap_list[0][0] == 0
            assert orth_symbols_imap_list[-1][0] == len(
                orth_symbols_imap_list) - 1
            self.orth_symbols_map = {
                sym: i
                for (i, sym) in orth_symbols_imap_list
            }
            self.orth_symbols = [sym for (i, sym) in orth_symbols_imap_list]
            self.labels["data"] = self.orth_symbols
            self.seq_gen = None
        else:
            assert not orth_symbols_file
            assert isinstance(phone_info, dict)
            self.seq_gen = PhoneSeqGenerator(**phone_info)
            self.orth_symbols = None
            self.labels["data"] = self.seq_gen.get_class_labels()
        if orth_replace_map_file:
            orth_replace_map = load_json(filename=orth_replace_map_file)
            assert isinstance(orth_replace_map, dict)
            self.orth_replace_map = {
                key: parse_orthography_into_symbols(v,
                                                    word_based=self.word_based)
                for (key, v) in orth_replace_map.items()
            }
            if self.orth_replace_map:
                if len(self.orth_replace_map) <= 5:
                    print("  orth_replace_map: %r" % self.orth_replace_map,
                          file=log.v5)
                else:
                    print("  orth_replace_map: %i entries" %
                          len(self.orth_replace_map),
                          file=log.v5)
        else:
            self.orth_replace_map = {}

        num_labels = len(self.labels["data"])
        use_uint_types = False
        if BackendEngine.is_tensorflow_selected():
            use_uint_types = True
        if num_labels <= 2**7:
            self.dtype = "int8"
        elif num_labels <= 2**8 and use_uint_types:
            self.dtype = "uint8"
        elif num_labels <= 2**31:
            self.dtype = "int32"
        elif num_labels <= 2**32 and use_uint_types:
            self.dtype = "uint32"
        elif num_labels <= 2**61:
            self.dtype = "int64"
        elif num_labels <= 2**62 and use_uint_types:
            self.dtype = "uint64"
        else:
            raise Exception("cannot handle so much labels: %i" % num_labels)
        self.num_outputs = {"data": [len(self.labels["data"]), 1]}
        self.num_inputs = self.num_outputs["data"][0]
        self.seq_order = None
        self.auto_replace_unknown_symbol = auto_replace_unknown_symbol
        self.log_auto_replace_unknown_symbols = log_auto_replace_unknown_symbols
        self.log_skipped_seqs = log_skipped_seqs
        self.error_on_invalid_seq = error_on_invalid_seq
        self.partition_epoch = partition_epoch
        self.add_random_phone_seqs = add_random_phone_seqs
        for i in range(add_random_phone_seqs):
            self.num_outputs["random%i" % i] = self.num_outputs["data"]
        self.add_delayed_seq_data = add_delayed_seq_data
        self.delayed_seq_data_start_symbol = delayed_seq_data_start_symbol
        if add_delayed_seq_data:
            self.num_outputs["delayed"] = self.num_outputs["data"]

        if _is_bliss(corpus_file):
            iter_f = _iter_bliss
        else:
            iter_f = _iter_txt
        self.orths = []
        iter_f(corpus_file, self.orths.append)
        # It's only estimated because we might filter some out or so.
        self._estimated_num_seqs = len(self.orths) // self.partition_epoch
        print("  done, loaded %i sequences" % len(self.orths), file=log.v4)
Example #25
def initBase(configfile=None, targetMode=None, epoch=None):
    """
  :param str|None configfile: filename, via init(), this is set
  :param str|None targetMode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  """

    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set targetMode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.initBetterExchook()
        rnn.initThreadJoinHack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.initConfig(configFilename=configfile)
        config = rnn.config

        rnn.initLog()
        rnn.returnnGreeting(configFilename=configfile)
        rnn.initBackendEngine()
        rnn.initFaulthandler(sigusr1_chain=True)
        rnn.initConfigJsonNetwork()

        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            import TFEngine
            global Engine
            Engine = TFEngine.Engine

        import atexit
        atexit.register(_at_exit_handler)

    if targetMode:
        setTargetMode(targetMode)

    initDataset()

    if targetMode and targetMode == "forward" and epoch:
        model_filename = config.value('model', '')
        fns = [
            Engine.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            fn_postfix += ".meta"
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        assert Engine.get_epoch_model(config)[1] == model_epoch_filename, \
          "%r != %r" % (Engine.get_epoch_model(config), model_epoch_filename)

    global engine
    if not engine:
        devices = rnn.initDevices()
        rnn.printTaskProperties(devices)
        rnn.initEngine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)
Example #26
 def num_inputs_outputs_from_config(cls, config):
   """
   :type config: Config.Config
   :returns (num_inputs, num_outputs),
      where num_inputs is like num_outputs["data"][0],
      and num_outputs is a dict of data_key -> (dim, ndim),
        where data_key is e.g. "classes" or "data",
        dim is the feature dimension or the number of classes,
        and ndim is the ndim counted without batch-dim,
        i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
   :rtype: (int,dict[str,(int,int)])
   """
   from Util import BackendEngine
   num_inputs = config.int('num_inputs', 0)
   target = config.value('target', 'classes')
   if config.is_typed('num_outputs'):
     num_outputs = config.typed_value('num_outputs')
     if not isinstance(num_outputs, dict):
       num_outputs = {target: num_outputs}
     num_outputs = num_outputs.copy()
     from Dataset import convert_data_dims
     num_outputs = convert_data_dims(num_outputs, leave_dict_as_is=BackendEngine.is_tensorflow_selected())
     if "data" in num_outputs:
       num_inputs = num_outputs["data"]
       if isinstance(num_inputs, (list, tuple)):
         num_inputs = num_inputs[0]
       elif isinstance(num_inputs, dict):
         if "dim" in num_inputs:
           num_inputs = num_inputs["dim"]
         else:
           num_inputs = num_inputs["shape"][-1]
       else:
         raise TypeError("data key %r" % num_inputs)
   elif config.has('num_outputs'):
     num_outputs = {target: [config.int('num_outputs', 0), 1]}
   else:
     num_outputs = None
   dataset = None
   if config.list('train') and ":" not in config.value('train', ''):
     dataset = config.list('train')[0]
   if not config.is_typed('num_outputs') and dataset:
     # noinspection PyBroadException
     try:
       _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
     except Exception:
       _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
     # noinspection PyBroadException
     try:
       _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
     except Exception:
       _num_outputs = hdf5_group(dataset, 'targets/size')
       for k in _num_outputs:
         _num_outputs[k] = [_num_outputs[k], len(hdf5_shape(dataset, 'targets/data/' + k))]
     if num_inputs:
       assert num_inputs == _num_inputs
     if num_outputs:
       assert num_outputs == _num_outputs
     num_inputs = _num_inputs
     num_outputs = _num_outputs
   if not num_inputs and not num_outputs and config.has("load") and BackendEngine.is_theano_selected():
     from Network import LayerNetwork
     import h5py
     model = h5py.File(config.value("load", ""), "r")
     # noinspection PyProtectedMember
     num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)
   assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
   return num_inputs, num_outputs
Example #27
def _init_base(configfile=None,
               target_mode=None,
               epoch=None,
               sprint_opts=None):
    """
  :param str|None configfile: filename, via init(), this is set
  :param str|None target_mode: "forward" or so. via init(), this is set
  :param int epoch: via init(), this is set
  :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
  """

    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set target_mode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.init_better_exchook()
        rnn.init_thread_join_hack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.init_config(config_filename=configfile,
                        extra_updates={"task": target_mode})
        assert rnn.config
        config = rnn.config
        if sprint_opts is not None:
            config.update(sprint_opts)

        rnn.init_log()
        rnn.returnn_greeting(config_filename=configfile)
        rnn.init_backend_engine()
        rnn.init_faulthandler(sigusr1_chain=True)
        rnn.init_config_json_network()

        global Engine
        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            from TFEngine import Engine
        elif BackendEngine.is_theano_selected():
            from Engine import Engine

        import atexit
        atexit.register(_at_exit_handler)

    if target_mode:
        set_target_mode(target_mode)

    _init_dataset()

    if target_mode and target_mode == "forward" and epoch:
        model_filename = config.value('model', '')
        fns = [
            EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            fn_postfix += ".meta"
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
            "%r != %r" %
            (EngineBase.get_epoch_model(config), model_epoch_filename))

    global engine
    if not engine:
        devices = rnn.init_theano_devices()
        rnn.print_task_properties(devices)
        rnn.init_engine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)