Code example #1
 def _layer_params(self, info, sources, mask, reverse=False):
   """
   :param dict[str] info: layer info dict, e.g. self.hidden_info[i]
   :param list[str] sources: source layer names (the 'from' entry)
   :param None|str mask: mask to apply, if any; copied into the params
   :param bool reverse: whether this is the backward direction of a bidirectional layer
   :rtype: dict[str]
   """
   from returnn.util.basic import BackendEngine, getargspec
   if BackendEngine.is_theano_selected():
     from returnn.theano.layers.basic import get_layer_class
   elif BackendEngine.is_tensorflow_selected():
     from returnn.tf.layers.basic import get_layer_class
   else:
     raise NotImplementedError
   params = dict(self.default_layer_info)
   params.update(info)
   params["from"] = sources
   if mask:
     params["mask"] = mask
   layer_class = get_layer_class(params["layer_class"])
   if layer_class.recurrent:
     params['truncation'] = self.truncation
     if self.bidirectional:
       if not reverse:
         params['name'] += "_fw"
       else:
         params['name'] += "_bw"
         params['reverse'] = True
     if 'sharpgates' in getargspec(layer_class.__init__).args[1:]:
       params['sharpgates'] = self.sharpgates
   return params
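
The bidirectional branch above derives two parameter dicts that differ only in the name suffix (_fw/_bw) and the reverse flag. A minimal, self-contained sketch of that convention; the base dict below is hypothetical:

def bidirectional_names(params):
    """Sketch: derive forward/backward param dicts the way _layer_params does."""
    fw = dict(params, name=params["name"] + "_fw")
    bw = dict(params, name=params["name"] + "_bw", reverse=True)
    return fw, bw

fw, bw = bidirectional_names({"name": "lstm0", "layer_class": "rec", "from": ["data"]})
assert fw["name"] == "lstm0_fw" and bw["name"] == "lstm0_bw" and bw["reverse"] is True
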
Code example #2
File: __main__.py (project: e0397123/returnn)
def init_backend_engine():
    """
    Initializes ``engine``, which is either :class:`TFEngine.Engine` or Theano :class:`Engine.Engine`.
    """
    BackendEngine.select_engine(config=config)
    if BackendEngine.is_theano_selected():
        print("Theano:", describe_theano_version(), file=log.v3)
        import returnn.theano.util
        returnn.theano.util.monkey_patches()
    elif BackendEngine.is_tensorflow_selected():
        print("TensorFlow:", describe_tensorflow_version(), file=log.v3)
        if get_tensorflow_version_tuple()[0] == 0:
            print("Warning: TF <1.0 is not supported and likely broken.",
                  file=log.v2)
        if os.environ.get("TF_DEVICE"):
            print("Devices: Use %s via TF_DEVICE instead of %s." %
                  (os.environ.get("TF_DEVICE"),
                   config.opt_typed_value("device")),
                  file=log.v4)
            config.set("device", os.environ.get("TF_DEVICE"))
        if config.is_true("use_horovod"):
            import returnn.tf.horovod
            hvd = returnn.tf.horovod.get_ctx(config=config)
            import socket
            if "gpu" in config.value("device", "") or os.environ.get(
                    "CUDA_VISIBLE_DEVICES", ""):
                # We assume that we want to use a GPU.
                gpu_opts = config.typed_dict.setdefault("tf_session_opts",
                                                        {}).setdefault(
                                                            "gpu_options", {})
                assert "visible_device_list" not in gpu_opts
                gpu_opts["visible_device_list"] = str(hvd.local_rank())
                print("Horovod: Hostname %s, pid %i, using GPU %s." %
                      (socket.gethostname(), os.getpid(),
                       gpu_opts["visible_device_list"]),
                      file=log.v3)
            else:
                if hvd.rank() == 0:  # Don't spam in all ranks.
                    print("Horovod: Not using GPU.", file=log.v3)
            if hvd.rank() == 0:  # Don't spam in all ranks.
                print("Horovod: Reduce type:",
                      hvd.get_reduce_type(),
                      file=log.v3)
        from returnn.tf.util.basic import debug_register_better_repr, setup_tf_thread_pools, print_available_devices
        tf_session_opts = config.typed_value("tf_session_opts", {})
        assert isinstance(tf_session_opts, dict)
        # This must be done after the Horovod logic, such that we only touch the devices we are supposed to touch.
        setup_tf_thread_pools(log_file=log.v3, tf_session_opts=tf_session_opts)
        # Print available devices. Also make sure that get_tf_list_local_devices uses the correct TF session opts.
        print_available_devices(tf_session_opts=tf_session_opts, file=log.v2)
        from returnn.tf.native_op import OpMaker
        OpMaker.log_stream = log.v3
        debug_register_better_repr()
        if config.is_true("distributed_tf"):
            import returnn.tf.distributed
            returnn.tf.distributed.init_distributed_tf(config)
    else:
        raise NotImplementedError
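
The Horovod branch pins each process to a single local GPU by nesting a visible_device_list entry into the config's tf_session_opts. A standalone sketch of the resulting structure, with a made-up local rank:

tf_session_opts = {}  # stands in for config.typed_dict["tf_session_opts"]
local_rank = 1  # hypothetical value of hvd.local_rank()
gpu_opts = tf_session_opts.setdefault("gpu_options", {})
assert "visible_device_list" not in gpu_opts
gpu_opts["visible_device_list"] = str(local_rank)
assert tf_session_opts == {"gpu_options": {"visible_device_list": "1"}}
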
Code example #3
File: __main__.py (project: e0397123/returnn)
def init_engine(devices):
    """
    Initializes global engine.

    :type devices: list[Device.Device]|None
    """
    global engine
    if BackendEngine.is_theano_selected():
        from returnn.theano.engine import Engine
        engine = Engine(devices)
    elif BackendEngine.is_tensorflow_selected():
        from returnn.tf.engine import Engine
        engine = Engine(config=config)
    else:
        raise NotImplementedError
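
Note the intended call order: init_backend_engine() must have run, so that BackendEngine.select_engine() has picked a backend, before init_engine() branches on that selection. A hedged usage sketch, assuming the surrounding module globals (config, log) are already set up:

init_backend_engine()  # selects Theano or TensorFlow based on the config
devices = None         # Theano device list; ignored by the TensorFlow engine
init_engine(devices)   # sets the global engine for the selected backend
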
Code example #4
File: __main__.py (project: e0397123/returnn)
def finalize(error_occurred=False):
    """
    Cleanup at the end.

    :param bool error_occurred: whether an error occurred before this cleanup
    """
    print("Quitting", file=getattr(log, "v4", sys.stderr))
    global quit_returnn
    quit_returnn = True
    sys.exited = True
    if engine:
        if BackendEngine.is_theano_selected():
            for device in engine.devices:
                device.terminate()
        elif BackendEngine.is_tensorflow_selected():
            engine.finalize(error_occurred=error_occurred)
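
Since finalize() is the central cleanup path, it can also be hooked into interpreter shutdown, in the same spirit as the atexit.register(_at_exit_handler) call in code example #8 below. A sketch:

import atexit

# Run the cleanup above on normal interpreter exit as well.
atexit.register(finalize)
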
Code example #5
File: base.py (project: twistedmove/returnn)
 @classmethod
 def get_existing_models(cls, config):
   """
   :param Config.Config config:
   :return: dict epoch -> model filename
   :rtype: dict[int,str]
   """
   model_filename = config.value('model', '')
   if not model_filename:
     return {}
   # Automatically search the filesystem for existing models.
   file_list = {}
   for epoch in range(1, cls.config_get_final_epoch(config) + 1):
     for is_pretrain in [False, True]:
       fn = cls.epoch_model_filename(model_filename, epoch, is_pretrain)
       if os.path.exists(fn):
         file_list[epoch] = fn
         break
       if BackendEngine.is_tensorflow_selected():
         if os.path.exists(fn + ".index"):
           file_list[epoch] = fn
           break
   return file_list
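
The extra .index probe exists because a TensorFlow checkpoint is a set of files (fn.index, fn.meta, fn.data-*) rather than the single file a Theano model is. A standalone sketch of that probing logic, with a hypothetical epoch filename:

import os

def checkpoint_exists(fn, tensorflow=True):
    """Sketch: a Theano model is one file; a TF checkpoint leaves fn.index etc."""
    if os.path.exists(fn):
        return True
    return tensorflow and os.path.exists(fn + ".index")

print(checkpoint_exists("net-model/network.003"))  # hypothetical filename
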
Code example #6
 @classmethod
 def num_inputs_outputs_from_config(cls, config):
   """
   :type config: Config.Config
   :returns: (num_inputs, num_outputs),
      where num_inputs is like num_outputs["data"][0],
      and num_outputs is a dict of data_key -> (dim, ndim),
        where data_key is e.g. "classes" or "data",
        dim is the feature dimension or the number of classes,
        and ndim is the ndim counted without batch-dim,
        i.e. ndim=1 means usually sparse data and ndim=2 means dense data.
   :rtype: (int,dict[str,(int,int)])
   """
   from returnn.util.basic import BackendEngine
   num_inputs = config.int('num_inputs', 0)
   target = config.value('target', 'classes')
   if config.is_typed('num_outputs'):
     num_outputs = config.typed_value('num_outputs')
     if not isinstance(num_outputs, dict):
       num_outputs = {target: num_outputs}
     num_outputs = num_outputs.copy()
     from returnn.datasets.basic import convert_data_dims
     num_outputs = convert_data_dims(num_outputs, leave_dict_as_is=BackendEngine.is_tensorflow_selected())
     if "data" in num_outputs:
       num_inputs = num_outputs["data"]
       if isinstance(num_inputs, (list, tuple)):
         num_inputs = num_inputs[0]
       elif isinstance(num_inputs, dict):
         if "dim" in num_inputs:
           num_inputs = num_inputs["dim"]
         else:
           num_inputs = num_inputs["shape"][-1]
       else:
         raise TypeError("data key %r" % num_inputs)
   elif config.has('num_outputs'):
     num_outputs = {target: [config.int('num_outputs', 0), 1]}
   else:
     num_outputs = None
   dataset = None
   if config.list('train') and ":" not in config.value('train', ''):
     dataset = config.list('train')[0]
   if not config.is_typed('num_outputs') and dataset:
     # noinspection PyBroadException
     try:
       _num_inputs = hdf5_dimension(dataset, 'inputCodeSize') * config.int('window', 1)
     except Exception:
       _num_inputs = hdf5_dimension(dataset, 'inputPattSize') * config.int('window', 1)
     # noinspection PyBroadException
     try:
       _num_outputs = {target: [hdf5_dimension(dataset, 'numLabels'), 1]}
     except Exception:
       _num_outputs = hdf5_group(dataset, 'targets/size')
       for k in _num_outputs:
         _num_outputs[k] = [_num_outputs[k], len(hdf5_shape(dataset, 'targets/data/' + k))]
     if num_inputs:
       assert num_inputs == _num_inputs
     if num_outputs:
       assert num_outputs == _num_outputs
     num_inputs = _num_inputs
     num_outputs = _num_outputs
   if not num_inputs and not num_outputs and config.has("load") and BackendEngine.is_theano_selected():
     from returnn.theano.network import LayerNetwork
     import h5py
     model = h5py.File(config.value("load", ""), "r")
     # noinspection PyProtectedMember
     num_inputs, num_outputs = LayerNetwork._n_in_out_from_hdf_model(model)
   assert num_inputs and num_outputs, "provide num_inputs/num_outputs directly or via train"
   return num_inputs, num_outputs
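
The function accepts num_outputs in several config shapes. A sketch of equivalent forms, with made-up dimensions:

# Dict form (typed config): data_key -> (dim, ndim), where ndim=1 usually
# means sparse data and ndim=2 means dense data.
num_outputs = {
    "classes": (5000, 1),  # 5000 classes, sparse targets
    "data": (40, 2),       # 40-dim dense input features
}

# Plain int form (non-typed config): number of classes for the default
# target; the code above turns this into {target: [5000, 1]}.
num_outputs = 5000
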
Code example #7
def _forward(segment_name, features):
    """
    :param str segment_name: segment name, as passed in from Sprint
    :param numpy.ndarray features: format (input-feature,time) (via Sprint)
    :return: format (output-dim,time)
    :rtype: numpy.ndarray
    """
    print("Sprint forward", segment_name, features.shape)
    start_time = time.time()
    assert engine is not None, "not initialized"
    assert sprintDataset

    # Features are in Sprint format (feature,time).
    num_time = features.shape[1]
    assert features.shape == (InputDim, num_time)
    dataset, seq_idx = features_to_dataset(features=features,
                                           segment_name=segment_name)

    if BackendEngine.is_theano_selected():
        # Prepare data for device.
        device = engine.devices[0]
        from returnn.theano.engine_util import assign_dev_data_single_seq
        success = assign_dev_data_single_seq(device,
                                             dataset=dataset,
                                             seq=seq_idx)
        assert success, "failed to allocate & assign data for seq %i, %s" % (
            seq_idx, segment_name)

        # Do the actual forwarding and collect result.
        device.run("extract")
        result, _ = device.result()
        assert result is not None, "Device crashed."
        assert len(result) == 1
        posteriors = result[0]

    elif BackendEngine.is_tensorflow_selected():
        posteriors = engine.forward_single(dataset=dataset, seq_idx=seq_idx)

    else:
        raise NotImplementedError("unknown backend engine")
    # If we have a sequence training criterion, posteriors might be in format (time,seq|batch,emission).
    if posteriors.ndim == 3:
        assert posteriors.shape == (num_time, 1, OutputDim * MaxSegmentLength)
        posteriors = posteriors[:, 0]
    # Posteriors are in format (time,emission).
    assert posteriors.shape == (num_time, OutputDim * MaxSegmentLength)
    # Reformat to Sprint expected format (emission,time).
    posteriors = posteriors.transpose()
    assert posteriors.shape == (OutputDim * MaxSegmentLength, num_time)
    stats = (numpy.min(posteriors), numpy.max(posteriors),
             numpy.mean(posteriors), numpy.std(posteriors))
    print("posteriors min/max/mean/std:", stats, "time:",
          time.time() - start_time)
    if numpy.isinf(posteriors).any() or numpy.isnan(posteriors).any():
        print("posteriors:", posteriors)
        debug_feat_fn = "/tmp/returnn.pid%i.sprintinterface.debug.features.txt" % os.getpid()
        debug_post_fn = "/tmp/returnn.pid%i.sprintinterface.debug.posteriors.txt" % os.getpid()
        print("Wrote to files %s, %s" % (debug_feat_fn, debug_post_fn))
        numpy.savetxt(debug_feat_fn, features)
        numpy.savetxt(debug_post_fn, posteriors)
        assert False, "Error, posteriors contain invalid numbers."

    return posteriors
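
The final reshaping is a plain transpose from RETURNN's (time, emission) layout to the (emission, time) layout Sprint expects. A standalone numpy sketch with made-up sizes:

import numpy

num_time, num_emissions = 100, 4501  # hypothetical sizes
posteriors = numpy.zeros((num_time, num_emissions))  # RETURNN layout (time, emission)
posteriors = posteriors.transpose()  # Sprint layout (emission, time)
assert posteriors.shape == (num_emissions, num_time)
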
Code example #8
def _init_base(configfile=None,
               target_mode=None,
               epoch=None,
               sprint_opts=None):
    """
    :param str|None configfile: filename; via init(), this is set
    :param str|None target_mode: "forward" or similar; via init(), this is set
    :param int epoch: via init(), this is set
    :param dict[str,str]|None sprint_opts: optional parameters to override values in configfile
    """
    global isInitialized
    isInitialized = True
    # Run through in any case. Maybe just to set targetMode.

    if not getattr(sys, "argv", None):
        # Set some dummy. Some code might want this (e.g. TensorFlow).
        sys.argv = [__file__]

    global Engine
    global config
    if not config:
        # Some subset of what we do in rnn.init().

        rnn.init_better_exchook()
        rnn.init_thread_join_hack()

        if configfile is None:
            configfile = DefaultSprintCrnnConfig
        assert os.path.exists(configfile)
        rnn.init_config(config_filename=configfile,
                        extra_updates={"task": target_mode})
        assert rnn.config
        config = rnn.config
        if sprint_opts is not None:
            config.update(sprint_opts)

        rnn.init_log()
        rnn.returnn_greeting(config_filename=configfile)
        rnn.init_backend_engine()
        rnn.init_faulthandler(sigusr1_chain=True)
        rnn.init_config_json_network()

        if BackendEngine.is_tensorflow_selected():
            # Use TFEngine.Engine class instead of Engine.Engine.
            from returnn.tf.engine import Engine
        elif BackendEngine.is_theano_selected():
            from returnn.theano.engine import Engine

        import atexit
        atexit.register(_at_exit_handler)

    if target_mode:
        set_target_mode(target_mode)

    _init_dataset()

    if target_mode and target_mode == "forward" and epoch:
        model_filename = config.value('model', '')
        fns = [
            EngineBase.epoch_model_filename(model_filename, epoch, is_pretrain)
            for is_pretrain in [False, True]
        ]
        fn_postfix = ""
        if BackendEngine.is_tensorflow_selected():
            fn_postfix += ".meta"
        fns_existing = [fn for fn in fns if os.path.exists(fn + fn_postfix)]
        assert len(fns_existing) == 1, "%s not found" % fns
        model_epoch_filename = fns_existing[0]
        config.set('load', model_epoch_filename)
        assert EngineBase.get_epoch_model(config)[1] == model_epoch_filename, (
            "%r != %r" %
            (EngineBase.get_epoch_model(config), model_epoch_filename))

    global engine
    if not engine:
        devices = rnn.init_theano_devices()
        rnn.print_task_properties(devices)
        rnn.init_engine(devices)
        engine = rnn.engine
        assert isinstance(engine, Engine)
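
A hedged sketch of how the Sprint side might drive this initialization for forwarding; the config filename and epoch number are made up:

# After this call, the module globals config and engine are set, and the
# config's "load" entry points at the unique epoch-80 checkpoint.
_init_base(configfile="config/returnn.config", target_mode="forward", epoch=80)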