Example #1
File: Pretrain.py  Project: rwth-i6/returnn
  def copy_params_from_old_network(self, new_network, old_network):
    """
    :type new_network: LayerNetwork
    :type old_network: LayerNetwork
    :returns the remaining hidden layer names which exist only in the new network.
    :rtype: set[str]
    """
    # network.hidden are the input + all hidden layers.
    for layer_name, layer in old_network.hidden.items():
      new_network.hidden[layer_name].set_params_by_dict(layer.get_params_dict())

    # network.output is the remaining output layer.
    if self.copy_output_layer:
      from NetworkCopyUtils import intelli_copy_layer, LayerDoNotMatchForCopy
      for layer_name in new_network.output.keys():
        assert layer_name in old_network.output
        try:
          intelli_copy_layer(old_network.output[layer_name], new_network.output[layer_name])
        except LayerDoNotMatchForCopy:
          if self.copy_output_layer == "ifpossible":
            print("Pretrain: Can not copy output layer %s, will leave it randomly initialized" % layer_name,
                  file=log.v4)
          else:
            raise
    else:
      print("Pretrain: Will not copy output layer", file=log.v4)
Example #2
    def copy_params_from_old_network(self, new_network, old_network):
        """
    :type new_network: LayerNetwork
    :type old_network: LayerNetwork
    :returns the remaining hidden layer names which exist only in the new network.
    :rtype: set[str]
    """
        # network.hidden are the input + all hidden layers.
        for layer_name, layer in old_network.hidden.items():
            new_network.hidden[layer_name].set_params_by_dict(
                layer.get_params_dict())

        # network.output is the remaining output layer.
        if self.copy_output_layer:
            from NetworkCopyUtils import intelli_copy_layer, LayerDoNotMatchForCopy
            for layer_name in new_network.output.keys():
                assert layer_name in old_network.output
                try:
                    intelli_copy_layer(old_network.output[layer_name],
                                       new_network.output[layer_name])
                except LayerDoNotMatchForCopy:
                    if self.copy_output_layer == "ifpossible":
                        print(
                            "Pretrain: Can not copy output layer %s, will leave it randomly initialized"
                            % layer_name,
                            file=log.v4)
                    else:
                        raise
        else:
            print("Pretrain: Will not copy output layer", file=log.v4)
Example #3
  def init_network_from_config(self, config):
    self.pretrain = pretrainFromConfig(config)
    self.max_seqs = config.int('max_seqs', -1)
    self.compression = config.bool('compression', False)

    epoch, model_epoch_filename = self.get_epoch_model(config)
    assert model_epoch_filename or self.start_epoch

    if model_epoch_filename:
      print("loading weights from", model_epoch_filename, file=log.v2)
      last_model_hdf = h5py.File(model_epoch_filename, "r")
    else:
      last_model_hdf = None

    if config.bool('initialize_from_model', False):
      # That's only about the topology, not the params.
      print("initializing network topology from model", file=log.v5)
      assert last_model_hdf, "last model not specified. use 'load' in config. or don't use 'initialize_from_model'"
      network = LayerNetwork.from_hdf_model_topology(last_model_hdf)
    else:
      if self.pretrain:
        # This would be obsolete if we don't want to load an existing model.
        # In self.init_train_epoch(), we initialize a new model.
        network = self.pretrain.get_network_for_epoch(epoch or self.start_epoch)
      else:
        network = LayerNetwork.from_config_topology(config)

    # We have the parameters randomly initialized at this point.
    # In training, as an initialization, we can copy over the params of an imported model,
    # where our topology might slightly differ from the imported model.
    if config.value('import_model_train_epoch1', '') and self.start_epoch == 1:
      assert last_model_hdf
      old_network = LayerNetwork.from_hdf_model_topology(last_model_hdf)
      old_network.load_hdf(last_model_hdf)
      last_model_hdf.close()
      # Copy params to new network.
      from NetworkCopyUtils import intelli_copy_layer
      # network.hidden are the input + all hidden layers.
      for layer_name, layer in sorted(old_network.hidden.items()):
        print("Copy hidden layer %s" % layer_name, file=log.v3)
        intelli_copy_layer(layer, network.hidden[layer_name])
      for layer_name, layer in sorted(old_network.output.items()):
        print("Copy output layer %s" % layer_name, file=log.v3)
        intelli_copy_layer(layer, network.output[layer_name])
      print("Not copied hidden: %s" % sorted(set(network.hidden.keys()).difference(old_network.hidden.keys())), file=log.v3)
      print("Not copied output: %s" % sorted(set(network.output.keys()).difference(old_network.output.keys())), file=log.v3)

    # Maybe load existing model parameters.
    elif last_model_hdf:
      network.load_hdf(last_model_hdf)
      last_model_hdf.close()
      EngineUtil.maybe_subtract_priors(network, self.train_data, config)

    self.network = network

    if config.has('dump_json'):
      self.network_dump_json(config.value('dump_json', ''))

    self.print_network_info()
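Stripped of the RETURNN specifics, the topology decision above reduces to three sources: an HDF model's stored topology, the pretrain schedule's per-epoch network, or the config. The sketch below is a hypothetical plain-bool restatement of that branch logic (not RETURNN API), useful for seeing which branch a given setup lands in.

# A minimal sketch of the branch logic in init_network_from_config,
# using hypothetical boolean inputs (not the RETURNN API).
def choose_topology_source(has_model_file, initialize_from_model, use_pretrain):
    """Return which source the network topology would be built from."""
    if initialize_from_model:
        # Topology only; params are loaded (or copied over) separately.
        assert has_model_file, "initialize_from_model requires a loadable model"
        return "hdf_model_topology"
    if use_pretrain:
        return "pretrain_epoch_network"
    return "config_topology"


# Example: a pretrain setup without an existing model file.
print(choose_topology_source(False, False, True))  # -> "pretrain_epoch_network"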
Example #4
File: Engine.py  Project: chagge/returnn
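Note: unlike Example #3, this variant targets Python 2 (the print >> log.v2, ... statement syntax) and omits the compression option; the control flow is otherwise identical.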
  def init_network_from_config(self, config):
    self.pretrain = pretrainFromConfig(config)
    self.max_seqs = config.int('max_seqs', -1)

    epoch, model_epoch_filename = self.get_epoch_model(config)
    assert model_epoch_filename or self.start_epoch

    if model_epoch_filename:
      print >> log.v2, "loading weights from", model_epoch_filename
      last_model_hdf = h5py.File(model_epoch_filename, "r")
    else:
      last_model_hdf = None

    if config.bool('initialize_from_model', False):
      # That's only about the topology, not the params.
      print >> log.v5, "initializing network topology from model"
      assert last_model_hdf, "last model not specified. use 'load' in config. or don't use 'initialize_from_model'"
      network = LayerNetwork.from_hdf_model_topology(last_model_hdf)
    else:
      if self.pretrain:
        # This would be obsolete if we don't want to load an existing model.
        # In self.init_train_epoch(), we initialize a new model.
        network = self.pretrain.get_network_for_epoch(epoch or self.start_epoch)
      else:
        network = LayerNetwork.from_config_topology(config)

    # We have the parameters randomly initialized at this point.
    # In training, as an initialization, we can copy over the params of an imported model,
    # where our topology might slightly differ from the imported model.
    if config.value('import_model_train_epoch1', '') and self.start_epoch == 1:
      assert last_model_hdf
      old_network = LayerNetwork.from_hdf_model_topology(last_model_hdf)
      old_network.load_hdf(last_model_hdf)
      last_model_hdf.close()
      # Copy params to new network.
      from NetworkCopyUtils import intelli_copy_layer
      # network.hidden are the input + all hidden layers.
      for layer_name, layer in sorted(old_network.hidden.items()):
        print >> log.v3, "Copy hidden layer %s" % layer_name
        intelli_copy_layer(layer, network.hidden[layer_name])
      for layer_name, layer in sorted(old_network.output.items()):
        print >> log.v3, "Copy output layer %s" % layer_name
        intelli_copy_layer(layer, network.output[layer_name])
      print >> log.v3, "Not copied hidden: %s" % sorted(set(network.hidden.keys()).difference(old_network.hidden.keys()))
      print >> log.v3, "Not copied output: %s" % sorted(set(network.output.keys()).difference(old_network.output.keys()))

    # Maybe load existing model parameters.
    elif last_model_hdf:
      network.load_hdf(last_model_hdf)
      last_model_hdf.close()
      EngineUtil.maybe_subtract_priors(network, self.train_data, config)

    self.network = network

    if config.has('dump_json'):
      self.network_dump_json(config.value('dump_json', ''))

    self.print_network_info()