Example #1
def test_BuildOptimizer_rmsprop():
  """Test RmsOptimizer proto value conversion to Keras config."""
  config = model_pb2.Model()
  config.training.ClearField("optimizer")
  config.training.rmsprop_optimizer.initial_learning_rate_micros = 1000
  config.training.rmsprop_optimizer.learning_rate_decay_per_epoch_micros = 1000
  optimizer = builders.BuildOptimizer(config)
  optimizer_config = optimizer.get_config()
  assert pytest.approx(optimizer_config["decay"]) == 0.001
  assert pytest.approx(optimizer_config["rho"]) == 0.9
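The `_micros` fields in the proto are integers scaled by one million, so `learning_rate_decay_per_epoch_micros = 1000` becomes a decay of 0.001 once converted, and `rho` is simply left at the Keras default of 0.9. A minimal sketch of that conversion, assuming the legacy `keras.optimizers.RMSprop` constructor (the real `builders.BuildOptimizer` reads these fields from the config proto and may differ in detail):

import keras

def BuildRmspropFromMicros(initial_learning_rate_micros: int,
                           learning_rate_decay_per_epoch_micros: int):
  """Hypothetical helper: convert micros-scaled fields to float hyperparameters."""
  # Dividing by 1e6 recovers the float values, e.g. 1000 micros -> 0.001.
  return keras.optimizers.RMSprop(
      lr=initial_learning_rate_micros / 1e6,
      decay=learning_rate_decay_per_epoch_micros / 1e6)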
Example #2
def test_BuildOptimizer_adam():
  """Test AdamOptimizer proto value conversion to Keras config."""
  config = model_pb2.Model()
  config.training.ClearField("optimizer")
  config.training.adam_optimizer.initial_learning_rate_micros = 2000
  config.training.adam_optimizer.learning_rate_decay_per_epoch_micros = 5000
  config.training.adam_optimizer.beta_1_micros = 900000
  config.training.adam_optimizer.beta_2_micros = 999000
  config.training.adam_optimizer.normalized_gradient_clip_micros = 5000000
  optimizer = builders.BuildOptimizer(config)
  optimizer_config = optimizer.get_config()
  assert pytest.approx(optimizer_config["decay"]) == 0.005
  assert pytest.approx(optimizer_config["beta_1"]) == 0.9
  assert pytest.approx(optimizer_config["beta_2"]) == 0.999
  assert pytest.approx(optimizer_config["clipnorm"]) == 5.0
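The Adam case follows the same scaling: `beta_1_micros = 900000` maps to 0.9, `beta_2_micros = 999000` to 0.999, and `normalized_gradient_clip_micros = 5000000` to a `clipnorm` of 5.0. A hedged sketch under the same assumption about the legacy Keras optimizer API:

import keras

def BuildAdamFromMicros(config):
  """Hypothetical helper: micros-scaled Adam proto fields to a Keras Adam."""
  opt = config.training.adam_optimizer
  return keras.optimizers.Adam(
      lr=opt.initial_learning_rate_micros / 1e6,              # 2000 -> 0.002
      decay=opt.learning_rate_decay_per_epoch_micros / 1e6,   # 5000 -> 0.005
      beta_1=opt.beta_1_micros / 1e6,                         # 900000 -> 0.9
      beta_2=opt.beta_2_micros / 1e6,                         # 999000 -> 0.999
      clipnorm=opt.normalized_gradient_clip_micros / 1e6)     # 5000000 -> 5.0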
Example #3
  def Train(self, corpus, **unused_kwargs) -> 'keras.models.Sequential':
    """Locked training.

    If there are cached epoch checkpoints, the one closest to the target number
    of epochs will be loaded, and the model will be trained for only the
    remaining number of epochs, if any. This means that calling this function
    twice will only actually train the model the first time, and all subsequent
    calls will be no-ops.

    This method must only be called when the model is locked.

    Returns:
      The trained Keras model.
    """
    del unused_kwargs

    model = builders.BuildKerasModel(self.config, self.atomizer.vocab_size)
    with open(self.cache.keypath('model.yaml'), 'w') as f:
      f.write(model.to_yaml())
    model.compile(
        loss='categorical_crossentropy',
        optimizer=builders.BuildOptimizer(self.config))

    # Print a model summary.
    buf = io.StringIO()
    model.summary(print_fn=lambda x: buf.write(x + '\n'))
    app.Log(1, 'Model summary:\n%s', buf.getvalue())

    # TODO(cec): Add an atomizer.CreateVocabularyFile() method, with frequency
    # counts for a given corpus.
    def Escape(token: str) -> str:
      """Make a token visible and printable."""
      if token == '\t':
        return '\\t'
      elif token == '\n':
        return '\\n'
      elif not token.strip():
        return f"'{token}'"
      else:
        return token

    if not (self.cache.path / 'embeddings' / 'metadata.tsv').is_file():
      with open(self.cache.path / 'embeddings' / 'metadata.tsv', 'w') as f:
        for _, token in sorted(
            self.atomizer.decoder.items(), key=lambda x: x[0]):
          f.write(Escape(token) + '\n')

    target_num_epochs = self.config.training.num_epochs
    starting_epoch = 0

    epoch_checkpoints = self.epoch_checkpoints
    if len(epoch_checkpoints) >= target_num_epochs:
      # We have already trained a model to at least this number of epochs, so
      # simply load the weights from that epoch and call it a day.
      app.Log(1, 'Loading weights from %s',
              epoch_checkpoints[target_num_epochs - 1])
      model.load_weights(epoch_checkpoints[target_num_epochs - 1])
      return model

    # Now entering the point at which training is inevitable.
    with logutil.TeeLogsToFile('train', self.cache.path / 'logs'):
      # Deferred importing of Keras so that we don't have to activate the
      # TensorFlow backend every time we import this module.
      import keras

      if epoch_checkpoints:
        # We have already trained a model at least part of the way to our target
        # number of epochs, so load the most recent one.
        starting_epoch = len(epoch_checkpoints)
        app.Log(1, 'Resuming training from epoch %d.', starting_epoch)
        model.load_weights(epoch_checkpoints[-1])

      callbacks = [
          keras.callbacks.ModelCheckpoint(
              str(self.cache.path / 'checkpoints' / '{epoch:03d}.hdf5'),
              verbose=1,
              mode="min",
              save_best_only=False),
          keras.callbacks.TensorBoard(
              str(self.cache.path / 'embeddings'),
              write_graph=True,
              embeddings_freq=1,
              embeddings_metadata={
                  'embedding_1':
                  str(self.cache.path / 'embeddings' / 'metadata.tsv'),
              }),
          telemetry.TrainingLogger(
              self.cache.path / 'logs').KerasCallback(keras),
      ]

      generator = data_generators.AutoGenerator(corpus, self.config.training)
      steps_per_epoch = (corpus.encoded.token_count - 1) // (
          self.config.training.batch_size *
          self.config.training.sequence_length)
      app.Log(
          1, 'Step counts: %s per epoch, %s left to do, %s total',
          humanize.Commas(steps_per_epoch),
          humanize.Commas(
              (target_num_epochs - starting_epoch) * steps_per_epoch),
          humanize.Commas(target_num_epochs * steps_per_epoch))
      model.fit_generator(
          generator,
          steps_per_epoch=steps_per_epoch,
          callbacks=callbacks,
          initial_epoch=starting_epoch,
          epochs=target_num_epochs)
    return model
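The docstring's resume behaviour comes down to one decision on `epoch_checkpoints`: enough checkpoints means load the target epoch's weights and skip training entirely, some checkpoints means resume from the latest, and none means start from scratch. A standalone sketch of just that decision, using hypothetical checkpoint names:

def ResumePlan(epoch_checkpoints, target_num_epochs):
  """Illustrative only: mirrors the checkpoint decision made by Train()."""
  if len(epoch_checkpoints) >= target_num_epochs:
    # Already trained to (at least) the target: load that epoch, train nothing.
    return epoch_checkpoints[target_num_epochs - 1], target_num_epochs
  if epoch_checkpoints:
    # Partial progress: resume from the most recent checkpoint.
    return epoch_checkpoints[-1], len(epoch_checkpoints)
  # No checkpoints: train from scratch.
  return None, 0

five = [f'{i:03d}.hdf5' for i in range(1, 6)]
ten = [f'{i:03d}.hdf5' for i in range(1, 11)]
assert ResumePlan(five, 8) == ('005.hdf5', 5)  # resume at epoch 5, 3 epochs left
assert ResumePlan(ten, 8) == ('008.hdf5', 8)   # already done, Train() is a no-op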
Example #4
    def Train(self, corpus, **unused_kwargs) -> "keras.models.Sequential":
        """Locked training.

        If there are cached epoch checkpoints, the one closest to the target number
        of epochs will be loaded, and the model will be trained for only the
        remaining number of epochs, if any. This means that calling this function
        twice will only actually train the model the first time, and all subsequent
        calls will be no-ops.

        This method must only be called when the model is locked.

        Returns:
          The trained Keras model.
        """
        del unused_kwargs

        model = builders.BuildKerasModel(self.config,
                                         self.tokenizer.vocab_size)
        with open(self.cache.keypath("model.yaml"), "w") as f:
            f.write(model.to_yaml())
        model.compile(
            loss="categorical_crossentropy",
            optimizer=builders.BuildOptimizer(self.config),
        )

        # Print a model summary.
        buf = io.StringIO()
        model.summary(print_fn=lambda x: buf.write(x + "\n"))
        l.logger().info("Model summary:\n{}".format(buf.getvalue()))

        # TODO(cec): Add a tokenizer.CreateVocabularyFile() method, with frequency
        # counts for a given corpus.
        def Escape(token: str) -> str:
            """Make a token visible and printable."""
            if token == "\t":
                return "\\t"
            elif token == "\n":
                return "\\n"
            elif not token.strip():
                return f"'{token}'"
            else:
                return token

        if not (self.cache.path / "embeddings" / "metadata.tsv").is_file():
            with open(self.cache.path / "embeddings" / "metadata.tsv",
                      "w") as f:
                for _, token in sorted(self.tokenizer.decoder.items(),
                                       key=lambda x: x[0]):
                    f.write(Escape(token) + "\n")

        self.num_epochs = self.config.training.num_epochs
        starting_epoch = 0

        epoch_checkpoints = self.epoch_checkpoints
        if len(epoch_checkpoints) >= self.num_epochs:
            # We have already trained a model to at least this number of epochs, so
            # simply load the weights from that epoch and call it a day.
            l.logger().info("Loading weights from {}".format(
                epoch_checkpoints[self.num_epochs - 1]))
            model.load_weights(epoch_checkpoints[self.num_epochs - 1])
            return model

        # Now entering the point at which training is inevitable.
        # with logutil.TeeLogsToFile("train", self.cache.path / "logs"):
        # Deferred importing of Keras so that we don't have to activate the
        # TensorFlow backend every time we import this module.
        import keras

        if epoch_checkpoints:
            # We have already trained a model at least part of the way to our target
            # number of epochs, so load the most recent one.
            starting_epoch = len(epoch_checkpoints)
            l.logger().info(
                "Resuming training from epoch {}.".format(starting_epoch))
            model.load_weights(epoch_checkpoints[-1])

        callbacks = [
            keras.callbacks.ModelCheckpoint(
                str(self.cache.path / "checkpoints" / "{epoch:03d}.hdf5"),
                verbose=1,
                mode="min",
                save_best_only=False,
            ),
            keras.callbacks.TensorBoard(
                str(self.cache.path / "embeddings"),
                write_graph=True,
                embeddings_freq=1,
                embeddings_metadata={
                    "embedding_1":
                    str(self.cache.path / "embeddings" / "metadata.tsv"),
                },
            ),
            self.telemetry.TrainingLogger(self.cache.path /
                                          "logs").KerasCallback(keras),
        ]

        generator = KerasBatchGenerator()
        steps_per_epoch = (corpus.encoded.token_count -
                           1) // (self.config.training.batch_size *
                                  self.config.training.sequence_length)
        l.logger().info(
            "Step counts: {} per epoch, {} left to do, {} total".format(
                humanize.intcomma(steps_per_epoch),
                humanize.intcomma(
                    (self.num_epochs - starting_epoch) * steps_per_epoch),
                humanize.intcomma(self.num_epochs * steps_per_epoch),
            ))
        model.fit_generator(
            generator.AutoGenerator(corpus, self.config.training),
            steps_per_epoch=steps_per_epoch,
            callbacks=callbacks,
            initial_epoch=starting_epoch,
            epochs=self.num_epochs,
        )
        return model
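Both versions compute `steps_per_epoch` the same way: the corpus is treated as one long token stream and each step consumes `batch_size * sequence_length` tokens, so one epoch covers the corpus roughly once. A worked example with made-up sizes (the real numbers come from `corpus.encoded.token_count` and the training proto):

token_count = 10_000_001     # hypothetical corpus.encoded.token_count
batch_size = 64              # hypothetical config.training.batch_size
sequence_length = 50         # hypothetical config.training.sequence_length

steps_per_epoch = (token_count - 1) // (batch_size * sequence_length)
assert steps_per_epoch == 3125  # 10,000,000 // 3,200 == 3125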