Example #1
    def _compile_keras_model(self, hp, model):
        """Compile `model` with a tuner-chosen optimizer and learning rate.

        Expects `self.num_samples`, `self.batch_size`, and `self.epochs`
        to be set on the enclosing tuner.
        """
        # Specify hyperparameters from compile(...)
        optimizer_name = hp.Choice(
            "optimizer",
            ["adam", "sgd", "adam_weight_decay"],
            default="adam",
        )
        # TODO: add adadelta optimizer when it can optimize embedding layer on GPU.
        learning_rate = hp.Choice("learning_rate",
                                  [1e-1, 1e-2, 1e-3, 1e-4, 2e-5, 1e-5],
                                  default=1e-3)

        if optimizer_name == "adam":
            optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_name == "sgd":
            optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer_name == "adam_weight_decay":
            steps_per_epoch = int(self.num_samples / self.batch_size)
            num_train_steps = steps_per_epoch * self.epochs
            # Warm up over the first 10% of total training steps.
            warmup_steps = int(self.epochs * self.num_samples * 0.1 /
                               self.batch_size)

            # Decay the learning rate to zero over training (PolynomialDecay
            # defaults to power=1.0, i.e. linear decay).
            lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=num_train_steps,
                end_learning_rate=0.0,
            )
            if warmup_steps:
                # Wrap the decay schedule with a linear warmup phase.
                lr_schedule = keras_layers.WarmUp(
                    initial_learning_rate=learning_rate,
                    decay_schedule_fn=lr_schedule,
                    warmup_steps=warmup_steps,
                )

            # Adam with decoupled weight decay, in the style of BERT
            # fine-tuning; LayerNorm and bias variables are excluded.
            optimizer = keras_layers.AdamWeightDecay(
                learning_rate=lr_schedule,
                weight_decay_rate=0.01,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=1e-6,
                exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
            )

        model.compile(optimizer=optimizer,
                      metrics=self._get_metrics(),
                      loss=self._get_loss())

        return model
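
Both examples wrap the decay schedule in keras_layers.WarmUp, a LearningRateSchedule that ramps the learning rate up linearly before handing control to the wrapped decay schedule. The sketch below is a hypothetical minimal re-implementation of that pattern, for illustration only; the real keras_layers.WarmUp additionally supports serialization via get_config() and a configurable warmup power.

import tensorflow as tf


class WarmUpSketch(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Linear warmup followed by a wrapped decay schedule (illustrative sketch)."""

    def __init__(self, initial_learning_rate, decay_schedule_fn, warmup_steps):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.decay_schedule_fn = decay_schedule_fn
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        warmup_steps = tf.cast(self.warmup_steps, tf.float32)
        # Ramp linearly from 0 to initial_learning_rate over warmup_steps,
        # then follow the wrapped decay schedule.
        warmup_lr = self.initial_learning_rate * (step / warmup_steps)
        return tf.cond(
            step < warmup_steps,
            lambda: warmup_lr,
            lambda: self.decay_schedule_fn(step),
        )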
Example #2
import os

import numpy as np
import tensorflow as tf

# Assumed import: the examples appear to reference AutoKeras's custom layers.
from autokeras import keras_layers as layer_module


def test_adam_weight_decay(tmp_path):
    # Build a trivial model, fit it with the custom optimizer, and save it
    # to check that the optimizer and its schedule serialize.
    model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(10,))])
    lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=0.1,
        decay_steps=100,
        end_learning_rate=0.0,
    )
    lr_schedule = layer_module.WarmUp(
        initial_learning_rate=0.1,
        decay_schedule_fn=lr_schedule,
        warmup_steps=10,
    )
    optimizer = layer_module.AdamWeightDecay(
        learning_rate=lr_schedule,
        weight_decay_rate=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
    )
    model.compile(loss="mse", optimizer=optimizer)
    model.fit(np.random.rand(100, 10), np.random.rand(100, 10), epochs=2)
    model.save(os.path.join(tmp_path, "model"))
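
Because the saved model contains custom objects, reloading it in a fresh process generally requires registering those classes with Keras. A hedged usage sketch, assuming the same layer_module import as the test; the path argument is a placeholder for the tmp_path used above, and how much compile state is restored varies by TensorFlow version.

import os

import tensorflow as tf

from autokeras import keras_layers as layer_module  # assumed module path

# Register the custom optimizer and schedule so Keras can deserialize them.
reloaded = tf.keras.models.load_model(
    os.path.join("path", "to", "model"),  # placeholder for tmp_path / "model"
    custom_objects={
        "AdamWeightDecay": layer_module.AdamWeightDecay,
        "WarmUp": layer_module.WarmUp,
    },
)
reloaded.predict(tf.random.uniform((1, 10)))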