def test_tfkeras_pruning_callback_monitor_is_invalid() -> None:

    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = study.ask()
    callback = TFKerasPruningCallback(trial, "InvalidMonitor")

    with pytest.warns(UserWarning):
        callback.on_epoch_end(0, {"loss": 1.0})
Example #2
def objective(trial):
    # hyperparameter search space
    units = trial.suggest_int("units", 25, 250)
    dropout = trial.suggest_categorical("dropout", [0, 0.1, 0.2, 0.3, 0.4])
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 96, 128])
    epochs = trial.suggest_int("epochs", 5, 30)
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)

    model = Sequential()
    model.add(LSTM(units=units, input_shape=input_shape, dropout=dropout))
    model.add(Dense(num_labels, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=lr),
                  metrics=['accuracy'])

    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=0,
              validation_data=(x_valid, y_valid),
              callbacks=[TFKerasPruningCallback(trial, 'val_loss')])

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(x_valid, y_valid, batch_size=batch_size, verbose=0)

    return score[1]
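
A side note on the search-space call above: trial.suggest_loguniform is Optuna's legacy spelling for a log-scaled float range; in current Optuna the equivalent call is trial.suggest_float with log=True:

    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)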
Example #3
    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_dim=20))
        model.compile(optimizer="rmsprop",
                      loss="binary_crossentropy",
                      metrics=["accuracy"])

        # TODO(Yanase): Unify the metric with 'accuracy' after stopping TensorFlow 1.x support.
        callback_metric_name = "accuracy"
        if pkg_resources.parse_version(
                tf.__version__) < pkg_resources.parse_version("2.0.0"):
            callback_metric_name = "acc"

        model.fit(
            np.zeros((16, 20), np.float32),
            np.zeros((16, ), np.int32),
            batch_size=1,
            epochs=1,
            callbacks=[TFKerasPruningCallback(trial, callback_metric_name)],
            verbose=0,
        )

        return 1.0
def objective(trial):
    # Clear clutter from previous TensorFlow graphs.
    tf.keras.backend.clear_session()

    # Metrics to be monitored by Optuna.
    if tf.__version__ >= "2":
        monitor = "val_accuracy"
    else:
        monitor = "val_acc"

    # Create tf.keras model instance.
    model = create_model(trial)

    # Create dataset instance.
    ds_train = train_dataset()
    ds_eval = eval_dataset()

    # Create callbacks for early stopping and pruning.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=3),
        TFKerasPruningCallback(trial, monitor),
    ]

    # Train model.
    history = model.fit(
        ds_train,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=ds_eval,
        validation_steps=VALIDATION_STEPS,
        callbacks=callbacks,
    )

    return history.history[monitor][-1]
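One caveat in the snippet above: the lexicographic check tf.__version__ >= "2" happens to separate the 1.x and 2.x release lines, but it is not a robust version comparison (it would misclassify a hypothetical "10.x"). A sketch of a safer variant, assuming the packaging distribution is installed:

from packaging import version

# Compare parsed versions instead of raw strings.
if version.parse(tf.__version__) >= version.parse("2"):
    monitor = "val_accuracy"
else:
    monitor = "val_acc"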
Example #5
def fit_evaluate(trial):
    with tf.device("/GPU:0"):
        try:
            model = PairModel(
                n_blocks=trial.suggest_int("n_blocks", 1, 6),
                base_filters=trial.suggest_int("base_filters", 8, 32, log=True),
                kernel_size=trial.suggest_int("kernel_size", 3, 11),
                pool_strides=trial.suggest_int("pool_strides", 2, 5),
                dilation_rate=trial.suggest_int("dilation_rate", 1, 5),
                hidden_size=trial.suggest_int("hidden_size", 32, 512, log=True),
                input_shape=(pair_gen_train.window_size, pair_gen_train.channel_size),
            )
        except Exception:
            # Invalid model architecture
            return 0.0

    initial_learning_rate = trial.suggest_loguniform(
        "initial_learning_rate", 1e-5, 1e-2
    )
    lr_schedule = tf.keras.experimental.CosineDecay(
        initial_learning_rate=initial_learning_rate,
        decay_steps=STEPS_PER_EPOCH * EPOCHS,
    )
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

    # Checkpointing
    utc_timestamp = calendar.timegm(trial.datetime_start.utctimetuple())
    run_name = f"trial-{trial.number:05d}-{utc_timestamp}"
    run_path = Path("checkpoints") / run_name
    checkpoint_path = run_path / "epoch-{epoch:02d}"
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(str(checkpoint_path))

    history = model.fit(
        ds_pairs_train.batch(BATCH_SIZE),
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=EPOCHS,
        validation_data=validation_data,
        callbacks=[checkpoint_cb, TFKerasPruningCallback(trial, "val_accuracy")],
    )
    with open(run_path / "history.pkl", "wb") as f:
        pickle.dump(history.history, f)

    with open(run_path / "params.json", "w") as f:
        json.dump(trial.params, f, sort_keys=True, indent=2)

    _, accuracy = model.evaluate(validation_data)
    return accuracy
Example #6
    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(1, activation='sigmoid', input_dim=20))
        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        model.fit(np.zeros((16, 20), np.float32),
                  np.zeros((16, ), np.int32),
                  batch_size=1,
                  epochs=1,
                  callbacks=[TFKerasPruningCallback(trial, 'acc')],
                  verbose=0)

        return 1.0
def test_tfkeras_pruning_callback_observation_isnan() -> None:

    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = study.ask()
    callback = TFKerasPruningCallback(trial, "loss")

    with pytest.raises(optuna.TrialPruned):
        callback.on_epoch_end(0, {"loss": 1.0})

    with pytest.raises(optuna.TrialPruned):
        callback.on_epoch_end(0, {"loss": float("nan")})
Example #8
def test_tfkeras_pruning_callback_observation_isnan():
    # type: () -> None

    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = create_running_trial(study, 1.0)
    callback = TFKerasPruningCallback(trial, 'loss')

    with pytest.raises(optuna.structs.TrialPruned):
        callback.on_epoch_end(0, {'loss': 1.0})

    with pytest.raises(optuna.structs.TrialPruned):
        callback.on_epoch_end(0, {'loss': float('nan')})
Example #10
    def get_callbacks(cls,
                      tensorboard: bool = True,
                      cosine_annealing: bool = False,
                      reduce_lr_on_plateau: bool = True,
                      early_stopping: bool = True,
                      monitor: str = 'val_loss',
                      **kwargs):
        """よく利用するコールバックを簡単に取得できるようにします。

        デフォルトではTensorBoard,ReduceLROnPlateau(),EarlyStopping(val_loss非更新、10エポックで停止)が自動的に選ばれます。
        また、tftk.ENABLE_SUSPEND_RESUME_TRAINING()が有効な場合、中断/再開を可能にするSuspendCallbackが使用されます。
        
        Parameters:
            tensorboard : TensorBoardのログをBaseDir/TrainingName以下に保存する場合はTrueを指定する。未指定時 True
            cosine_annealing : CosineAnnealingを行い学習率をコントロールする場合はTrue、未指定時 False
            reduce_lr_on_plateau : ReduceLROnPlateauで停滞時に学習率を下げる場合はTrue 、未指定時 True
            ealy_stopping : EarlyStoppingで停滞時に学習を終了する場合、True。 未指定時 True.
            csv_logger : CSVLoggerを使用し、学習の記録を行う
            monitor {str} -- [description] (default: {"val_acc"})

        Keyword Arguments:
            profile_batch{str} -- プロファイルを行う際の開始バッチ、終了バッチを指定します。Noneの場合実行しません。
            annealing_epoch : コサイン・アニーリング全体のエポック数、指定なし 100エポック
            init_lr : コサイン・アニーリングする際の最初の学習率、未指定時 0.01
            min_lr : 最小の学習率、コサイン・アニーリング時 = 1e-6, ReduceOnPlateau時1e-6
            patience : ReduceOnPlateau使用時にpatienceエポック数、モニター値の更新がない場合、factor分学習率を下げる。
            early_stopping_patience : EalyStopping利用時に、このエポック数、monitor値の更新がなければ、学習を終了する。

        Returns:
            List[tf.keras.callbacks.Callback] -- 学習に使用するコールバックのList

        Example:
            from tftk.callbacks import HandyCallback
            callbacks = HandyCallback.get_callbacks(early_stopping_patience=15)
        """

        context = Context.get_instance()
        base = context[Context.TRAINING_BASE_DIR]
        name = context[Context.TRAINING_NAME]

        training_dir = Context.get_training_path()

        if not tf.io.gfile.exists(training_dir):
            tf.io.gfile.makedirs(training_dir)

        callbacks = []

        if tensorboard:
            # print("Callback-TensorBoard")
            tensorboard_log_dir = os.path.join(training_dir, "log")
            profile_batch = kwargs.get("profile_batch", None)
            if profile_batch is not None:
                callbacks.append(
                    tf.keras.callbacks.TensorBoard(log_dir=tensorboard_log_dir,
                                                   profile_batch=profile_batch,
                                                   histogram_freq=1))
            else:
                callbacks.append(
                    tf.keras.callbacks.TensorBoard(
                        log_dir=tensorboard_log_dir))

        # if save_weights == True:
        #     print("Callback-ModelCheckPoint")
        #     save_path = base + os.path.sep  + "model.hdf5"
        #     callbacks.append(tf.keras.callbacks.ModelCheckpoint(filepath=save_path,monitor="val_acc",save_best_only=True,save_weights_only=True))

        if cosine_annealing:
            print("Callback-CosineAnnealing")
            annealing_epoch = kwargs.get("annealing_epoch", 100)
            init_lr = kwargs.get("init_lr", 0.01)
            min_lr = kwargs.get("min_lr", 1e-6)
            cosine_annealer = CosineAnnealingScheduler(annealing_epoch,
                                                       eta_max=init_lr,
                                                       eta_min=min_lr)
            callbacks.append(cosine_annealer)
            reduce_lr_on_plateau = False

        if reduce_lr_on_plateau:
            print("Callback-ReduceOnPlateau")
            patience = kwargs.get("patience", 5)
            factor = kwargs.get("factor", 0.25)
            min_lr = kwargs.get("min_lr", 1e-6)
            callbacks.append(
                tf.keras.callbacks.ReduceLROnPlateau(patience=patience,
                                                     factor=factor,
                                                     verbose=1,
                                                     min_lr=min_lr))

        if early_stopping == True & context.get(Context.OPTUNA,
                                                False) == False:
            early_stopping_patience = kwargs.get("early_stopping_patience", 8)
            callbacks.append(
                tf.keras.callbacks.EarlyStopping(
                    monitor=monitor,
                    patience=early_stopping_patience,
                    verbose=1))

        if IS_SUSPEND_RESUME_TRAINING():
            print("Suspend Resume Callback")
            callbacks.append(SuspendCallback(monitor=monitor))

        if context.get(Context.OPTUNA, False):
            print("Using Optuna")
            trial = context.get(Context.OPTUNA_TRIAL)
            callbacks.append(TFKerasPruningCallback(trial, monitor=monitor))

        return callbacks
Example #11
    def _do_set_callbacks_task(self, base_model, train_dataset, valid_dataset,
                               trial):

        # Save Model Checkpoint
        # result_dir/model/
        model_save_file = os.path.join(self.config.model_path, "best_model.h5")
        checkpoint = keras.callbacks.ModelCheckpoint(filepath=model_save_file,
                                                     save_best_only=True)

        # Learning Rate Schedule
        if self.config.train_params.scheduler:
            funcs = self.config.train_params.scheduler["functions"]
            scheduler_name = next(iter(funcs.keys()))
            scheduler_params = next(iter(funcs.values()))
            scheduler_params["n_epoch"] = self.config.epochs
            scheduler_params[
                "base_lr"] = self.config.train_params.learning_rate
            scheduler = keras.callbacks.LearningRateScheduler(
                getattr(schedulers, scheduler_name)(**scheduler_params))
        else:
            scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5,
                                                          patience=10,
                                                          verbose=1)

        # Plot History
        # result_dir/learning/
        learning_path = self.config.learning_path

        plot_history = ncc.callbacks.PlotHistory(
            learning_path, ['loss', 'acc', 'iou_score', 'f1-score'])

        plot_learning_rate = ncc.callbacks.PlotLearningRate(learning_path)

        callbacks = [checkpoint, scheduler, plot_history, plot_learning_rate]

        if self.config.task == ncc.tasks.Task.SEMANTIC_SEGMENTATION:
            # Plot IoU History
            iou_history = ncc.callbacks.IouHistory(
                save_dir=learning_path,
                valid_dataset=valid_dataset,
                class_names=self.config.class_names,
                batch_size=self.config.train_params.batch_size)

            # Predict validation
            # result_dir/image/validation/
            val_save_dir = os.path.join(self.config.image_path, "validation")

            generate_sample_result = ncc.callbacks.GenerateSampleResult(
                val_save_dir=val_save_dir,
                valid_dataset=valid_dataset,
                nb_classes=self.config.nb_classes,
                batch_size=self.config.train_params.batch_size,
                segmentation_val_step=self.config.segmentation_val_step)
            callbacks.extend([iou_history, generate_sample_result])

            if self.config.optuna:
                # Trial prune for Optuna
                callbacks.append(TFKerasPruningCallback(trial, 'val_f1-score'))

        elif self.config.task == ncc.tasks.Task.CLASSIFICATION:
            if self.config.input_data_type == "video":
                # result_dir/model/
                batch_model_path = os.path.join(self.config.model_path,
                                                "batch_model.h5")

                batch_checkpoint = ncc.callbacks.BatchCheckpoint(
                    learning_path,
                    batch_model_path,
                    token=self.config.slack_token,
                    channel=self.config.slack_channel,
                    period=self.config.batch_period)
                callbacks.append(batch_checkpoint)

            if self.config.optuna:
                # Trial prune for Optuna
                callbacks.append(TFKerasPruningCallback(trial, 'val_acc'))

        if self.config.slack_channel and self.config.slack_token:
            if self.config.task == ncc.tasks.Task.SEMANTIC_SEGMENTATION:
                file_name = os.path.join(learning_path, "IoU.png")
            else:
                file_name = os.path.join(learning_path, "acc.png")

            slack_logging = ncc.callbacks.SlackLogger(
                logger_file=file_name,
                token=self.config.slack_token,
                channel=self.config.slack_channel,
                title=self.config.train_params.model_name,
            )
            callbacks.append(slack_logging)

        # Early Stopping
        if self.config.early_stopping:
            early_stopping = keras.callbacks.EarlyStopping(
                monitor=self.config.monitor,
                patience=self.config.patience,
                mode='auto')
            callbacks.append(early_stopping)

        return callbacks
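
Finally, once any of these objectives has run under study.optimize, pruned trials can be separated from completed ones by their state; a minimal sketch using stable Optuna APIs, where study is assumed to be the study object from the driver sketch near the top of this listing:

import optuna

# Group finished trials by final state.
pruned = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
complete = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
print(f"pruned: {len(pruned)}, complete: {len(complete)}")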