def test_tfkeras_pruning_callback_monitor_is_invalid() -> None:
    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = study.ask()
    callback = TFKerasPruningCallback(trial, "InvalidMonitor")

    with pytest.warns(UserWarning):
        callback.on_epoch_end(0, {"loss": 1.0})
def objective(trial):
    # Hyperparameter search space.
    units = trial.suggest_int("units", 25, 250)
    dropout = trial.suggest_categorical("dropout", [0, 0.1, 0.2, 0.3, 0.4])
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 96, 128])
    epochs = trial.suggest_int("epochs", 5, 30)
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)

    model = Sequential()
    model.add(LSTM(units=units, input_shape=input_shape, dropout=dropout))
    model.add(Dense(num_labels, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=lr),
                  metrics=['accuracy'])

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=0,
              validation_data=(x_valid, y_valid),
              callbacks=[TFKerasPruningCallback(trial, 'val_loss')])

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(x_valid, y_valid, batch_size=batch_size, verbose=0)
    return score[1]
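A minimal sketch of how an objective like the one above is typically driven; the pruner choice and trial count here are illustrative assumptions, not taken from the original code.

# Sketch only (not from the original repo): run the objective above under a
# median pruner so TFKerasPruningCallback can stop unpromising trials early.
import optuna

study = optuna.create_study(
    direction="maximize",  # the objective returns validation accuracy
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=5),
)
study.optimize(objective, n_trials=50)  # n_trials is an arbitrary example value
print(study.best_trial.params)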
def objective(trial):
    # type: (optuna.trial.Trial) -> float

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(1, activation="sigmoid", input_dim=20))
    model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

    # TODO(Yanase): Unify the metric with 'accuracy' after stopping TensorFlow 1.x support.
    callback_metric_name = "accuracy"
    if pkg_resources.parse_version(tf.__version__) < pkg_resources.parse_version("2.0.0"):
        callback_metric_name = "acc"

    model.fit(
        np.zeros((16, 20), np.float32),
        np.zeros((16,), np.int32),
        batch_size=1,
        epochs=1,
        callbacks=[TFKerasPruningCallback(trial, callback_metric_name)],
        verbose=0,
    )

    return 1.0
def objective(trial):
    # Clear clutter from previous TensorFlow graphs.
    tf.keras.backend.clear_session()

    # Metric to be monitored by Optuna.
    if tf.__version__ >= "2":
        monitor = "val_accuracy"
    else:
        monitor = "val_acc"

    # Create tf.keras model instance.
    model = create_model(trial)

    # Create dataset instances.
    ds_train = train_dataset()
    ds_eval = eval_dataset()

    # Create callbacks for early stopping and pruning.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=3),
        TFKerasPruningCallback(trial, monitor),
    ]

    # Train model.
    history = model.fit(
        ds_train,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=ds_eval,
        validation_steps=VALIDATION_STEPS,
        callbacks=callbacks,
    )

    return history.history[monitor][-1]
def fit_evaluate(trial):
    with tf.device("/GPU:0"):
        try:
            model = PairModel(
                n_blocks=trial.suggest_int("n_blocks", 1, 6),
                base_filters=trial.suggest_int("base_filters", 8, 32, log=True),
                kernel_size=trial.suggest_int("kernel_size", 3, 11),
                pool_strides=trial.suggest_int("pool_strides", 2, 5),
                dilation_rate=trial.suggest_int("dilation_rate", 1, 5),
                hidden_size=trial.suggest_int("hidden_size", 32, 512, log=True),
                input_shape=(pair_gen_train.window_size, pair_gen_train.channel_size),
            )
        except Exception:
            # Invalid model architecture.
            return 0.0

        initial_learning_rate = trial.suggest_loguniform(
            "initial_learning_rate", 1e-5, 1e-2
        )
        lr_schedule = tf.keras.experimental.CosineDecay(
            initial_learning_rate=initial_learning_rate,
            decay_steps=STEPS_PER_EPOCH * EPOCHS,
        )
        loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        # Checkpointing: one directory per trial, one checkpoint per epoch.
        utc_timestamp = calendar.timegm(trial.datetime_start.utctimetuple())
        run_name = f"trial-{trial.number:05d}-{utc_timestamp}"
        run_path = Path("checkpoints") / run_name
        checkpoint_path = run_path / "epoch-{epoch:02d}"
        checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(str(checkpoint_path))

        history = model.fit(
            ds_pairs_train.batch(BATCH_SIZE),
            steps_per_epoch=STEPS_PER_EPOCH,
            epochs=EPOCHS,
            validation_data=validation_data,
            callbacks=[checkpoint_cb, TFKerasPruningCallback(trial, "val_accuracy")],
        )

        with open(run_path / "history.pkl", "wb") as f:
            pickle.dump(history.history, f)
        with open(run_path / "params.json", "w") as f:
            json.dump(trial.params, f, sort_keys=True, indent=2)

        _, accuracy = model.evaluate(validation_data)
        return accuracy
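A hedged sketch of how the per-trial checkpoints written above could be retrieved after the study finishes; the glob pattern simply mirrors the run_name layout used in fit_evaluate, and the `study` object is assumed to come from the surrounding optimization loop.

# Sketch only: locate and load the last checkpoint of the best trial, assuming
# the checkpoints/trial-XXXXX-<timestamp>/epoch-YY layout created in fit_evaluate.
import glob

import tensorflow as tf

best_number = study.best_trial.number
run_dirs = glob.glob(f"checkpoints/trial-{best_number:05d}-*")
if run_dirs:
    epoch_dirs = sorted(glob.glob(run_dirs[0] + "/epoch-*"))
    best_model = tf.keras.models.load_model(epoch_dirs[-1])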
def objective(trial):
    # type: (optuna.trial.Trial) -> float

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(1, activation='sigmoid', input_dim=20))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(np.zeros((16, 20), np.float32),
              np.zeros((16,), np.int32),
              batch_size=1,
              epochs=1,
              callbacks=[TFKerasPruningCallback(trial, 'acc')],
              verbose=0)
    return 1.0
def test_tfkeras_pruning_callback_observation_isnan() -> None:
    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = study.ask()
    callback = TFKerasPruningCallback(trial, "loss")

    with pytest.raises(optuna.TrialPruned):
        callback.on_epoch_end(0, {"loss": 1.0})

    with pytest.raises(optuna.TrialPruned):
        callback.on_epoch_end(0, {"loss": float("nan")})
def test_tfkeras_pruning_callback_observation_isnan():
    # type: () -> None

    study = optuna.create_study(pruner=DeterministicPruner(True))
    trial = create_running_trial(study, 1.0)
    callback = TFKerasPruningCallback(trial, 'loss')

    with pytest.raises(optuna.structs.TrialPruned):
        callback.on_epoch_end(0, {'loss': 1.0})

    with pytest.raises(optuna.structs.TrialPruned):
        callback.on_epoch_end(0, {'loss': float('nan')})
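For context, the tests above exercise behaviour along these lines; what follows is a simplified sketch of a Keras pruning callback written for illustration, not Optuna's actual TFKerasPruningCallback implementation.

# Simplified sketch of what a Keras pruning callback does: report the monitored
# metric each epoch and raise TrialPruned when the pruner says to stop.
import warnings

import optuna
import tensorflow as tf


class SimplePruningCallback(tf.keras.callbacks.Callback):
    def __init__(self, trial, monitor):
        super().__init__()
        self._trial = trial
        self._monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        current = logs.get(self._monitor)
        if current is None:
            # Metric missing from logs: warn and skip reporting for this epoch.
            warnings.warn(f"Metric '{self._monitor}' is not available.", UserWarning)
            return
        self._trial.report(float(current), step=epoch)
        if self._trial.should_prune():
            raise optuna.TrialPruned(f"Trial was pruned at epoch {epoch}.")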
def objective(trial):
    # Clear clutter from previous TensorFlow graphs.
    tf.keras.backend.clear_session()

    # Metric to be monitored by Optuna.
    if tf.__version__ >= "2":
        monitor = "val_accuracy"
    else:
        monitor = "val_acc"

    # Create tf.keras model instance.
    model = create_model(trial)

    # Create dataset instances.
    ds_train = train_dataset()
    ds_eval = eval_dataset()

    # Create callbacks for early stopping and pruning.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=3),
        TFKerasPruningCallback(trial, monitor),
    ]

    # Train model.
    history = model.fit(
        ds_train,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=ds_eval,
        validation_steps=VALIDATION_STEPS,
        callbacks=callbacks,
    )

    # TODO(@sfujiwara): Investigate why the logger here is called twice.
    # tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
    # tf.compat.v1.logging.info('hello optuna')

    return history.history[monitor][-1]
def get_callbacks(cls, tensorboard: bool = True, cosine_annealing=False,
                  reduce_lr_on_plateau=True, early_stopping=True,
                  monitor='val_loss', **kwargs):
    """Provides easy access to the most commonly used callbacks.

    By default TensorBoard, ReduceLROnPlateau() and EarlyStopping (stop when the
    monitored value no longer improves) are selected automatically. In addition,
    if tftk.ENABLE_SUSPEND_RESUME_TRAINING() is enabled, a SuspendCallback is
    added so that training can be suspended and resumed.

    Parameters:
        tensorboard : True to save TensorBoard logs under BaseDir/TrainingName. Default True.
        cosine_annealing : True to control the learning rate with cosine annealing. Default False.
        reduce_lr_on_plateau : True to lower the learning rate with ReduceLROnPlateau when training plateaus. Default True.
        early_stopping : True to stop training with EarlyStopping when training plateaus. Default True.
        csv_logger : Use CSVLogger to record the training history.
        monitor {str} -- value to monitor (default: {"val_loss"})

    Keyword Arguments:
        profile_batch {str} -- start/end batch for profiling. No profiling when None.
        annealing_epoch : total number of epochs for cosine annealing. Default 100.
        init_lr : initial learning rate for cosine annealing. Default 0.01.
        min_lr : minimum learning rate; 1e-6 for cosine annealing, 1e-6 for ReduceLROnPlateau.
        patience : with ReduceLROnPlateau, lower the learning rate by `factor` after this many epochs without improvement of the monitored value.
        early_stopping_patience : with EarlyStopping, stop training after this many epochs without improvement of the monitored value.

    Returns:
        List[tf.keras.callbacks.Callback] -- list of callbacks to use for training.

    Example:
        from tftk.callbacks import HandyCallback
        callbacks = HandyCallback.get_callbacks(early_stopping_patience=15)
    """
    context = Context.get_instance()
    base = context[Context.TRAINING_BASE_DIR]
    name = context[Context.TRAINING_NAME]
    training_dir = Context.get_training_path()

    if not tf.io.gfile.exists(training_dir):
        tf.io.gfile.makedirs(training_dir)

    callbacks = []

    if tensorboard is True:
        tensorboard_log_dir = training_dir + os.path.sep + "log"
        profile_batch = kwargs.get("profile_batch", None)
        if profile_batch is not None:
            callbacks.append(
                tf.keras.callbacks.TensorBoard(log_dir=tensorboard_log_dir,
                                               profile_batch=profile_batch,
                                               histogram_freq=1))
        else:
            callbacks.append(
                tf.keras.callbacks.TensorBoard(log_dir=tensorboard_log_dir))

    # if save_weights == True:
    #     save_path = base + os.path.sep + "model.hdf5"
    #     callbacks.append(tf.keras.callbacks.ModelCheckpoint(
    #         filepath=save_path, monitor="val_acc",
    #         save_best_only=True, save_weights_only=True))

    if cosine_annealing:
        print("Callback-CosineAnnealing")
        annealing_epoch = kwargs.get("annealing_epoch", 100)
        init_lr = kwargs.get("init_lr", 0.01)
        min_lr = kwargs.get("min_lr", 1e-6)
        cosine_annealer = CosineAnnealingScheduler(annealing_epoch,
                                                   eta_max=init_lr,
                                                   eta_min=min_lr)
        callbacks.append(cosine_annealer)

    if reduce_lr_on_plateau:
        print("Callback-ReduceOnPlateau")
        patience = kwargs.get("patience", 5)
        factor = kwargs.get("factor", 0.25)
        min_lr = kwargs.get("min_lr", 1e-6)
        callbacks.append(
            tf.keras.callbacks.ReduceLROnPlateau(patience=patience,
                                                 factor=factor,
                                                 verbose=1,
                                                 min_lr=min_lr))

    # EarlyStopping is skipped while Optuna is driving the training, since
    # pruning takes over the role of stopping unpromising runs.
    if early_stopping and not context.get(Context.OPTUNA, False):
        early_stopping_patience = kwargs.get("early_stopping_patience", 8)
        callbacks.append(
            tf.keras.callbacks.EarlyStopping(monitor=monitor,
                                             patience=early_stopping_patience,
                                             verbose=1))

    if IS_SUSPEND_RESUME_TRAINING():
        print("Suspend Resume Callback")
        callbacks.append(SuspendCallback(monitor=monitor))

    if context.get(Context.OPTUNA, False):
        print("Using Optuna")
        trial = context.get(Context.OPTUNA_TRIAL)
        callbacks.append(TFKerasPruningCallback(trial, monitor=monitor))

    return callbacks
def _do_set_callbacks_task(self, base_model, train_dataset, valid_dataset, trial):
    # Save model checkpoint to result_dir/model/.
    model_save_file = os.path.join(self.config.model_path, "best_model.h5")
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=model_save_file,
                                                 save_best_only=True)

    # Learning rate schedule.
    if self.config.train_params.scheduler:
        funcs = self.config.train_params.scheduler["functions"]
        scheduler_name = next(iter(funcs.keys()))
        scheduler_params = next(iter(funcs.values()))
        scheduler_params["n_epoch"] = self.config.epochs
        scheduler_params["base_lr"] = self.config.train_params.learning_rate
        scheduler = keras.callbacks.LearningRateScheduler(
            getattr(schedulers, scheduler_name)(**scheduler_params))
    else:
        scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5,
                                                      patience=10,
                                                      verbose=1)

    # Plot history to result_dir/learning/.
    learning_path = self.config.learning_path
    plot_history = ncc.callbacks.PlotHistory(
        learning_path, ['loss', 'acc', 'iou_score', 'f1-score'])
    plot_learning_rate = ncc.callbacks.PlotLearningRate(learning_path)
    callbacks = [checkpoint, scheduler, plot_history, plot_learning_rate]

    if self.config.task == ncc.tasks.Task.SEMANTIC_SEGMENTATION:
        # Plot IoU history.
        iou_history = ncc.callbacks.IouHistory(
            save_dir=learning_path,
            valid_dataset=valid_dataset,
            class_names=self.config.class_names,
            batch_size=self.config.train_params.batch_size)

        # Predict validation samples to result_dir/image/validation/.
        val_save_dir = os.path.join(self.config.image_path, "validation")
        generate_sample_result = ncc.callbacks.GenerateSampleResult(
            val_save_dir=val_save_dir,
            valid_dataset=valid_dataset,
            nb_classes=self.config.nb_classes,
            batch_size=self.config.train_params.batch_size,
            segmentation_val_step=self.config.segmentation_val_step)
        callbacks.extend([iou_history, generate_sample_result])

        if self.config.optuna:
            # Trial pruning for Optuna.
            callbacks.append(TFKerasPruningCallback(trial, 'val_f1-score'))

    elif self.config.task == ncc.tasks.Task.CLASSIFICATION:
        if self.config.input_data_type == "video":
            # result_dir/model/
            batch_model_path = os.path.join(self.config.model_path,
                                            "batch_model.h5")
            batch_checkpoint = ncc.callbacks.BatchCheckpoint(
                learning_path,
                batch_model_path,
                token=self.config.slack_token,
                channel=self.config.slack_channel,
                period=self.config.batch_period)
            callbacks.append(batch_checkpoint)

        if self.config.optuna:
            # Trial pruning for Optuna.
            callbacks.append(TFKerasPruningCallback(trial, 'val_acc'))

    if self.config.slack_channel and self.config.slack_token:
        if self.config.task == ncc.tasks.Task.SEMANTIC_SEGMENTATION:
            file_name = os.path.join(learning_path, "IoU.png")
        else:
            file_name = os.path.join(learning_path, "acc.png")
        slack_logging = ncc.callbacks.SlackLogger(
            logger_file=file_name,
            token=self.config.slack_token,
            channel=self.config.slack_channel,
            title=self.config.train_params.model_name,
        )
        callbacks.append(slack_logging)

    # Early stopping.
    if self.config.early_stopping:
        early_stopping = keras.callbacks.EarlyStopping(
            monitor=self.config.monitor,
            patience=self.config.patience,
            mode='auto')
        callbacks.append(early_stopping)

    return callbacks