def get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps):
    """Build a Trainer wired to checkpoint its full state on best validation loss.

    Relies on the module-level `save_to_dir` and `trainer_file_name` for the
    checkpoint location.  Train and validation phases share `data_loader`.
    """
    device, model, loss_func, optimizer, scheduler, metrics = get_trainer_base(D_in, H, D_out)

    # save_full_trainer=True persists the whole trainer, not just model weights;
    # checkpoint only when validation loss hits a new minimum.
    checkpoint = ModelCheckPoint(checkpoint_dir=save_to_dir,
                                 checkpoint_file_name=trainer_file_name,
                                 callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                                  stats_type=StatsType.VAL,
                                                                  monitor_mode=MonitorMode.MIN),
                                 save_best_only=True,
                                 save_full_trainer=True)

    # Accuracy should trend upward, so StatsPrint tracks it in MAX mode.
    accuracy_monitor = CallbackMonitor(monitor_type=MonitorType.METRIC,
                                       stats_type=StatsType.TRAIN,
                                       monitor_mode=MonitorMode.MAX,
                                       metric_name='Accuracy')

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=data_loader,
                   val_data_loader=data_loader,
                   train_steps=data_loader_steps,
                   val_steps=data_loader_steps,
                   callbacks=[LossOptimizerHandler(),
                              checkpoint,
                              SchedulerStep(),
                              StatsPrint(train_metrics_monitors=accuracy_monitor)],
                   name='Train-Save-Load-Predict-Example')
def test_stats_print_validations(self):
    """StatsPrint must reject non-TRAIN train-metric monitors and accept TRAIN ones."""
    # A monitor bound to VAL stats is invalid as a train-metrics monitor.
    invalid_monitor = CallbackMonitor(monitor_type=MonitorType.METRIC,
                                      stats_type=StatsType.VAL,
                                      monitor_mode=MonitorMode.MAX,
                                      metric_name='Accuracy')
    self.assertRaises(ValueError, StatsPrint, train_metrics_monitors=invalid_monitor)

    # The same monitor on TRAIN stats constructs without raising.
    StatsPrint(train_metrics_monitors=CallbackMonitor(monitor_type=MonitorType.METRIC,
                                                      stats_type=StatsType.TRAIN,
                                                      monitor_mode=MonitorMode.MAX,
                                                      metric_name='Accuracy'))
def get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps):
    """Build a Trainer with Tensorboard logging, full-trainer checkpointing and
    per-metric StatsPrint monitors.

    Relies on module-level `save_to_dir` and `trainer_file_name` for the
    checkpoint location; both data loaders share `data_loader`.
    """
    base_path = os.path.dirname(__file__) + '/'
    tensorboard_data_dir = base_path + './tensorboard/'

    device, model, loss_func, optimizer, scheduler, metrics = get_trainer_base(D_in, H, D_out)

    # Checkpoint the entire trainer whenever validation loss reaches a new minimum.
    checkpoint = ModelCheckPoint(checkpoint_dir=save_to_dir,
                                 checkpoint_file_name=trainer_file_name,
                                 callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                                  stats_type=StatsType.VAL,
                                                                  monitor_mode=MonitorMode.MIN),
                                 save_best_only=True,
                                 save_full_trainer=True)

    # Accuracy is expected to rise (MAX) while InAccuracy should fall (MIN),
    # so each metric gets its own monitor for StatsPrint.
    stats_print = StatsPrint(train_metrics_monitors=[
                                 CallbackMonitor(monitor_type=MonitorType.METRIC,
                                                 stats_type=StatsType.TRAIN,
                                                 monitor_mode=MonitorMode.MAX,
                                                 metric_name='Accuracy'),
                                 CallbackMonitor(monitor_type=MonitorType.METRIC,
                                                 stats_type=StatsType.TRAIN,
                                                 monitor_mode=MonitorMode.MIN,
                                                 metric_name='InAccuracy')
                             ],
                             print_confusion_matrix_normalized=True)

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=data_loader,
                   val_data_loader=data_loader,
                   train_steps=data_loader_steps,
                   val_steps=data_loader_steps,
                   callbacks=[LossOptimizerHandler(),
                              Tensorboard(summary_writer_dir=tensorboard_data_dir),
                              checkpoint,
                              SchedulerStep(),
                              stats_print],
                   name='Save-And-Load-Example')
def get_trainer(params):
    """Build a Trainer for the DataLoader example.

    `params` supplies 'H', 'D_out' and 'embedding_dim'; the model, loaders
    (`train_data_loader`/`val_data_loader`) and datasets come from module scope.
    """
    device = tu.get_gpu_device_if_available()

    # Model and loss live on the selected device.
    model = Model(params['H'], params['D_out'], num_embeddings, params['embedding_dim']).to(device)
    loss_func = nn.BCEWithLogitsLoss().to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.1)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
    metrics = BinaryAccuracyWithLogits(name='acc')

    # Stop after 3 epochs without a new validation-loss minimum.
    early_stopping = EarlyStopping(callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                                    stats_type=StatsType.VAL,
                                                                    patience=3,
                                                                    monitor_mode=MonitorMode.MIN))

    callbacks = [LossOptimizerHandler(),
                 # Step the scheduler every training batch.
                 SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN),
                 early_stopping,
                 StatsPrint(round_values_on_print_to=7)]

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=train_data_loader,
                   val_data_loader=val_data_loader,
                   train_steps=len(train_dataset),
                   val_steps=len(val_dataset),
                   callbacks=callbacks,
                   name='DataLoader-Example')
def get_trainer(config, num_embeddings, train_data_loader, val_data_loader,
                train_steps, val_steps, checkpoint_dir, checkpoint_file_name,
                summary_writer_dir):
    """Build the multi-input example Trainer: SGD + ReduceLROnPlateau,
    Tensorboard logging, early stopping and best-only checkpointing."""
    device = tu.get_gpu_device_if_available()
    model = TestModel(config, num_embeddings).to(device)

    optimizer = optim.SGD(params=model.parameters(),
                          lr=config.LEARNING_RATE,
                          momentum=0.9)
    # ReduceLROnPlateau needs a metric value each step, so the SchedulerStep
    # callback below must supply one via scheduler_parameters_func.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     patience=config.EARLY_STOPPING_PATIENCE // 2,
                                                     verbose=True)

    loss_func = nn.BCEWithLogitsLoss().to(device)
    metrics = [BinaryAccuracyWithLogits(name='Accuracy'),
               TruePositives(num_classes=2, threshold=0, name='TP')]

    callbacks = [
        LossOptimizerHandler(),
        # Feed the current validation loss into scheduler.step(...).
        SchedulerStep(scheduler_parameters_func=lambda callback_context: callback_context.val_stats.get_loss()),
        Tensorboard(summary_writer_dir=summary_writer_dir),
        EarlyStopping(apply_on_phase=Phase.EPOCH_END,
                      apply_on_states=State.EXTERNAL,
                      callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                       stats_type=StatsType.VAL,
                                                       monitor_mode=MonitorMode.MIN,
                                                       patience=config.EARLY_STOPPING_PATIENCE)),
        StatsPrint(apply_on_phase=Phase.EPOCH_END,
                   round_values_on_print_to=7,
                   print_confusion_matrix_normalized=True),
        # Keep ModelCheckPoint last so every other callback's state is already
        # up to date when a save happens.
        ModelCheckPoint(checkpoint_dir=checkpoint_dir,
                        checkpoint_file_name=checkpoint_file_name,
                        callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                         stats_type=StatsType.VAL,
                                                         monitor_mode=MonitorMode.MIN),
                        save_best_only=True,
                        round_values_on_print_to=7),
    ]

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=train_data_loader,
                   val_data_loader=val_data_loader,
                   train_steps=train_steps,
                   val_steps=val_steps,
                   callbacks=callbacks,
                   name='Multi-Input-Example')
def test_save_and_load(self):
    """Train with full-trainer checkpointing, reload the saved trainer, and
    verify epoch, learning rates and the checkpoint monitor's best value match."""
    gu.seed_all(42)  # deterministic run so both trainers see identical data

    save_to_dir = os.path.dirname(__file__) + '/trainer_checkpoint/'
    trainer_file_name = 'trainer'

    device = tu.get_gpu_device_if_available()
    model = eu.get_basic_model(10, 10, 10).to(device)
    loss_func = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = KerasDecay(optimizer, 0.0001, last_step=-1)
    metrics = CategoricalAccuracyWithLogits(name='acc')

    # save_best_only=False -> a checkpoint per epoch; full trainer state saved.
    callbacks = [LossOptimizerHandler(),
                 ModelCheckPoint(checkpoint_dir=save_to_dir,
                                 checkpoint_file_name=trainer_file_name,
                                 callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                                  stats_type=StatsType.VAL,
                                                                  monitor_mode=MonitorMode.MIN),
                                 save_best_only=False,
                                 save_full_trainer=True,
                                 verbose=0),
                 SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN),
                 StatsPrint()]

    data_loader = eu.examples_data_generator(10, 10, 10, category_out=True)
    data_loader_steps = 100
    num_epochs = 5

    trainer = Trainer(model=model,
                      device=device,
                      loss_func=loss_func,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      metrics=metrics,
                      train_data_loader=data_loader,
                      val_data_loader=data_loader,
                      train_steps=data_loader_steps,
                      val_steps=data_loader_steps,
                      callbacks=callbacks,
                      name='Trainer-Test')
    trainer.train(num_epochs, verbose=0)

    # Reload from the last per-epoch checkpoint file.
    loaded_trainer = Trainer.load_trainer(dir_path=save_to_dir,
                                          file_name=trainer_file_name + f'_epoch_{num_epochs}',
                                          model=model,
                                          device=device,
                                          loss_func=loss_func,
                                          optimizer=optimizer,
                                          scheduler=scheduler,
                                          train_data_loader=data_loader,
                                          val_data_loader=data_loader,
                                          train_steps=data_loader_steps,
                                          val_steps=data_loader_steps)

    self.assertEqual(loaded_trainer.epoch, trainer.epoch)
    self.assertListEqual(tu.get_lrs_from_optimizer(loaded_trainer.optimizer),
                         tu.get_lrs_from_optimizer(trainer.optimizer))
    # callbacks[1] is the ModelCheckPoint in both trainers.
    self.assertEqual(loaded_trainer.callbacks[1].monitor._get_best(),
                     trainer.callbacks[1].monitor._get_best())
def test_save_and_predict(self):
    """End-to-end predictor round-trip: train with best-only checkpointing,
    then verify that predictors built (a) from the live trainer, (b) from the
    model checkpoint and (c) from the saved trainer all reproduce the trained
    trainer's prediction for the same sample."""
    save_to_dir = os.path.dirname(__file__) + '/trainer_checkpoint/'
    checkpoint_file_name = 'checkpoint'
    trainer_file_name = 'trainer'

    device = tu.get_gpu_device_if_available()
    model = TestModel().to(device)
    loss_func = nn.BCEWithLogitsLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = None
    metrics = BinaryAccuracyWithLogits(name='acc')

    # Model-weights-only checkpoint, best validation loss only.
    callbacks = [LossOptimizerHandler(),
                 ModelCheckPoint(checkpoint_dir=save_to_dir,
                                 checkpoint_file_name=checkpoint_file_name,
                                 callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS,
                                                                  stats_type=StatsType.VAL,
                                                                  monitor_mode=MonitorMode.MIN),
                                 save_best_only=True,
                                 save_full_trainer=False)]

    data_loader = data_generator()
    data_loader_steps = 100
    num_epochs = 5

    trainer = Trainer(model=model,
                      device=device,
                      loss_func=loss_func,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      metrics=metrics,
                      train_data_loader=data_loader,
                      val_data_loader=data_loader,
                      train_steps=data_loader_steps,
                      val_steps=data_loader_steps,
                      callbacks=callbacks,
                      name='Predictor-Trainer-Test')

    x1_x2, y = next(data_loader)
    _ = trainer.predict_batch(x1_x2)  # smoke-check that batch prediction runs

    # One sample (first element of each of the two inputs).
    sample = [x1_x2[0][0], x1_x2[1][0]]

    sample_prediction_before_train = trainer.predict_sample(sample)
    trainer.train(num_epochs, verbose=0)
    sample_prediction_from_trainer = trainer.predict_sample(sample)

    trainer.save_trainer(save_to_dir, trainer_file_name)

    # (a) Predictor created directly from the live trainer.
    predictor_from_trainer = Predictor.from_trainer(trainer)
    sample_prediction_from_predictor = predictor_from_trainer.predict_sample(sample)

    # Training changed the output; the predictor matches the trained trainer.
    self.assertFalse((sample_prediction_before_train == sample_prediction_from_trainer).all())
    self.assertTrue((sample_prediction_from_predictor == sample_prediction_from_trainer).all())

    # (b) Predictor loaded from the best-only model checkpoint into a fresh model.
    fresh_device = tu.get_gpu_device_if_available()
    fresh_model = TestModel().to(fresh_device)
    loaded_predictor = Predictor.from_checkpoint(save_to_dir,
                                                 checkpoint_file_name + '_best_only',
                                                 fresh_model,
                                                 fresh_device)
    sample_prediction_from_loaded_predictor = loaded_predictor.predict_sample(sample)

    self.assertFalse((sample_prediction_before_train == sample_prediction_from_trainer).all())
    self.assertTrue((sample_prediction_from_loaded_predictor == sample_prediction_from_trainer).all())

    # (c) Predictor loaded from the saved full-trainer checkpoint.
    fresh_device = tu.get_gpu_device_if_available()
    fresh_model = TestModel().to(fresh_device)
    loaded_predictor = Predictor.from_checkpoint(save_to_dir,
                                                 trainer_file_name,
                                                 fresh_model,
                                                 fresh_device)
    sample_prediction_from_loaded_predictor = loaded_predictor.predict_sample(sample)

    self.assertFalse((sample_prediction_before_train == sample_prediction_from_trainer).all())
    self.assertTrue((sample_prediction_from_loaded_predictor == sample_prediction_from_trainer).all())