Example #1
0
def get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps):
    """Build a Trainer whose ModelCheckPoint callback persists the FULL trainer state.

    Relies on module-level ``save_to_dir`` and ``trainer_file_name`` for the
    checkpoint location.  The same data loader is reused for train and
    validation, which is fine for an example/smoke-test setup.
    """
    device, model, loss_func, optimizer, scheduler, metrics = get_trainer_base(D_in, H, D_out)

    # Checkpoint whenever the validation loss improves (lower is better).
    checkpoint_monitor = CallbackMonitor(monitor_type=MonitorType.LOSS,
                                         stats_type=StatsType.VAL,
                                         monitor_mode=MonitorMode.MIN)

    # Accuracy should go UP, so StatsPrint tracks it with MonitorMode.MAX.
    accuracy_monitor = CallbackMonitor(monitor_type=MonitorType.METRIC,
                                       stats_type=StatsType.TRAIN,
                                       monitor_mode=MonitorMode.MAX,
                                       metric_name='Accuracy')

    callbacks = [
        LossOptimizerHandler(),
        # save_full_trainer=True stores the whole trainer, not just model weights.
        ModelCheckPoint(checkpoint_dir=save_to_dir,
                        checkpoint_file_name=trainer_file_name,
                        callback_monitor=checkpoint_monitor,
                        save_best_only=True,
                        save_full_trainer=True),
        SchedulerStep(),
        StatsPrint(train_metrics_monitors=accuracy_monitor),
    ]

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=data_loader,
                   val_data_loader=data_loader,
                   train_steps=data_loader_steps,
                   val_steps=data_loader_steps,
                   callbacks=callbacks,
                   name='Train-Save-Load-Predict-Example')
Example #2
0
 def test_stats_print_validations(self):
     """StatsPrint input validation: train_metrics_monitors must monitor TRAIN stats."""
     # INVALID INIT: a monitor with stats_type=StatsType.VAL is rejected —
     # StatsPrint's train metrics monitors may only watch training stats,
     # so construction must raise ValueError.
     self.assertRaises(ValueError,
                       StatsPrint,
                       train_metrics_monitors=CallbackMonitor(
                           monitor_type=MonitorType.METRIC,
                           stats_type=StatsType.VAL,
                           monitor_mode=MonitorMode.MAX,
                           metric_name='Accuracy'))
     # VALID INIT: the same monitor with stats_type=StatsType.TRAIN
     # constructs without raising.
     StatsPrint(train_metrics_monitors=CallbackMonitor(
         monitor_type=MonitorType.METRIC,
         stats_type=StatsType.TRAIN,
         monitor_mode=MonitorMode.MAX,
         metric_name='Accuracy'))
Example #3
0
def get_trainer(N, D_in, H, D_out, num_epochs, data_loader, data_loader_steps):
    """Build a Trainer with Tensorboard logging and a full-trainer checkpoint.

    Relies on module-level ``save_to_dir`` and ``trainer_file_name`` for the
    checkpoint location; Tensorboard output goes next to this file under
    ``./tensorboard/``.
    """
    base_path = os.path.dirname(__file__) + '/'
    tensorboard_data_dir = base_path + './tensorboard/'

    device, model, loss_func, optimizer, scheduler, metrics = get_trainer_base(D_in, H, D_out)

    # Checkpoint whenever the validation loss improves (lower is better).
    checkpoint_monitor = CallbackMonitor(monitor_type=MonitorType.LOSS,
                                         stats_type=StatsType.VAL,
                                         monitor_mode=MonitorMode.MIN)

    # Accuracy needs to go UP (MAX) while InAccuracy needs to go DOWN (MIN),
    # so StatsPrint gets a dedicated CallbackMonitor per metric.
    metric_monitors = [
        CallbackMonitor(monitor_type=MonitorType.METRIC,
                        stats_type=StatsType.TRAIN,
                        monitor_mode=MonitorMode.MAX,
                        metric_name='Accuracy'),
        CallbackMonitor(monitor_type=MonitorType.METRIC,
                        stats_type=StatsType.TRAIN,
                        monitor_mode=MonitorMode.MIN,
                        metric_name='InAccuracy'),
    ]

    callbacks = [
        LossOptimizerHandler(),
        Tensorboard(summary_writer_dir=tensorboard_data_dir),
        # save_full_trainer=True stores the whole trainer, not just model weights.
        ModelCheckPoint(checkpoint_dir=save_to_dir,
                        checkpoint_file_name=trainer_file_name,
                        callback_monitor=checkpoint_monitor,
                        save_best_only=True,
                        save_full_trainer=True),
        SchedulerStep(),
        StatsPrint(train_metrics_monitors=metric_monitors,
                   print_confusion_matrix_normalized=True),
    ]

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=data_loader,
                   val_data_loader=data_loader,
                   train_steps=data_loader_steps,
                   val_steps=data_loader_steps,
                   callbacks=callbacks,
                   name='Save-And-Load-Example')
Example #4
0
File: train.py — Project: RoySadaka/lpd
def get_trainer(params):
    """Build a Trainer for the embedding model with per-batch LR decay and early stopping.

    ``params`` supplies the model hyper-parameters ('H', 'D_out',
    'embedding_dim'); ``num_embeddings`` and the data loaders/datasets come
    from module scope.
    """
    device = tu.get_gpu_device_if_available()

    # Model and loss both live on the selected device.
    model = Model(params['H'], params['D_out'], num_embeddings,
                  params['embedding_dim']).to(device)
    loss_func = nn.BCEWithLogitsLoss().to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.1)

    # Multiplicative decay of 0.99 applied every scheduler step.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)

    metrics = BinaryAccuracyWithLogits(name='acc')

    # Stop training once validation loss fails to improve for 3 epochs.
    early_stop_monitor = CallbackMonitor(monitor_type=MonitorType.LOSS,
                                         stats_type=StatsType.VAL,
                                         patience=3,
                                         monitor_mode=MonitorMode.MIN)

    callbacks = [
        LossOptimizerHandler(),
        # Step the LR scheduler at the end of every TRAIN batch.
        SchedulerStep(apply_on_phase=Phase.BATCH_END,
                      apply_on_states=State.TRAIN),
        EarlyStopping(callback_monitor=early_stop_monitor),
        StatsPrint(round_values_on_print_to=7),
    ]

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=train_data_loader,
                   val_data_loader=val_data_loader,
                   train_steps=len(train_dataset),
                   val_steps=len(val_dataset),
                   callbacks=callbacks,
                   name='DataLoader-Example')
Example #5
0
def get_trainer(config, num_embeddings, train_data_loader, val_data_loader,
                train_steps, val_steps, checkpoint_dir, checkpoint_file_name,
                summary_writer_dir):
    """Build a fully-featured Trainer: plateau LR scheduling, Tensorboard,
    early stopping, stats printing and best-only checkpointing.

    All external resources (data loaders, output dirs) are passed in, so this
    factory has no module-level dependencies beyond the lpd/torch imports.
    """
    device = tu.get_gpu_device_if_available()

    model = TestModel(config, num_embeddings).to(device)

    optimizer = optim.SGD(params=model.parameters(),
                          lr=config.LEARNING_RATE,
                          momentum=0.9)

    # ReduceLROnPlateau requires a metric value on every step() call; that
    # value is supplied below through SchedulerStep's scheduler_parameters_func.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        patience=config.EARLY_STOPPING_PATIENCE // 2,
        verbose=True)

    loss_func = nn.BCEWithLogitsLoss().to(device)

    metrics = [
        BinaryAccuracyWithLogits(name='Accuracy'),
        TruePositives(num_classes=2, threshold=0, name='TP')
    ]

    # Stop once validation loss has not improved for EARLY_STOPPING_PATIENCE epochs.
    early_stop_monitor = CallbackMonitor(monitor_type=MonitorType.LOSS,
                                         stats_type=StatsType.VAL,
                                         monitor_mode=MonitorMode.MIN,
                                         patience=config.EARLY_STOPPING_PATIENCE)

    # Checkpoint whenever the validation loss improves (lower is better).
    checkpoint_monitor = CallbackMonitor(monitor_type=MonitorType.LOSS,
                                         stats_type=StatsType.VAL,
                                         monitor_mode=MonitorMode.MIN)

    callbacks = [
        LossOptimizerHandler(),
        # Feed the current validation loss into ReduceLROnPlateau.step(...).
        SchedulerStep(scheduler_parameters_func=lambda callback_context:
                      callback_context.val_stats.get_loss()),
        Tensorboard(summary_writer_dir=summary_writer_dir),
        EarlyStopping(apply_on_phase=Phase.EPOCH_END,
                      apply_on_states=State.EXTERNAL,
                      callback_monitor=early_stop_monitor),
        StatsPrint(apply_on_phase=Phase.EPOCH_END,
                   round_values_on_print_to=7,
                   print_confusion_matrix_normalized=True),
        # ModelCheckPoint is kept LAST so that, when it saves, the states of
        # all other callbacks are already up to date.
        ModelCheckPoint(checkpoint_dir=checkpoint_dir,
                        checkpoint_file_name=checkpoint_file_name,
                        callback_monitor=checkpoint_monitor,
                        save_best_only=True,
                        round_values_on_print_to=7),
    ]

    return Trainer(model=model,
                   device=device,
                   loss_func=loss_func,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   metrics=metrics,
                   train_data_loader=train_data_loader,
                   val_data_loader=val_data_loader,
                   train_steps=train_steps,
                   val_steps=val_steps,
                   callbacks=callbacks,
                   name='Multi-Input-Example')
Example #6
0
    def test_save_and_load(self):
        """Train with a full-trainer checkpoint, reload it, and verify that
        epoch count, optimizer learning rates and the checkpoint monitor's
        best value all survive the save/load round trip."""
        gu.seed_all(42)  # deterministic run so both trainers see identical data
        save_to_dir = os.path.dirname(__file__) + '/trainer_checkpoint/'
        trainer_file_name = 'trainer'

        device = tu.get_gpu_device_if_available()

        model = eu.get_basic_model(10, 10, 10).to(device)

        loss_func = nn.CrossEntropyLoss().to(device)

        optimizer = optim.Adam(model.parameters(), lr=1e-4)

        scheduler = KerasDecay(optimizer, 0.0001, last_step=-1)

        metrics = CategoricalAccuracyWithLogits(name='acc')

        # save_best_only=False + save_full_trainer=True => a full trainer file
        # is written every epoch, suffixed '_epoch_<n>' (relied on below).
        callbacks = [
                        LossOptimizerHandler(),
                        ModelCheckPoint(checkpoint_dir=save_to_dir, 
                                        checkpoint_file_name=trainer_file_name, 
                                        callback_monitor=CallbackMonitor(monitor_type=MonitorType.LOSS, 
                                                                         stats_type=StatsType.VAL, 
                                                                         monitor_mode=MonitorMode.MIN),
                                        save_best_only=False, 
                                        save_full_trainer=True,
                                        verbose=0),
                        SchedulerStep(apply_on_phase=Phase.BATCH_END, apply_on_states=State.TRAIN),
                        StatsPrint()
                    ]

        data_loader = eu.examples_data_generator(10, 10, 10, category_out=True)
        data_loader_steps = 100
        num_epochs = 5

        trainer = Trainer(model=model, 
                        device=device, 
                        loss_func=loss_func, 
                        optimizer=optimizer,
                        scheduler=scheduler,
                        metrics=metrics, 
                        train_data_loader=data_loader, 
                        val_data_loader=data_loader,
                        train_steps=data_loader_steps,
                        val_steps=data_loader_steps,
                        callbacks=callbacks,
                        name='Trainer-Test')

        trainer.train(num_epochs, verbose=0)

        # Reload the checkpoint written at the final epoch.
        loaded_trainer = Trainer.load_trainer(dir_path=save_to_dir,
                                            file_name=trainer_file_name + f'_epoch_{num_epochs}',
                                            model=model,
                                            device=device,
                                            loss_func=loss_func,
                                            optimizer=optimizer,
                                            scheduler=scheduler,
                                            train_data_loader=data_loader, 
                                            val_data_loader=data_loader,
                                            train_steps=data_loader_steps,
                                            val_steps=data_loader_steps)

        self.assertEqual(loaded_trainer.epoch, trainer.epoch)
        self.assertListEqual(tu.get_lrs_from_optimizer(loaded_trainer.optimizer), tu.get_lrs_from_optimizer(trainer.optimizer))
        # callbacks[1] is the ModelCheckPoint; its monitor's best-seen value
        # must be restored too (uses private _get_best — NOTE(review): relies
        # on lpd internals, confirm against the library version in use).
        self.assertEqual(loaded_trainer.callbacks[1].monitor._get_best(), trainer.callbacks[1].monitor._get_best())
Example #7
0
    def test_save_and_predict(self):
        """End-to-end predictor test: train a model, then verify that a
        Predictor built (a) from the live trainer, (b) from the model
        checkpoint file, and (c) from the saved trainer file all produce the
        same prediction as the trainer itself — and that training actually
        changed the prediction."""
        save_to_dir = os.path.dirname(__file__) + '/trainer_checkpoint/'
        checkpoint_file_name = 'checkpoint'
        trainer_file_name = 'trainer'

        device = tu.get_gpu_device_if_available()

        model = TestModel().to(device)

        loss_func = nn.BCEWithLogitsLoss().to(device)

        optimizer = optim.Adam(model.parameters(), lr=1e-4)

        scheduler = None

        metrics = BinaryAccuracyWithLogits(name='acc')

        # save_full_trainer=False => only the model checkpoint is written here;
        # the full trainer is saved explicitly via save_trainer() below.
        callbacks = [
            LossOptimizerHandler(),
            ModelCheckPoint(checkpoint_dir=save_to_dir,
                            checkpoint_file_name=checkpoint_file_name,
                            callback_monitor=CallbackMonitor(
                                monitor_type=MonitorType.LOSS,
                                stats_type=StatsType.VAL,
                                monitor_mode=MonitorMode.MIN),
                            save_best_only=True,
                            save_full_trainer=False),
        ]

        data_loader = data_generator()
        data_loader_steps = 100
        num_epochs = 5

        trainer = Trainer(model=model,
                          device=device,
                          loss_func=loss_func,
                          optimizer=optimizer,
                          scheduler=scheduler,
                          metrics=metrics,
                          train_data_loader=data_loader,
                          val_data_loader=data_loader,
                          train_steps=data_loader_steps,
                          val_steps=data_loader_steps,
                          callbacks=callbacks,
                          name='Predictor-Trainer-Test')

        # data_generator yields a two-input batch: x1_x2 is a pair of tensors.
        x1_x2, y = next(data_loader)
        _ = trainer.predict_batch(x1_x2)  # JUST TO CHECK THAT IT FUNCTIONS

        # A single sample: first element from each of the two inputs.
        sample = [x1_x2[0][0], x1_x2[1][0]]

        # PREDICT BEFORE TRAIN
        sample_prediction_before_train = trainer.predict_sample(sample)

        trainer.train(num_epochs, verbose=0)

        # PREDICT AFTER TRAIN
        sample_prediction_from_trainer = trainer.predict_sample(sample)

        # SAVE THE TRAINER
        trainer.save_trainer(save_to_dir, trainer_file_name)

        #-----------------------------------------------#
        # CREATE PREDICTOR FROM CURRENT TRAINER
        #-----------------------------------------------#
        predictor_from_trainer = Predictor.from_trainer(trainer)

        # PREDICT FROM PREDICTOR
        sample_prediction_from_predictor = predictor_from_trainer.predict_sample(
            sample)

        # Training must have changed the output ...
        self.assertFalse(
            (sample_prediction_before_train == sample_prediction_from_trainer
             ).all())
        # ... and the predictor must agree with the trainer exactly.
        self.assertTrue(
            (sample_prediction_from_predictor == sample_prediction_from_trainer
             ).all())

        #-----------------------------------------------#
        # LOAD MODEL CHECKPOINT AS NEW PREDICTOR
        #-----------------------------------------------#
        # Fresh model/device so nothing is shared with the trained instance;
        # '_best_only' is the suffix ModelCheckPoint uses with save_best_only=True.
        fresh_device = tu.get_gpu_device_if_available()
        fresh_model = TestModel().to(fresh_device)
        loaded_predictor = Predictor.from_checkpoint(
            save_to_dir, checkpoint_file_name + '_best_only', fresh_model,
            fresh_device)

        # PREDICT AFTER LOAD
        sample_prediction_from_loaded_predictor = loaded_predictor.predict_sample(
            sample)

        self.assertFalse(
            (sample_prediction_before_train == sample_prediction_from_trainer
             ).all())
        self.assertTrue((sample_prediction_from_loaded_predictor ==
                         sample_prediction_from_trainer).all())

        #-----------------------------------------------#
        # LOAD TRAINER CHECKPOINT AS NEW PREDICTOR
        #-----------------------------------------------#
        # Predictor.from_checkpoint also accepts a full-trainer file.
        fresh_device = tu.get_gpu_device_if_available()
        fresh_model = TestModel().to(fresh_device)
        loaded_predictor = Predictor.from_checkpoint(save_to_dir,
                                                     trainer_file_name,
                                                     fresh_model, fresh_device)

        # PREDICT AFTER LOAD
        sample_prediction_from_loaded_predictor = loaded_predictor.predict_sample(
            sample)

        self.assertFalse(
            (sample_prediction_before_train == sample_prediction_from_trainer
             ).all())
        self.assertTrue((sample_prediction_from_loaded_predictor ==
                         sample_prediction_from_trainer).all())