def fit(self, module):
        """Run the full training loop for ``module`` with a Horovod
        distributed optimizer.

        Prepares logging/checkpointing, moves the module to GPU, wraps its
        optimizer for multi-worker gradient averaging, then alternates
        train / validation epochs from ``module.current_epoch`` up to
        ``self.max_epochs``, checkpointing and stepping the LR scheduler
        once per epoch.

        Args:
            module: training module exposing ``config``, ``optimizer``,
                ``scheduler``, dataloader factories and a mutable
                ``current_epoch`` counter. Mutated in place
                (``trainer`` attached, ``current_epoch`` advanced).
        """
        # Attach this trainer so the module (and its callbacks) can reach it
        module.trainer = self
        # Update and print module configuration
        prep_logger_and_checkpoint(module)
        print_config(module.config)

        # Send module to GPU
        module = module.to('cuda')
        # Configure optimizer and scheduler
        module.configure_optimizers()

        # Wrap the optimizer so gradients are averaged across Horovod
        # workers; no gradient compression is applied.
        compression = hvd.Compression.none
        optimizer = hvd.DistributedOptimizer(module.optimizer,
            named_parameters=module.named_parameters(), compression=compression)
        scheduler = module.scheduler

        # Get train and val dataloaders
        train_dataloader = module.train_dataloader()
        val_dataloaders = module.val_dataloader()

        # Epoch loop, resuming from module.current_epoch; the index itself
        # is unused, so discard it.
        for _ in range(module.current_epoch, self.max_epochs):
            # Train
            self.train(train_dataloader, module, optimizer)
            # Validation
            validation_output = self.validate(val_dataloaders, module)
            # Checkpoint when the validation result warrants it
            self.check_and_save(module, validation_output)
            # Update current epoch
            module.current_epoch += 1
            # One LR-scheduler step per epoch
            scheduler.step()
# Example #2
    def fit(self, module):
        """Run the full training loop for ``module`` on a single GPU.

        Prepares logging/checkpointing, moves the module to GPU, then
        alternates train / validation epochs from ``module.current_epoch``
        up to ``self.max_epochs``, checkpointing and stepping the LR
        scheduler once per epoch. Unlike the distributed variant, the
        module's own optimizer is used directly (no Horovod wrapping).

        Args:
            module: training module exposing ``config``, ``optimizer``,
                ``scheduler``, dataloader factories and a mutable
                ``current_epoch`` counter. Mutated in place
                (``trainer`` attached, ``current_epoch`` advanced).
        """
        # Attach this trainer so the module (and its callbacks) can reach it
        module.trainer = self
        # Update and print module configuration
        prep_logger_and_checkpoint(module)
        print_config(module.config)

        # Send module to GPU
        module = module.to('cuda')
        # Configure optimizer and scheduler
        module.configure_optimizers()
        # Use the module's optimizer directly (single-process training)
        optimizer = module.optimizer
        scheduler = module.scheduler

        # Get train and val dataloaders
        train_dataloader = module.train_dataloader()
        val_dataloaders = module.val_dataloader()

        # Epoch loop, resuming from module.current_epoch; the index itself
        # is unused, so discard it.
        for _ in range(module.current_epoch, self.max_epochs):
            # Train
            self.train(train_dataloader, module, optimizer)
            # Validation
            validation_output = self.validate(val_dataloaders, module)
            # Checkpoint when the validation result warrants it
            self.check_and_save(module, validation_output)
            # Update current epoch
            module.current_epoch += 1
            # One LR-scheduler step per epoch
            scheduler.step()