def load_model(self):
    if self.opt.model == 'light_unet':
        model = Light_UNet(self.opt.num_channels,
                           self.opt.num_classes).to(self.device)
    elif self.opt.model == 'lighter_unet':
        model = Lighter_UNet(self.opt.num_channels,
                             self.opt.num_classes).to(self.device)
    else:
        raise Exception('Undefined model!')
    myprint(
        "Number of parameters in model {} : {}".format(
            self.opt.model, sum(p.numel() for p in model.parameters())),
        self.logger)
    return model
def main_train():
    # Initialize parameter parser and logger
    arg_parser = get_parser_with_args()
    opt = arg_parser.parse_args()
    opt.result_dir = opt.result_dir + '_' + opt.model
    mkdir(opt.result_dir)
    log_file_name = opt.result_dir + '/' + opt.log_file
    logger = get_logger(log_file_name)
    myprint("******************Begin Training************************\n",
            logger)
    myprint("Begin! Args for training: {} \n".format(opt), logger)

    # Training process
    modelManager = ModelManager(opt, logger, pretrained_model_path=None)
    modelManager.train()
    modelManager.predict()
def main_predict():
    # Initialize parameter parser and logger
    arg_parser = get_parser_with_args()
    opt = arg_parser.parse_args()
    opt.result_dir = (opt.result_dir + '_' + opt.model + '_' + opt.optimizer +
                      '_' + opt.loss + '_' + str(opt.lr) + '_(' +
                      str(opt.image_size) + ',' + str(opt.image_size) +
                      ')_batchsize_' + str(opt.batch_size) + '_weightdecay_' +
                      str(opt.weight_decay) + '_patience_' + str(opt.patience))
    mkdir(opt.result_dir)
    log_file_name = opt.result_dir + '/' + opt.log_file
    logger = get_logger(log_file_name)
    myprint("******************Begin Predicting************************\n",
            logger)
    myprint("Begin! Args for Predicting: {} \n".format(opt), logger)

    # Load the model manager with the pretrained weights
    pretrained_model_path = (opt.result_dir + '/' + opt.model + '_' +
                             opt.optimizer + '_lr_' + str(opt.lr) + '_loss_' +
                             opt.loss)
    modelManager = ModelManager(opt, logger, pretrained_model_path)

    # Predict
    modelManager.predict()
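# Neither driver above is wired to an entry point in this listing. A minimal
# sketch of one possibility is given below; the command-line dispatch on
# sys.argv is an illustrative assumption, not part of the original scripts,
# which may simply call main_train() or main_predict() directly.
import sys

if __name__ == '__main__':
    # e.g. `python run.py predict` runs inference; anything else trains.
    if len(sys.argv) > 1 and sys.argv[1] == 'predict':
        main_predict()
    else:
        main_train()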
def load_model(self):
    if self.opt.model == 'base_unet':
        model = UNet(self.opt.num_channels,
                     self.opt.num_classes).to(self.device)
    elif self.opt.model == 'spectral_unet':
        model = spetralUNet(self.opt.num_channels,
                            self.opt.num_classes).to(self.device)
    elif self.opt.model == 'deeplab':
        model = DeepLab_V3plus(backbone='mobilenet',
                               output_stride=16,
                               num_classes=self.opt.num_classes,
                               sync_bn=True,
                               freeze_bn=False).to(self.device)
    else:
        raise Exception('Undefined model!')
    myprint(
        "Number of parameters in model {} : {}".format(
            self.opt.model, sum(p.numel() for p in model.parameters())),
        self.logger)
    return model
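# Note: sum(p.numel() for p in model.parameters()) counts every parameter,
# including frozen ones (e.g. when batch-norm layers are frozen). If only
# trainable weights are of interest, the count can be restricted as in the
# sketch below; the helper name is illustrative, not part of the original code.
def count_trainable_parameters(model):
    # Only parameters with requires_grad=True are updated by the optimizer.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)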
def main_train():
    startTime = time.time()

    # Initialize parameter parser and logger
    arg_parser = get_parser_with_args()
    opt = arg_parser.parse_args()
    opt.result_dir = (opt.result_dir + '_' + opt.model + '_' + opt.optimizer +
                      '_' + opt.loss + '_' + str(opt.lr) + '_(' +
                      str(opt.image_size) + ',' + str(opt.image_size) +
                      ')_batchsize_' + str(opt.batch_size) + '_weightdecay_' +
                      str(opt.weight_decay) + '_patience_' + str(opt.patience))
    mkdir(opt.result_dir)
    log_file_name = opt.result_dir + '/' + opt.log_file
    logger = get_logger(log_file_name)
    myprint("******************Begin Training************************\n",
            logger)
    myprint("Begin! Args for training: {} \n".format(opt), logger)

    # Training process
    modelManager = ModelManager(opt, logger, pretrained_model_path=None)
    modelManager.train()

    endTime = time.time()
    myprint("Training time: {} \n".format(endTime - startTime), logger)
def __init__(self, opt, logger, pretrained_model_path=None):
    super(ModelManager, self).__init__()
    self.opt = opt
    self.logger = logger
    self.save_model_path = (opt.result_dir + '/' + opt.model + '_' +
                            opt.optimizer + '_lr_' + str(opt.lr) + '_loss_' +
                            opt.loss)
    self.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    myprint(
        'GPU available? \n' + str(torch.cuda.is_available()) +
        '\nNumber of GPUs: ' + str(torch.cuda.device_count()), self.logger)

    # Load the model: train from scratch or restore a pretrained checkpoint
    if pretrained_model_path is None:
        myprint('Train the model {} from scratch'.format(opt.model),
                self.logger)
        self.model = self.load_model()
    else:
        myprint('Load pretrained model: {}'.format(pretrained_model_path),
                self.logger)
        self.model = torch.load(pretrained_model_path)

    # Load the optimizer, learning-rate scheduler and loss criterion
    self.optimizer = self.get_optimizer()
    self.scheduler = ReduceLROnPlateau(self.optimizer,
                                       mode='min',
                                       factor=0.5,
                                       patience=5,
                                       verbose=True)
    self.criterion = get_criterion(opt)

    # Data loaders: augment and shuffle only the training set
    self.train_loader = get_data_loader(self.opt.train_dir,
                                        self.opt,
                                        aug=True,
                                        shuffle=True)
    self.val_loader = get_data_loader(self.opt.val_dir,
                                      self.opt,
                                      aug=False,
                                      shuffle=False)
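# get_optimizer() is called above but not shown in this listing. A minimal
# sketch of what it might look like is given below, assuming opt.optimizer is
# one of 'adam' / 'sgd' and that opt.lr and opt.weight_decay are the only
# hyper-parameters involved (both assumptions about the original code).
def get_optimizer(self):
    import torch.optim as optim
    if self.opt.optimizer == 'adam':
        return optim.Adam(self.model.parameters(),
                          lr=self.opt.lr,
                          weight_decay=self.opt.weight_decay)
    elif self.opt.optimizer == 'sgd':
        return optim.SGD(self.model.parameters(),
                         lr=self.opt.lr,
                         momentum=0.9,
                         weight_decay=self.opt.weight_decay)
    else:
        raise Exception('Undefined optimizer!')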
def train(self):
    TrainingProcessMetrics = {}
    ValProcessMetrics = {}
    best_metrics = {'val_loss': 1000}
    counting = 0
    for epoch in range(1, self.opt.num_epochs + 1):
        myprint("Epoch {}/{}: ".format(epoch, self.opt.num_epochs),
                self.logger)

        # Train for one epoch
        mean_train_metrics = self.train_one_epoch()
        TrainingProcessMetrics['Epoch_' + str(epoch)] = mean_train_metrics

        # Validation
        mean_val_metrics = self.eval(self.val_loader)
        ValProcessMetrics['Epoch_' + str(epoch)] = mean_val_metrics
        self.logger.info(
            "Epoch {}/{}: Training: {}\n Validation: {} \n".format(
                epoch, self.opt.num_epochs, mean_train_metrics,
                mean_val_metrics))

        # Update the learning rate
        val_loss = mean_val_metrics['loss']
        self.scheduler.step(val_loss)

        # Update the plot of the training process
        self.plot_training_process(TrainingProcessMetrics, ValProcessMetrics)

        # Save the best model
        if mean_val_metrics['loss'] < best_metrics['val_loss']:
            myprint(
                "save weights to {}, performance improved from {:.05f} to {:.05f}"
                .format(self.save_model_path, best_metrics['val_loss'],
                        mean_val_metrics['loss']), self.logger)
            counting = 0
            torch.save(self.model, self.save_model_path)
            best_metrics['val_loss'] = mean_val_metrics['loss']
        else:
            counting += 1
            myprint(
                "performance not improved from {:.05f}, counting={}".format(
                    best_metrics['val_loss'], counting), self.logger)

        # Early stopping: stop if performance has not improved for a long time
        if counting >= self.opt.patience and epoch >= self.opt.min_epoch:
            myprint(
                "performance not improved for {} epochs, so stop training!"
                .format(counting), self.logger)
            break

    # Save the training results
    training_process_file = self.opt.result_dir + '/training_process.txt'
    training_process_results = {
        'validation_metrics': ValProcessMetrics,
        'train_metrics': TrainingProcessMetrics
    }
    save_dictionary(training_process_results, training_process_file)
    myprint("Finished training!", self.logger)
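# train_one_epoch() and eval() are referenced above but not included in this
# listing. A minimal sketch of train_one_epoch() is shown below, assuming each
# batch is an (image, label) pair and that only the mean loss is tracked (the
# original likely also accumulates accuracy/IoU metrics).
def train_one_epoch(self):
    self.model.train()
    running_loss = 0.0
    for images, labels in self.train_loader:
        images = images.to(self.device)
        labels = labels.to(self.device)

        # Standard forward / backward / update step.
        self.optimizer.zero_grad()
        outputs = self.model(images)
        loss = self.criterion(outputs, labels)
        loss.backward()
        self.optimizer.step()

        running_loss += loss.item()

    # Return the metrics in the same dictionary form that train() expects.
    return {'loss': running_loss / len(self.train_loader)}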