Example #1
0
 def run_model(self):
     """
     Iterate through epochs with both train and validation pahses
     """
     cur_time = datetime.now().strftime("%H:%M:%S")
     
     _fix_seeds()
     
     for epoch in range(self.num_epochs):
         _fix_seeds()
         
         print(f"Starting epoch: {epoch} | time: {cur_time}")
         print('LR:',[pg['lr'] for pg in self.optimizer.param_groups])
         for phase in ['train', 'valid']:
             
             curr_loss, dice_mean = self.run_epoch(phase)
             self.loss_history[phase].append(curr_loss)
             self.dice_history[phase].append(dice_mean)
             
             if phase == 'valid':
                 print(f'Valid loss: {curr_loss}')
                 print(f'Valid dice mean: {dice_mean}')
             
         if self.best_loss > self.loss_history['valid'][-1]:
             self.best_loss = self.loss_history['valid'][-1]
             torch.save(self.model, self.path_to_save_model)
             print('*** Model saved! ***\n')
             
         cur_time = datetime.now().strftime("%H:%M:%S")
Example #2
0
 def run_model(self):
     """
     Iterate through epochs with both train and validation pahses
     """
     cur_time = datetime.now().strftime("%H:%M:%S")
     
     _fix_seeds()
     
     for epoch in range(self.num_epochs):
         _fix_seeds()
         
         print(f"Starting epoch: {epoch} | time: {cur_time}")
         print('LR:',[pg['lr'] for pg in self.optimizer.param_groups])
         for phase in ['train', 'valid']:
             
             epoch_all_metrics = self.run_epoch(phase)
             self.loss_history[phase].append(epoch_all_metrics[0])
             self.dice_history[phase].append(epoch_all_metrics[1])
             self.epoch_additional_metrics[phase].append(epoch_all_metrics[2:])
             
             if phase == 'valid':
                 print(f'Valid avg loss: {epoch_all_metrics[0]}')
                 print(f'Valid avg dice: {epoch_all_metrics[1]}')
             
             del epoch_all_metrics
             torch.cuda.empty_cache()
             
         if self.best_loss > self.loss_history['valid'][-1]:
             self.best_loss = self.loss_history['valid'][-1]
             torch.save(self.model, self.path_to_save_model)
             print('*** Model saved! ***\n')
             
         cur_time = datetime.now().strftime("%H:%M:%S")
Example #3
0
    def evaluate_model(best_model,
                       valid_dataset,
                       valid_sampler,
                       device,
                       cls_thresholds={1: 0.5, 2: 0.5, 3: 0.5, 4: 0.5},
                       cls_min_area={1: 500, 2: 500, 3: 2000, 4: 2000},
                       batch_size=16,
                       num_workers=8):
        """
        Model validation with post processing
        """        
        _fix_seeds()
        # load saved model
        best_model.to(device)
        best_model.eval()
        
        # create validation loader
        loader = DataLoader(valid_dataset,
                          batch_size=batch_size,
                          shuffle=False,
                          sampler=valid_sampler,
                          worker_init_fn=_init_fn,
                          pin_memory=False,
                          num_workers=num_workers)
        
        dices = np.array([])
        
        # iterate over data
        with tqdm(loader, desc='valid', file=sys.stdout) as iterator:
            for (batch_img, batch_mask) in iterator:
                # to gpu
                batch_img = batch_img.to(device)
                #batch_mask = batch_mask.to(device)
                        
                batch_preds = best_model(batch_img)
                batch_preds = torch.sigmoid(batch_preds)   
                batch_preds = batch_preds.detach().cpu().numpy()
                batch_mask = batch_mask.detach().numpy()
                
                batch_preds_processed = []
                for preds in batch_preds:
                    for cls, pred in enumerate(preds):
                        pred = post_process(pred, cls_thresholds[cls + 1], cls_min_area[cls + 1])
                        batch_preds_processed.append(pred)
                
                batch_preds_processed = np.array(batch_preds_processed)
                
                bs = batch_preds.shape[0]
                w = batch_preds.shape[2]
                h = batch_preds.shape[3]

                # 4 classes - 4 rows for every image
                batch_preds_processed = batch_preds_processed.reshape((bs * 4, w, h))
                batch_mask = batch_mask.reshape((bs * 4, w, h))
                
                dices_batch = dice_coeff_batch(batch_preds_processed, batch_mask)
                dices = np.append(dices, dices_batch)
        
        # avg dice on all validation
        return np.mean(dices), dices
 def __getitem__(self, idx):
     # fix seed
     _fix_seeds(SEED)
     
     # upload image (numpy array)
     image_id = self.img_list[idx]
     image = jpeg.JPEG(self.files_path + image_id).decode()
     
     # upload mask (rle string)
     masks_dict = self.train_dict[image_id]
     mask = []
     
     for cls in masks_dict:
         cur_rle = masks_dict[cls]
         if cur_rle != '':
             # get numpy array from string
             cur_mask = rle2mask(cur_rle)
             mask.append(cur_mask)
         else:
             cur_mask = np.zeros((256,1600), dtype=np.uint8)
             mask.append(cur_mask)
     mask = np.array(mask).swapaxes(0, 1).swapaxes(1,2)       
     
     if self.transform is not None:
         aug_item = self.transform(image=image, mask=mask)
     
     if not self.valid:
         return torch.from_numpy(aug_item['image']).permute(2, 0, 1), \
            torch.from_numpy(aug_item['mask']).permute(2, 0, 1).float()
     else:
         return image_id, \
             torch.from_numpy(aug_item['image']).permute(2, 0, 1), \
             torch.from_numpy(aug_item['mask']).permute(2, 0, 1).float()
Example #5
0
    def cross_val_score(self,
                        model_params,
                        optimizer_params,
                        scheduler_params,
                        cv_schema):
        """
        Make N folds cross-validation and average predictions from every fold
        """
        _fix_seeds()
        dataset_len = len(self.datasets['train']) # does not matter train ot valid if they are equal, except transforms
        indices = list(range(dataset_len))
        
        for fold_num, (train_indices, valid_indices) in enumerate(cv_schema.split(indices)):
            
            print('*******************************************')
            print('Starting fold {}'.format(fold_num))
            
            # make samplers and loaders        
            inds_dict = {'train':train_indices, 'valid':valid_indices}
            for phase in ['train', 'valid']:
                self.samplers[phase] = self.make_sampler(phase, inds_dict[phase])
                #self.samplers[phase] = ClassProbSampler(inds_dict[phase], 
                #                                        self.class_weights_sampler, 
                #                                        self.train_image_class)
                self.loaders[phase] = self.make_loader(phase, self.samplers[phase])

            # initialize model and process train and validation for this fold
            model, optimizer, lr_scheduler = self.make_model(model_params, 
                                                             optimizer_params,
                                                             scheduler_params)
            self.run_model(model, optimizer, lr_scheduler, fold_num=str(fold_num))
            print('Fold {} finished!\n'.format(fold_num))
    def __getitem__(self, idx):
        # fix seed
        _fix_seeds(SEED)
        
        # upload image (numpy array)
        image_id = self.img_list[idx]
        image = jpeg.JPEG(self.files_path + image_id).decode()
        
        # upload mask (rle string)
        masks_dict = self.train_dict[image_id]
        labels = []
        
        for cls in masks_dict:
            cur_rle = masks_dict[cls]
            if cur_rle != '':
                # get numpy array from string
                labels.append(1)
            else:
                labels.append(0)
        labels = np.array(labels)      
        
        if self.transform is not None:
            aug_item = self.transform(image=image)

        return torch.from_numpy(aug_item['image']).permute(2, 0, 1), \
               torch.from_numpy(labels).float()
 def __getitem__(self, idx):
     _fix_seeds(SEED)
     image_id = self.img_list[idx]
     image = jpeg.JPEG(self.files_path + image_id).decode()
     
     aug_item = {}
     
     if self.transform is not None:
         aug_item = self.transform(image=image)
     else:
         aug_item['image'] = image
         
     return image_id, torch.from_numpy(aug_item['image']).permute(2, 0, 1).float()
Example #8
0
    def __init__(self, 
                 model, path_to_save_model, 
                 loss, 
                 #metrics, # TODO: every metric as class
                 optimizer, scheduler,
                 pred_threshold,
                 train_loader,
                 valid_loader, 
                 device,
                 num_classes=4,
                 num_epochs=20):
        
        #torch.set_default_tensor_type("torch.FloatTensor")
        self.path_to_save_model = path_to_save_model
        self.num_epochs = num_epochs
        self.num_classes = num_classes
        self.best_loss = float("inf")
        self.device = device
        _fix_seeds()
        self.model = model
        self.model.to(self.device)
        
        self.loss = loss
        #self.metrics = metrics # TODO: every metric as class
        self.pred_threshold = pred_threshold
        
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.dataloaders = {
            'train': train_loader,
            'valid': valid_loader
        }

        # metrics history
        self.loss_history = {
            'train': [],
            'valid': []
        }
        self.dice_history = {
            'train': [],
            'valid': []
        }
        self.epoch_additional_metrics = {
            'train': [],
            'valid': []            
        }
Example #9
0
    def run_epoch(self, phase):
        """
        Run single epoch of train or validation
        
        phase: str, 'train' or 'valid'
        """
        # enter mode
        if phase == 'train':
            self.model.train()
        else:
            self.model.eval()
            
        # fix seeds
        _fix_seeds()   
        
        loader = self.dataloaders[phase]
        data_len = len(loader)
        
        logs = {}
        epoch_loss = 0
        epoch_dice = []
        # for mean dice calculation over all data
        if phase == 'valid':
            predictions = torch.tensor(np.array([]), device=self.device, dtype=torch.float32)
            ground_truth = torch.tensor(np.array([]), device=self.device, dtype=torch.float32)
        
        # iterate over data
        with tqdm(loader, desc=phase, file=sys.stdout) as iterator:
            for (batch_img, batch_mask) in iterator:
                
                _fix_seeds()
                # to gpu
                batch_img = batch_img.to(self.device)
                batch_mask = batch_mask.to(self.device)
        
                if phase == 'train':
                    batch_loss, batch_preds = self.forward_train(batch_img, batch_mask)
                else:                   
                    batch_loss, batch_preds = self.forward_valid(batch_img, batch_mask)   
                    
                batch_preds = torch.sigmoid(batch_preds)
                epoch_loss += batch_loss.item()

                # flat predictions and masks
                batch_size = batch_img.shape[0]
                w = batch_img.shape[2]
                h = batch_img.shape[3]
                
                # 4 classes - 4 rows for every image
                preds_flat = batch_preds.view((batch_size * self.num_classes, w, h))
                mask_flat = batch_mask.view((batch_size * self.num_classes, w, h))
                # binarization
                preds_flat = (preds_flat > self.pred_threshold).float()
                mask_flat = (mask_flat > 0.5).float()
                
                if phase == 'valid':
                    predictions = torch.cat((predictions, preds_flat))
                    ground_truth = torch.cat((ground_truth, mask_flat))
                
                # avg dice on batch
                dice_batch = dice_coeff_mean(preds_flat, mask_flat)
                logs['dice_batch'] = dice_batch
                epoch_dice.append(dice_batch)
                
                del batch_mask, batch_img, batch_preds, preds_flat, mask_flat         
                #torch.cuda.empty_cache()
                
                # save current batch loss value for output
                loss_logs = {self.loss.__name__: batch_loss.item()}
                logs.update(loss_logs) 
                s = self._format_logs(logs)
                iterator.set_postfix_str(s)
        
            if phase == 'valid':
                dice_mean = dice_coeff_mean(predictions, ground_truth)
                del predictions, ground_truth
            else:
                dice_mean = np.mean(epoch_dice)
                
        epoch_loss /= data_len
        self.scheduler.step(epoch_loss)
        torch.cuda.empty_cache()
        
        return epoch_loss, dice_mean
Example #10
0
 def __init__(self, 
              path_to_save_model, 
              loss, 
              #metrics, # TODO: every metric as class
              train_dataset, train_loader_params, train_sampler_params,
              valid_dataset, valid_loader_params, valid_sampler_params,
              valid_fraction,
              #train_image_class,
              device,
              pred_threshold,
              #class_weights_sampler={0: 64/29., 1: 64/4., 2: 64/2., 3: 64/25., 4: 64/4.},
              num_classes=4,
              num_epochs=20):
     
     #torch.set_default_tensor_type("torch.FloatTensor")
     _fix_seeds()
     self.path_to_save_model = path_to_save_model
     self.num_epochs = num_epochs
     self.num_classes = num_classes
     self.best_loss = float("inf")
     self.device = device
     self.valid_fraction = valid_fraction
     
     self.loss = loss
     #self.metrics = metrics # TODO: every metric as class
     self.pred_threshold = pred_threshold
     
     self.model = None
     self.optimizer = None  #optimizer
     self.init_lr = None  #[pg['lr'] for pg in self.optimizer.param_groups][0]
     self.scheduler = None   #scheduler
     # data
     self.datasets = {
         'train': train_dataset, # same datasets with different transforms
         'valid': valid_dataset
     }
     self.loader_params = {
         'train': train_loader_params,
         'valid': valid_loader_params
     }
     self.sampler_params = {
         'train': train_sampler_params,
         'valid': valid_sampler_params,
     }
     self.loaders = {
         'train': None,  # create during cross validation
         'valid': None
     }
     self.samplers = {
         'train': None,  # create during cross validation
         'valid': None
     }
     # metrics history
     self.loss_history = {
         'train': [],
         'valid': []
     }
     self.dice_history = {
         'train': [],
         'valid': []
     }
     self.epoch_additional_metrics = {
         'train': [],
         'valid': []            
     }
Example #11
0
    def run_epoch(self, phase):
        """
        Run single epoch of train or validation
        
        phase: str, 'train' or 'valid'
        """
        # enter mode
        if phase == 'train':
            self.model.train()
        else:
            self.model.eval()
            
        # fix seeds
        _fix_seeds()   
        
        loader = self.dataloaders[phase]
        data_len = len(loader)
        
        logs = {}
        epoch_loss = 0
        epoch_meter = Meter(phase, self.pred_threshold, self.device)
        
        # iterate over data
        with tqdm(loader, desc=phase, file=sys.stdout) as iterator:
            for (batch_img, batch_mask) in iterator:
                
                _fix_seeds()
                # to gpu
                batch_img = batch_img.to(self.device)
                batch_mask = batch_mask.to(self.device)
        
                if phase == 'train':
                    batch_loss, batch_preds = self.forward_train(batch_img, batch_mask)
                else:                   
                    batch_loss, batch_preds = self.forward_valid(batch_img, batch_mask)   
                
                dice_batch = epoch_meter.update_batch(batch_mask, batch_preds)
                epoch_loss += batch_loss.item()

                logs['dice_batch'] = dice_batch
                
                del batch_mask, batch_img, batch_preds#, preds_flat, mask_flat         
                #torch.cuda.empty_cache()
                
                # save current batch loss value for output
                loss_logs = {self.loss.__name__: batch_loss.item()}
                logs.update(loss_logs)
                s = self._format_logs(logs)
                iterator.set_postfix_str(s)
        
        dice_mean, epoch_dices = epoch_meter.update_epoch()
        #epoch_cnt_neg_pred, epoch_cnt_neg_mask = epoch_meter.update_epoch()
        #epoch_cnt_pos_pred, epoch_cnt_pos_mask, \
        #epoch_precision_batch, epoch_recall_batch = epoch_meter.update_epoch()
        
        epoch_loss /= data_len
        #self.scheduler.step(epoch_loss)
        self.scheduler.step()
        
        del epoch_meter
        torch.cuda.empty_cache()
        
        return epoch_loss, dice_mean, epoch_dices#, \
Example #12
0
 def run_model(self, model, optimizer, scheduler, fold_num=None):
     """
     Iterate through epochs with both train and validation pahses
     """
     _fix_seeds()
     cur_time = datetime.now().strftime("%H:%M:%S")
     # initialize model, learning rate and loss for each run
     self.best_loss = float("inf")
     self.model = model
     self.model.to(self.device)
     
     self.optimizer = optimizer
     self.scheduler = scheduler
     self.init_lr = [pg['lr'] for pg in self.optimizer.param_groups][0]
     
     for g in self.optimizer.param_groups:
         g['lr'] = self.init_lr
     
     if fold_num:
         temp_str = self.path_to_save_model
         temp_lst = temp_str.split('.')
         fold_num_str = '_' + str(fold_num) + '.'
         model_path = ''.join(['..'] + temp_lst[:-1] + [fold_num_str] + [temp_lst[-1]])
     else:
         model_path = self.path_to_save_model
         
         # generate train and valid indices
         data_size = len(self.datasets['train'])
         val_split = int(np.floor(self.valid_fraction * data_size))
         indices = list(range(data_size))
         np.random.shuffle(indices)
         valid_indices, train_indices = indices[:val_split], indices[val_split:]
         inds_dict = {'train': train_indices, 'valid': valid_indices}
         
         # create samplers and loaders
         for phase in ['train', 'valid']:
             self.samplers[phase] = self.make_sampler(phase, inds_dict[phase])
             #self.samplers[phase] = ClassProbSampler(inds_dict[phase], 
             #                                        self.class_weights_sampler, 
             #                                        self.train_image_class)
             self.loaders[phase] = self.make_loader(phase, self.samplers[phase])
     
     for epoch in range(self.num_epochs):
         _fix_seeds()
         
         print(f"Starting epoch: {epoch} | time: {cur_time}")
         print('LR:',[pg['lr'] for pg in self.optimizer.param_groups])
         for phase in ['train', 'valid']:
             
             epoch_all_metrics = self.run_epoch(phase)
             self.loss_history[phase].append(epoch_all_metrics[0])
             self.dice_history[phase].append(epoch_all_metrics[1])
             self.epoch_additional_metrics[phase].append(epoch_all_metrics[2:])
             
             if phase == 'valid':
                 print(f'Valid avg loss: {epoch_all_metrics[0]}')
                 print(f'Valid avg dice: {epoch_all_metrics[1]}')
             
             del epoch_all_metrics
             torch.cuda.empty_cache()
             
         if self.best_loss > self.loss_history['valid'][-1]:
             self.best_loss = self.loss_history['valid'][-1]
             torch.save(self.model, model_path)
             print('*** Model saved! ***\n')
             
         cur_time = datetime.now().strftime("%H:%M:%S")
Example #13
0
def _init_fn(worker_id):
    _fix_seeds()
    def run_epoch(self, phase):
        """
        Run single epoch of train or validation
        
        phase: str, 'train' or 'valid'
        """
        # enter mode
        if phase == 'train':
            self.model.train()
        else:
            self.model.eval()

        # fix seeds
        _fix_seeds()

        loader = self.loaders[phase]
        data_len = len(loader)

        logs = {}
        epoch_loss = 0
        acc_epoch = 0
        acc_cls_arr = np.array([]).reshape(0, self.num_classes)
        precis_cls_arr = np.array([]).reshape(0, self.num_classes)
        recall_cls_arr = np.array([]).reshape(0, self.num_classes)
        #epoch_meter = Meter(phase, self.pred_threshold, self.device)

        # iterate over data
        with tqdm(loader, desc=phase, file=sys.stdout) as iterator:
            for (batch_img, batch_mask) in iterator:

                _fix_seeds()
                # to gpu
                batch_img = batch_img.to(self.device)
                batch_mask = batch_mask.to(self.device)

                if phase == 'train':
                    batch_loss, batch_preds = self.forward_train(
                        batch_img, batch_mask)
                else:
                    batch_loss, batch_preds = self.forward_valid(
                        batch_img, batch_mask)

                #dice_batch = epoch_meter.update_batch(batch_mask, batch_preds)
                epoch_loss += batch_loss.item()
                batch_metrics = batch_metrics_update(batch_mask, batch_preds,
                                                     self.pred_thresholds)

                logs['acc_batch'] = batch_metrics[0]
                acc_epoch += batch_metrics[0]
                acc_cls_arr = np.vstack((acc_cls_arr, batch_metrics[1]))
                precis_cls_arr = np.vstack((precis_cls_arr, batch_metrics[2]))
                recall_cls_arr = np.vstack((recall_cls_arr, batch_metrics[3]))

                del batch_mask, batch_img, batch_preds, batch_metrics
                #torch.cuda.empty_cache()

                # save current batch loss value for output
                loss_logs = {self.loss.__name__: batch_loss.item()}
                logs.update(loss_logs)
                s = self._format_logs(logs)
                iterator.set_postfix_str(s)

        #dice_mean, epoch_dices = epoch_meter.update_epoch()
        #epoch_cnt_neg_pred, epoch_cnt_neg_mask = epoch_meter.update_epoch()
        #epoch_cnt_pos_pred, epoch_cnt_pos_mask, \
        #epoch_precision_batch, epoch_recall_batch = epoch_meter.update_epoch()
        acc_epoch /= data_len
        acc_cls_arr = acc_cls_arr.mean(0)
        precis_cls_arr = precis_cls_arr.mean(0)
        recall_cls_arr = recall_cls_arr.mean(0)

        epoch_loss /= data_len
        self.scheduler.step()
        #self.scheduler.step(epoch_loss)

        #del epoch_meter
        torch.cuda.empty_cache()

        return epoch_loss, acc_epoch, acc_cls_arr,\
               precis_cls_arr, recall_cls_arr