def validation_loop(self, epoch, valid_loader, valid_set, confm_list):
    """Evaluate the model on every validation batch and record running stats.

    For each batch the inputs are forwarded through ``self.model.net``, the
    per-image loss is added to ``self.val_loss``, the batch confusion matrix
    is added to the accumulator, both are mirrored into ``self.stats.val``
    and the batch is handed to ``self.update_tensorboard``.

    Args:
        epoch: Epoch index forwarded to ``update_tensorboard``.
        valid_loader: Iterable yielding ``(inputs, gts)`` batches; must
            support ``len()`` because it sizes the progress bar.
        valid_set: Object exposing ``num_images``.
        confm_list: Initial confusion-matrix accumulator. Batch matrices
            are combined with ``+``; the running total is published through
            ``self.stats.val.conf_m`` and nothing is returned.
    """
    progress = tqdm(enumerate(valid_loader), desc="Validation",
                    total=len(valid_loader), file=sys.stdout)
    for vi, (inputs, gts) in progress:
        # Read data
        n_images = inputs.size(0)
        inputs = inputs.cuda()
        gts = gts.cuda()

        # Predict model; no autograd graph is needed for evaluation.
        with torch.no_grad():
            outputs = self.model.net(inputs)
            predictions = outputs.data.max(1)[1].cpu().numpy()
            batch_loss = self.model.loss(outputs, gts).item()

        # Compute batch stats
        # NOTE(review): the loss is divided by the batch size here before
        # being averaged -- confirm this matches how the training loss is
        # reported.
        self.val_loss.update(float(batch_loss / n_images), n_images)
        confm = compute_confusion_matrix(predictions, gts.cpu().data.numpy(),
                                         self.cf.num_classes,
                                         self.cf.void_class)
        confm_list = confm_list + confm

        # Save epoch stats
        self.stats.val.conf_m = confm_list
        # Both branches of the former ``cf.normalize_loss`` check stored the
        # same value, so the average is assigned unconditionally.
        self.stats.val.loss = self.val_loss.avg

        # Save predictions and generate overlapping
        first_index = vi * self.cf.valid_batch_size
        image_indexes = range(first_index,
                              first_index + np.shape(predictions)[0])
        self.update_tensorboard(inputs.cpu(), gts.cpu(), predictions, epoch,
                                image_indexes, valid_set.num_images)
def training_loop(self, epoch, train_loader):
    """Train the model for one pass over ``train_loader``.

    Each batch is moved to the GPU, forwarded through ``self.model.net``
    and optimised via ``self.compute_gradients()``. The running loss and
    confusion matrix are accumulated in ``self.train_loss`` and
    ``self.confm_list``; per-batch stats are saved unless ``cf.debug`` is
    set.

    Args:
        epoch: Current epoch number; used for the progress-bar label and to
            derive the global batch index passed to ``save_stats_batch``.
        train_loader: Iterable yielding ``(inputs, labels)`` batches; must
            support ``len()`` because it sizes the progress bar.
    """
    progress = tqdm(enumerate(train_loader),
                    desc="Epoch {}/{}".format(epoch, self.cf.epochs),
                    total=len(train_loader), file=sys.stdout)
    for i, (inputs, labels) in progress:
        # Read Data
        batch_len = inputs.size(0)
        inputs = inputs.cuda()
        # compute_gradients() takes no arguments, so the batch is exposed
        # through attributes (presumably read there -- confirm).
        self.inputs = inputs
        self.labels = labels.cuda()

        # Predict model
        self.model.optimizer.zero_grad()
        self.outputs = self.model.net(inputs)
        predictions = self.outputs.data.max(1)[1].cpu().numpy()

        # Compute gradients (expected to leave the batch loss in self.loss,
        # which is read right below -- confirm against its definition).
        self.compute_gradients()

        # Compute batch stats
        self.train_loss.update(float(self.loss.item()), batch_len)
        confm = compute_confusion_matrix(predictions,
                                         self.labels.cpu().data.numpy(),
                                         self.cf.num_classes,
                                         self.cf.void_class)
        self.confm_list = self.confm_list + confm

        # Both branches of the former ``cf.normalize_loss`` check stored the
        # same value, so the average is assigned unconditionally.
        self.stats.train.loss = self.train_loss.avg

        if not self.cf.debug:
            # Save stats
            self.save_stats_batch((epoch - 1) * self.train_num_batches + i)
def validation_loop(self, epoch, valid_loader, valid_set, bar, global_bar, confm_list):
    """Evaluate the model on every validation batch and record running stats.

    For each batch the inputs are forwarded through ``self.model.net``, the
    per-image loss is added to ``self.val_loss``, the batch confusion matrix
    is added row by row to the accumulator, both are mirrored into
    ``self.stats.val``, the batch is handed to ``self.update_tensorboard``
    and, unless ``cf.silent`` is set, the progress bars are refreshed.

    Args:
        epoch: Epoch index forwarded to ``update_tensorboard``.
        valid_loader: Iterable yielding ``(inputs, gts)`` batches.
        valid_set: Object exposing ``num_images``.
        bar: Progress bar forwarded to ``self.update_msg``.
        global_bar: Global progress bar forwarded to ``self.update_msg``.
        confm_list: Initial confusion-matrix accumulator (sequence of
            rows). The running total is published through
            ``self.stats.val.conf_m`` and nothing is returned.
    """
    for vi, (inputs, gts) in enumerate(valid_loader):
        # Read data
        n_images = inputs.size(0)
        inputs = inputs.cuda()
        gts = gts.cuda()

        # Predict model; no autograd graph is needed for evaluation.
        with torch.no_grad():
            outputs = self.model.net(inputs)
            predictions = outputs.data.max(1)[1].cpu().numpy()
            # The loss is evaluated exactly once per batch (the previous
            # debug prints recomputed it two extra times).
            batch_loss = self.model.loss(outputs, gts).item()

        # Compute batch stats
        self.val_loss.update(float(batch_loss / n_images), n_images)
        confm = compute_confusion_matrix(predictions, gts.cpu().data.numpy(),
                                         self.cf.num_classes,
                                         self.cf.void_class)
        # list(...) materialises the row-wise sum; a bare map() is a lazy
        # one-shot iterator on Python 3.
        confm_list = list(map(operator.add, confm_list, confm))

        # Save epoch stats
        self.stats.val.conf_m = confm_list
        # Both branches of the former ``cf.normalize_loss`` check stored the
        # same value, so the average is assigned unconditionally.
        self.stats.val.loss = self.val_loss.avg

        # Save predictions and generate overlapping
        first_index = vi * self.cf.valid_batch_size
        image_indexes = range(first_index,
                              first_index + np.shape(predictions)[0])
        self.update_tensorboard(inputs.cpu(), gts.cpu(), predictions, epoch,
                                image_indexes, valid_set.num_images)

        # Update messages
        if not self.cf.silent:
            self.update_msg(bar, global_bar)
def training_loop(self, epoch, train_loader, epoch_bar):
    """Train the model for one pass over ``train_loader``.

    Each batch is moved to the GPU, forwarded through ``self.model.net``
    and optimised via ``self.compute_gradients()``. The running loss and
    confusion matrix are accumulated in ``self.train_loss`` and
    ``self.confm_list``; per-batch stats are saved unless ``cf.debug`` is
    set, and progress messages are refreshed unless ``cf.silent`` is set.

    Args:
        epoch: Current epoch number; used to derive the global batch index
            passed to ``save_stats_batch`` and forwarded to the progress
            messages.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        epoch_bar: Per-epoch progress bar forwarded to
            ``self.update_epoch_messages``.
    """
    for i, (inputs, labels) in enumerate(train_loader):
        # Read Data
        batch_len = inputs.size(0)
        inputs = inputs.cuda()
        # compute_gradients() takes no arguments, so the batch is exposed
        # through attributes (presumably read there -- confirm).
        self.inputs = inputs
        self.labels = labels.cuda()

        # Predict model
        self.model.optimizer.zero_grad()
        self.outputs = self.model.net(inputs)
        predictions = self.outputs.data.max(1)[1].cpu().numpy()

        # Compute gradients (expected to leave the batch loss in self.loss,
        # which is read right below -- confirm against its definition).
        self.compute_gradients()

        # Compute batch stats
        self.train_loss.update(float(self.loss.item()), batch_len)
        confm = compute_confusion_matrix(predictions,
                                         self.labels.cpu().data.numpy(),
                                         self.cf.num_classes,
                                         self.cf.void_class)
        # Materialise the row-wise sum. On Python 3 a bare map() is a lazy,
        # single-use iterator, so the accumulator would become an
        # ever-deeper chain of iterators instead of a matrix.
        self.confm_list = list(map(operator.add, self.confm_list, confm))

        # Both branches of the former ``cf.normalize_loss`` check stored the
        # same value, so the average is assigned unconditionally.
        self.stats.train.loss = self.train_loss.avg

        if not self.cf.debug:
            # Save stats
            self.save_stats_batch((epoch - 1) * self.train_num_batches + i)

        # Update epoch messages
        if not self.cf.silent:
            self.update_epoch_messages(epoch_bar, self.global_bar,
                                       self.train_num_batches, epoch, i)
def start(self, criterion, valid_set, valid_loader, epoch=None, global_bar=None):
    """Run a full validation pass and save the resulting statistics.

    Iterates ``valid_loader`` once, accumulating the per-image loss and the
    confusion matrix, mirrors the running values into ``self.stats.val``,
    then calls ``self.compute_stats`` and ``self.save_stats``.

    Args:
        criterion: Callable ``criterion(outputs, gts)`` returning a scalar
            loss tensor.
        valid_set: Object exposing ``num_images``.
        valid_loader: Iterable yielding ``(inputs, gts)`` batches whose
            inputs are four-dimensional.
        epoch: Value forwarded to ``self.save_stats``.
        global_bar: Global progress bar forwarded to ``self.update_msg``.
    """
    # Local import: this block did not previously reference ``torch``
    # directly, so it is brought into scope here.
    import torch

    confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))
    val_loss = AverageMeter()

    # Initialize epoch progress bar
    val_num_batches = math.ceil(valid_set.num_images / float(self.cf.valid_batch_size))
    prev_msg = '\nValidation estimated time...\n'
    bar = ProgressBar(val_num_batches, lenBar=20)
    bar.set_prev_msg(prev_msg)
    bar.update(show=False)

    # Validate model
    for vi, (inputs, gts) in enumerate(valid_loader):
        # Read data
        n_images, w, h, c = inputs.size()
        inputs = inputs.cuda()
        gts = gts.cuda()

        # Predict model. torch.no_grad() replaces the removed
        # ``volatile=True`` flag, which newer PyTorch silently ignores.
        with torch.no_grad():
            outputs = self.model.net(inputs)
            predictions = outputs.data.max(1)[1].cpu().numpy()
            # .item() replaces ``.data[0]``, which fails on 0-dim tensors.
            batch_loss = criterion(outputs, gts).item()

        # Compute batch stats
        val_loss.update(batch_loss / n_images, n_images)
        confm = compute_confusion_matrix(predictions, gts.cpu().data.numpy(),
                                         self.cf.num_classes,
                                         self.cf.void_class)
        # Materialise the row-wise sum; a bare map() is a lazy iterator on
        # Python 3 and np.asarray() below would not see a matrix.
        confm_list = list(map(operator.add, confm_list, confm))

        # Save epoch stats
        self.stats.val.conf_m = confm_list
        # Reported loss is further divided by the product of the three
        # non-batch input dimensions.
        self.stats.val.loss = val_loss.avg / (w * h * c)

        # Update messages
        self.update_msg(bar, global_bar)

    # Compute stats
    self.compute_stats(np.asarray(self.stats.val.conf_m), val_loss)

    # Save stats
    self.save_stats(epoch)
def start(self, criterion, optimizer, train_loader, train_set, valid_set=None, valid_loader=None, scheduler=None):
    """Run the training process from ``self.curr_epoch`` to ``cf.epochs``.

    Every epoch reshuffles the training set, trains on all batches,
    computes and saves the epoch statistics, calls ``self.validate_epoch``,
    steps the optional scheduler on the validation loss and saves the
    network through ``self.model.net.save``. The loop ends early when
    ``self.stop`` is set. When ``cf.epochs`` is 0 the model is saved
    without training.

    Args:
        criterion: Callable ``criterion(outputs, labels)`` returning a
            scalar loss tensor.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        train_loader: Iterable yielding ``(inputs, labels)`` batches whose
            inputs are four-dimensional.
        train_set: Object exposing ``num_images`` and ``update_indexes()``.
        valid_set: Optional validation set exposing ``num_images``;
            forwarded to ``validate_epoch``.
        valid_loader: Optional validation loader forwarded to
            ``validate_epoch``.
        scheduler: Optional scheduler whose ``step`` receives
            ``self.stats.val.loss`` after each epoch.
    """
    train_num_batches = math.ceil(train_set.num_images / float(self.cf.train_batch_size))
    if valid_set is None:
        val_num_batches = 0
    else:
        val_num_batches = math.ceil(valid_set.num_images / float(self.cf.valid_batch_size))

    # Define early stopping control
    early_stopping = Early_Stopping(self.cf) if self.cf.early_stopping else None

    prev_msg = '\nTotal estimated training time...\n'
    remaining_epochs = self.cf.epochs + 1 - self.curr_epoch
    global_bar = ProgressBar(remaining_epochs * (train_num_batches + val_num_batches), lenBar=20)
    global_bar.set_prev_msg(prev_msg)

    # Train process
    for epoch in range(self.curr_epoch, self.cf.epochs + 1):
        # Shuffle train data
        train_set.update_indexes()

        # Initialize logger
        epoch_time = time.time()
        self.logger_stats.write('\t ------ Epoch: ' + str(epoch) + ' ------ \n')

        # Initialize epoch progress bar
        self.msg.accum_str = '\n\nEpoch %d/%d estimated time...\n' % (epoch, self.cf.epochs + 1 - self.curr_epoch)
        epoch_bar = ProgressBar(train_num_batches, lenBar=20)
        epoch_bar.update(show=False)

        # Initialize stats
        train_loss = AverageMeter()
        confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))

        # Train epoch
        for i, (inputs, labels) in enumerate(train_loader):
            # Read Data
            N, w, h, c = inputs.size()
            inputs = inputs.cuda()
            labels = labels.cuda()

            # Predict model
            optimizer.zero_grad()
            outputs = self.model.net(inputs)
            predictions = outputs.data.max(1)[1].cpu().numpy()

            # Compute gradients
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Compute batch stats
            # .item() replaces ``.data[0]``, which fails on 0-dim tensors.
            train_loss.update(loss.item(), N)
            confm = compute_confusion_matrix(predictions,
                                             labels.cpu().data.numpy(),
                                             self.cf.num_classes,
                                             self.cf.void_class)
            # Materialise the row-wise sum; a bare map() is a lazy iterator
            # on Python 3 and np.asarray() below would not see a matrix.
            confm_list = list(map(operator.add, confm_list, confm))
            # Reported loss is further divided by the product of the three
            # non-batch input dimensions.
            self.stats.train.loss = train_loss.avg / (w * h * c)

            # Save stats
            self.save_stats_batch((epoch - 1) * train_num_batches + i)

            # Update epoch messages
            self.update_epoch_messages(epoch_bar, global_bar,
                                       train_num_batches, epoch, i)

        # Save stats
        self.stats.train.conf_m = confm_list
        self.compute_stats(np.asarray(confm_list), train_loss)
        self.save_stats_epoch(epoch)

        # Validate epoch
        self.validate_epoch(valid_set, valid_loader, criterion,
                            early_stopping, epoch, global_bar)

        # Update scheduler
        if scheduler is not None:
            scheduler.step(self.stats.val.loss)

        # Saving model if needed
        self.model.net.save(self.stats)

        # Update display values
        self.update_messages(epoch, epoch_time)

        if self.stop:
            return

    # Save model without training
    if self.cf.epochs == 0:
        self.model.save_model(self.model.net)