def val_net(net, device, loader, criterion, batch_size):
    net.eval()
    val_loss = AverageMeter()
    time_start = time.time()
    with torch.no_grad():
        for batch_idx, (data, gt) in enumerate(loader):
            # Use GPU or not
            data, gt = data.to(device), gt.to(device)
            # Forward
            predictions = net(data)
            # Loss calculation
            loss = criterion(predictions, gt)
            # Update the record
            val_loss.update(loss.item(), predictions.size(0))
            print('[{}/{} ({:.0f}%)]\t\tLoss: {:.6f}'.format(
                batch_idx * len(data), len(loader) * batch_size,
                100. * batch_idx / len(loader), loss.item()))
    time_dif = time.time() - time_start
    print('\nValidation set: Average loss: ' + str(val_loss.avg))
    print('Validation time: it took %.4fs to finish the validation.' % time_dif)
    return val_loss.avg
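# Every routine in this collection records its running loss with an AverageMeter,
# whose definition is not shown here. A minimal sketch, assuming the conventional
# val/sum/count/avg bookkeeping (as in the PyTorch ImageNet example); the snippets
# below only rely on .val, .avg and .update(value, n).
class AverageMeter(object):
    """Tracks the most recent value and the running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # `val` is the metric for the current batch, `n` the batch size
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count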
def eval_fn(data_loader, model, criterion, device):
    loss_score = AverageMeter()
    model.eval()
    tk0 = tqdm(enumerate(data_loader), total=len(data_loader))
    with torch.no_grad():
        for step, data in tk0:
            images = data['images'].to(device)
            targets = data['target'].to(device)
            batch_size = images.shape[0]
            output = model(images, targets)
            loss = criterion(output, targets)
            loss_score.update(loss.detach().item(), batch_size)
            tk0.set_postfix(Eval_Loss=loss_score.avg)
    return {"loss": loss_score}
def train(train_loader, net, criterion, optimizer, epoch, train_args):
    train_loss = AverageMeter()
    curr_iter = (epoch - 1) * len(train_loader)
    for i, data in enumerate(train_loader):
        inputs, labels = data
        assert inputs.size()[2:] == labels.size()[1:], (
            "inputs are {}, output is {}".format(inputs.size()[2:], labels.size()[1:]))
        N = inputs.size(0)
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == 21
        loss = criterion(outputs, labels) / N
        loss.backward()
        optimizer.step()
        train_loss.update(loss.data, N)
        curr_iter += 1
        if (i + 1) % train_args['print_freq'] == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (
                epoch, i + 1, len(train_loader), train_loss.avg))
def train(inputs, labels, net, criterion, optimizer, epoch, train_args):
    train_loss = AverageMeter()
    inputs, labels = shuffle(inputs, labels)
    for idx in range(0, len(inputs) - 1, 2):
        # Stack two consecutive samples into one mini-batch
        input1, input2 = inputs[idx], inputs[idx + 1]
        label1, label2 = labels[idx], labels[idx + 1]
        input = np.concatenate((input1, input2), axis=0)
        label = np.concatenate((label1, label2), axis=0)
        input_t = torch.from_numpy(input)
        label_t = torch.from_numpy(label)
        N = input_t.size(0)  # batch size
        input_t = Variable(input_t).cuda()
        label_t = Variable(label_t).cuda()
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        output_t = net(input_t)
        loss = criterion(output_t, label_t)
        loss.backward()
        optimizer.step()
        train_loss.update(loss.data, N)
        if idx % train_args['print_freq'] == 0 or (idx + 1) % train_args['print_freq'] == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (
                epoch, idx + 1, len(inputs), train_loss.avg))
def train_fn(dataloader, model, criterion, optimizer, device, epoch_th, scheduler=None):
    model.train()
    loss_score = AverageMeter()
    tk0 = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in tk0:
        images = data['images'].to(device)
        targets = data['target'].to(device)
        batch_size = images.shape[0]
        optimizer.zero_grad()
        output = model(images, targets)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        loss_score.update(loss.detach().item(), batch_size)
        tk0.set_postfix(Train_Loss=loss_score.avg, Epoch=epoch_th,
                        LR=optimizer.param_groups[0]['lr'])
        if scheduler is not None:
            scheduler.step()
    return {"loss": loss_score}
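# Illustrative driver for the train_fn / eval_fn pair above -- a sketch, not part of
# the source. The optimizer, scheduler, num_epochs and checkpoint filename are
# assumptions chosen for the example.
def run_training(model, criterion, train_loader, valid_loader, device, num_epochs=10):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    best_loss = float('inf')
    for epoch in range(num_epochs):
        # Pass scheduler=None so the LR is stepped once per epoch here rather than
        # per batch inside train_fn.
        train_fn(train_loader, model, criterion, optimizer, device, epoch, scheduler=None)
        scheduler.step()
        val_out = eval_fn(valid_loader, model, criterion, device)
        if val_out['loss'].avg < best_loss:
            best_loss = val_out['loss'].avg
            torch.save(model.state_dict(), 'best_model.pth')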
def train_reorganized(trainloader, model, criterion, optimizer, epochs):
    # Train the model
    model.train()
    top1 = AverageMeter()
    losses = AverageMeter()
    for epoch in range(epochs):
        for batch_idx, (inputs) in enumerate(trainloader):
            targets = torch.LongTensor(
                np.tile(np.arange(inputs.size(1)), inputs.size(0)))
            inputs = inputs.reshape(-1, inputs.size(-3), inputs.size(-2), inputs.size(-1))
            inputs, targets = torch.autograd.Variable(inputs.cuda()), torch.autograd.Variable(targets.cuda())
            outputs, _ = model(inputs)
            loss = criterion(outputs, targets)
            prec1 = simple_accuracy(outputs.data.cpu(), targets.data.cpu())
            top1.update(prec1, inputs.size(0))
            losses.update(loss.data.cpu(), inputs.size(0))
            # Compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if batch_idx % 10 == 0:
                print('Epoch: [{} | {}], batch: {}, loss: {}, Accuracy: {}'.format(
                    epoch + 1, epochs, batch_idx + 1, losses.avg, top1.avg))
def test(self):
    val_mse = AverageMeter()
    val_ssim = AverageMeter()
    with torch.no_grad():
        self.model.eval()
        for i, data in enumerate(self.test_loader):
            # Unpack
            input_img = data[0].to(self.device, dtype=torch.float)
            depth_gt = data[1].to(self.device, dtype=torch.float)
            # Step
            depth_pred = self.model(input_img)
            MSE = self.criterion(depth_pred, depth_gt)
            SSIM = ssim_criterion(depth_pred, depth_gt)
            val_mse.update(MSE.item(), self.bs)
            val_ssim.update(SSIM.item(), self.bs)
            if self.save_image:
                if not os.path.exists(os.path.join(self.log_dir, 'results')):
                    os.makedirs(os.path.join(self.log_dir, 'results'))
                save_image(input_img[0].cpu(),
                           '{}/results/color_{}.png'.format(self.log_dir, i))
                save_image(depth_gt[0].cpu(),
                           '{}/results/gt_{}.png'.format(self.log_dir, i))
                save_image(depth_pred[0].cpu(),
                           '{}/results/predict_{}.png'.format(self.log_dir, i))
                # Re-read the saved grayscale depth maps and rewrite them as colour heatmaps
                colormap = plt.get_cmap('inferno')
                image = cv2.imread('{}/results/gt_{}.png'.format(self.log_dir, i), 0)
                heatmap = (colormap(image) * 2**16).astype(np.uint16)[:, :, :3]
                heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2BGR)
                cv2.imwrite('{}/results/gt_{}.png'.format(self.log_dir, i), heatmap)
                image = cv2.imread('{}/results/predict_{}.png'.format(self.log_dir, i), 0)
                heatmap = (colormap(image) * 2**16).astype(np.uint16)[:, :, :3]
                heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2BGR)
                cv2.imwrite('{}/results/predict_{}.png'.format(self.log_dir, i), heatmap)
            print('Testing: {}'.format(i))
            # # log
            # if i % 20 == 0:
            #     self.logger.add_image('val/{}/color'.format(i), torch.clamp(torch.pow(input_img.cpu()[0], 0.454545), 0, 1), i)
            #     self.logger.add_image('val/{}/depth_pred'.format(i), torch.clamp(torch.pow(depth_pred.cpu()[0], 0.454545), 0, 1), i)
            #     self.logger.add_image('val/{}/depth_gt'.format(i), torch.clamp(torch.pow(depth_gt.cpu()[0], 0.454545), 0, 1), i)
    print('avg MSE: {}'.format(val_mse.avg))
    print('avg SSIM: {}'.format(val_ssim.avg))
def validate(loader, net, criterion, optimizer, epoch, args, device, dtype):
    """
    Function for validating a network's performance after one epoch of training

    Input(s):
    - loader (PyTorch loader object): loader for queueing minibatches
    - net (module object): PyTorch network module object
    - criterion (loss object): PyTorch loss function
    - optimizer (optimizer object): PyTorch optimizer function
    - epoch (int): current training epoch
    - args (parser object): parser containing command-line inputs
    - device (PyTorch device)
    - dtype (PyTorch datatype)

    Output(s):
    - val_loss.avg (float): average of val_loss for all mini-batches in the
      validation set
    - mean_iou (float): average mean IoU score over all ground-truth masks and
      respective predictions in the validation set
    """
    net.eval()
    val_loss = AverageMeter()
    with torch.no_grad():
        preds_list = []
        masks_list = []
        for v, (x, y, name) in enumerate(loader):
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            scores = net(x)
            loss = criterion(scores, y)
            val_loss.update(loss.item())
            batch_masks = y.data.cpu().numpy()
            batch_preds = F.softmax(scores, dim=1).data.cpu().numpy()[:, 1, :, :]
            # Assemble evaluation ingredients
            preds_list.append(batch_preds.squeeze())
            masks_list.append(batch_masks)
        # Evaluate performance
        preds = np.concatenate(preds_list)
        masks = np.concatenate(masks_list)
        mean_iou = evaluate(preds, masks)
        print('--------------------------------------')
        print('[epoch %d], [val_loss %.4f], [mean_iou %.4f]' % (
            epoch, val_loss.avg, mean_iou))
        print('--------------------------------------')
    net.train()
    return val_loss.avg, mean_iou
def train(train_sets, net, criterion, optimizer, epoch, train_args):
    train_loss = AverageMeter()
    cur_iter = 0
    random.shuffle(train_sets)
    for train_set in train_sets:
        data = train_set['data']
        datashape = data.shape[1:]
        # Zero-pad each spatial dimension up to the next multiple of 8
        zeropad_shape = np.ceil(np.divide(datashape, 8)).astype(int) * 8
        p = zeropad_shape - datashape      # total padding
        p_b = np.ceil(p / 2).astype(int)   # padding before image
        p_a = np.floor(p / 2).astype(int)  # padding after image
        data = np.pad(data, ((0, 0), (p_b[0], p_a[0]), (p_b[1], p_a[1]), (p_b[2], p_a[2])),
                      mode='constant', constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
        inputs = data[:5, :, :, :]
        inputs = np.expand_dims(inputs, axis=0)
        labels = data[5:6, :, :, :]
        labels[labels != 0] = 1  # find just the tumor
        labels = np.int64(labels)
        labels = np.eye(2)[labels]  # one-hot encode the two classes
        labels = np.moveaxis(labels, -1, 1)
        labels = np.float32(labels)
        inputs = torch.from_numpy(inputs)
        labels = torch.from_numpy(labels)
        N = inputs.size(0)  # batch size
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()
        optimizer.zero_grad()  # clear gradients from the previous step
        outputs = net(inputs)
        loss = criterion(outputs, labels) / N
        loss.backward()
        optimizer.step()
        train_loss.update(loss.data, N)
        if cur_iter % train_args['print_freq'] == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (
                epoch, cur_iter, len(train_sets), train_loss.avg))
        cur_iter += 1
def train(train_loader, model, criterion, optimizer, use_cuda):
    # Switch to train mode
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(train_loader))
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # Measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        targets = targets.squeeze(1)  # pytorch 0.4.0 merged Variable and Tensor
        # inputs, targets = V(inputs), V(targets.squeeze(1))

        # Compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Measure accuracy and record loss
        prec1 = accuracy(outputs.data, targets.data, topk=(1,))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))

        # Compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Top1: {top1:.4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg)
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def validate(self, epoch):
    val_final_loss = AverageMeter()
    with torch.no_grad():
        self.model.eval()
        for i, data in enumerate(self.test_loader):
            # Unpack
            input_img = data[0].to(self.device, dtype=torch.float)
            depth_gt = data[1].to(self.device, dtype=torch.float)
            # Step
            depth_pred = self.model(input_img)
            l1_loss = self.L1_criterion(depth_pred, depth_gt)
            ssim_loss = torch.clamp(
                (1 - ssim_criterion(depth_pred, depth_gt)) * 0.5, 0, 1)
            grad_loss = gradient_criterion(depth_gt, depth_pred, self.device)
            total_loss = self.alpha * l1_loss + self.beta * ssim_loss + self.theta * grad_loss
            total_loss /= (self.alpha + self.beta + self.theta)
            val_final_loss.update(total_loss.item(), self.bs)
            print("Iter {}/{}, loss: {:.4f}".format(
                i, len(self.test_loader), total_loss.item()))
            # Log
            if i % 20 == 0:
                self.logger.add_scalar('val/loss_total', total_loss.item(),
                                       epoch * self.testset_len + i)
                self.logger.add_image(
                    'val/{}/depth_pred'.format(i),
                    torch.clamp(torch.pow(depth_pred.cpu()[0], 0.454545), 0, 1),
                    epoch * self.testset_len + i)
                self.logger.add_image(
                    'val/{}/depth_gt'.format(i),
                    torch.clamp(torch.pow(depth_gt.cpu()[0], 0.454545), 0, 1),
                    epoch * self.testset_len + i)
    if val_final_loss.avg < self.val_best_loss:
        self.val_best_loss = val_final_loss.avg
        self._save_ckpt(epoch, is_val=True)
def train(self, epoch):
    """Train for an epoch"""
    epoch_loss = AverageMeter()
    self.model.train()
    for i, data in enumerate(self.train_loader):
        # Unpack
        input_img = data[0].to(self.device, dtype=torch.float)
        depth_gt = data[1].to(self.device, dtype=torch.float)
        # Step
        self.optimizer.zero_grad()
        depth_pred = self.model(input_img)
        l1_loss = self.L1_criterion(depth_pred, depth_gt)
        ssim_loss = torch.clamp(
            (1 - ssim_criterion(depth_pred, depth_gt)) * 0.5, 0, 1)
        grad_loss = gradient_criterion(depth_gt, depth_pred, self.device)
        total_loss = self.alpha * l1_loss + self.beta * ssim_loss + self.theta * grad_loss
        total_loss /= (self.alpha + self.beta + self.theta)
        total_loss.backward()
        self.optimizer.step()
        epoch_loss.update(total_loss.item(), self.bs)
        self.logger.add_scalar("train/loss_l1", l1_loss.item(), self.iter_nums)
        self.logger.add_scalar("train/loss_ssim", ssim_loss.item(), self.iter_nums)
        self.logger.add_scalar("train/loss_grad", grad_loss.item(), self.iter_nums)
        self.logger.add_scalar("train/loss_total", total_loss.item(), self.iter_nums)
        print("Iter {}/{}, loss: {:.4f}".format(self.iter_nums,
                                                len(self.train_loader),
                                                total_loss.item()))
        self.iter_nums += 1
    self.logger.add_scalar("train_epoch/loss_total", epoch_loss.avg, epoch)
    self._save_ckpt(epoch + 1)
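# Illustrative outer loop for the trainer-style train/validate/test methods above --
# a sketch under assumptions: `trainer` is an instance that bundles the model,
# loaders, criteria, logger and hyper-parameters referenced via `self`, and
# `n_epochs` is a hypothetical argument.
def fit(trainer, n_epochs):
    for epoch in range(n_epochs):
        trainer.train(epoch)      # one optimisation pass over the training set
        trainer.validate(epoch)   # tracks val_best_loss and checkpoints on improvement
    trainer.test()                # final MSE / SSIM report, optionally saving images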
def train_cae(trainloader, model, criterion, optimizer, epochs):
    """Valid for both CAE+MSELoss and CAE+DRAELoss"""
    model.train()
    losses = AverageMeter()
    for epoch in range(epochs):
        for batch_idx, (inputs, _) in enumerate(trainloader):
            inputs = torch.autograd.Variable(inputs.cuda())
            outputs = model(inputs)
            loss = criterion(inputs, outputs)
            losses.update(loss.item(), inputs.size(0))
            # Compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (batch_idx + 1) % 10 == 0:
                print('Epoch: [{} | {}], batch: {}, loss: {}'.format(
                    epoch + 1, epochs, batch_idx + 1, losses.avg))
def validation(val_loader, model, criterion, use_cuda):
    # Switch to evaluate mode
    model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (inputs, targets) in enumerate(val_loader):
        # Measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # inputs, targets = V(inputs, volatile=True), V(targets.squeeze(1), volatile=True)
        # UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
        with torch.no_grad():
            targets = targets.squeeze(1)
            # Compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # Measure accuracy and record loss
            prec1 = accuracy(outputs.data, targets.data, topk=(1,))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1[0], inputs.size(0))
        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f}'.format(
            batch=batch_idx + 1,
            size=len(val_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(loader, net, criterion, optimizer, epoch, args, device, dtype):
    """
    Function for training a network through one epoch

    Inputs:
    - loader (PyTorch loader object): loader for queueing minibatches
    - net (module object): PyTorch network module object
    - criterion (loss object): PyTorch loss function
    - optimizer (optimizer object): PyTorch optimizer function
    - epoch (int): current training epoch
    - args (parser object): parser containing command-line inputs
    - device (PyTorch device)
    - dtype (PyTorch datatype)

    Output:
    - trn_log (list): list of training losses for the epoch
    """
    train_loss = AverageMeter()
    trn_log = []
    for t, (x, y, names) in enumerate(loader):
        net.train()
        x = x.to(device=device, dtype=dtype)
        y = y.to(device=device, dtype=torch.long)
        scores = net(x)
        loss = criterion(scores, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.update(loss.item())
        trn_log.append(train_loss.val)
        if (t + 1) % args.print_every == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.4f]' % (
                epoch, t + 1, len(loader), train_loss.avg))
    return trn_log
def train_net(net, device, loader, optimizer, criterion, batch_size, isWCE=False):
    net.train()
    train_loss = AverageMeter()
    time_start = time.time()
    for batch_idx, (data, gt, weights) in enumerate(loader):
        # Use GPU or not
        data, gt = data.to(device), gt.to(device)
        # Forward
        predictions = net(data)
        # Loss calculation
        if not isWCE:
            loss = criterion(predictions, gt)
        else:
            weights = weights.to(device)
            loss = criterion(predictions, gt, weights)
        # Update the record
        train_loss.update(loss.item(), predictions.size(0))
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('[{}/{} ({:.0f}%)]\t\tLoss: {:.6f}'.format(
            batch_idx * len(data), len(loader) * batch_size,
            100. * batch_idx / len(loader), loss.item()))
    time_dif = time.time() - time_start
    print('\nAverage Training Loss: ' + str(train_loss.avg))
    print('Train time: it took %.4fs to finish the epoch.' % time_dif)
    return train_loss.avg
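# Minimal epoch driver for train_net (above) and val_net (defined earlier) -- a
# sketch, not taken from the source; the SGD optimizer, the number of epochs and
# the checkpoint filename are assumptions made for the example.
def run_epochs(net, device, train_loader, val_loader, criterion, batch_size, epochs=50):
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    best_val = float('inf')
    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch + 1, epochs))
        train_net(net, device, train_loader, optimizer, criterion, batch_size)
        val_avg = val_net(net, device, val_loader, criterion, batch_size)
        # Keep the weights with the lowest validation loss seen so far
        if val_avg < best_val:
            best_val = val_avg
            torch.save(net.state_dict(), 'best_net.pth')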
def validate(inputs, labels, net, criterion, optimizer, epoch, train_args):
    net = net.eval()
    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []
    for idx in range(0, len(inputs) - 1, 2):
        # Stack two consecutive samples into one mini-batch
        input1, input2 = inputs[idx], inputs[idx + 1]
        label1, label2 = labels[idx], labels[idx + 1]
        input = np.concatenate((input1, input2), axis=0)
        label = np.concatenate((label1, label2), axis=0)
        input_t = torch.from_numpy(input)
        label_t = torch.from_numpy(label)
        N = input_t.size(0)  # batch size
        input_t = Variable(input_t).cuda()
        label_t = Variable(label_t).cuda()
        with torch.no_grad():
            output = net(input_t)
            loss = criterion(output, label_t)
        val_loss.update(loss.data, N)
        predictions = output.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()
        label = np.argmax(label, axis=1)
        gts_all.append(label.squeeze())
        predictions_all.append(predictions)
    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, 4)
    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_loss_%.5f_mean-iu_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, mean_iu, optimizer.param_groups[0]['lr'])
        torch.save(net.state_dict(), os.path.join(savedir_nets2, snapshot_name + '.pth'))
        torch.save(optimizer.state_dict(), os.path.join(savedir_nets2, snapshot_name + '_opt.pth'))
        torch.save(net.state_dict(), os.path.join(savedir_nets2, 'bestnet.pth'))
        torch.save(optimizer.state_dict(), os.path.join(savedir_nets2, 'bestnet_opt.pth'))
    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'],
        train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
        train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('--------------------------------------------------------------------')
    net.train()
    return
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    net.eval()
    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []
    for data in val_loader:
        inputs, gts = data
        N = inputs.size(0)
        inputs = inputs.to(device)
        gts = gts.to(device)
        with torch.no_grad():
            outputs = net(inputs)
            predictions = outputs.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()
        val_loss.update(criterion(outputs, gts).data / N, N)
        if random.random() > train_args['val_img_sample_rate']:
            inputs_all.append(None)
        else:
            inputs_all.append(inputs.squeeze_(0).cpu())
        gts_all.append(gts.squeeze_(0).cpu().numpy())
        predictions_all.append(predictions)
    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, 21)
    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
    val_visual = []
    for data in zip(inputs_all, gts_all, predictions_all):
        if data[0] is None:
            continue
        input_pil = restore(data[0])
        gt_pil = colorize_mask(data[1])
        predictions_pil = colorize_mask(data[2])
        val_visual.extend([visualize(input_pil.convert('RGB')),
                           visualize(gt_pil.convert('RGB')),
                           visualize(predictions_pil.convert('RGB'))])
    val_visual = torch.stack(val_visual, 0)
    val_visual = make_grid(val_visual, nrow=3, padding=5)
    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'],
        train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
        train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('--------------------------------------------------------------------')
    net.train()
    return val_loss.avg, val_visual
def validate(val_sets, net, criterion, optimizer, epoch, train_args):
    net.eval()
    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []
    for val_set in val_sets:
        data = val_set['data']
        datashape = data.shape[1:]
        # Zero-pad each spatial dimension up to the next multiple of 8
        zeropad_shape = np.ceil(np.divide(datashape, 8)).astype(int) * 8
        p = zeropad_shape - datashape      # total padding
        p_b = np.ceil(p / 2).astype(int)   # padding before image
        p_a = np.floor(p / 2).astype(int)  # padding after image
        data_pad = np.pad(data, ((0, 0), (p_b[0], p_a[0]), (p_b[1], p_a[1]), (p_b[2], p_a[2])),
                          mode='constant', constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
        inputs = data_pad[:5, :, :, :]  # just use t1 & flair
        inputs = np.expand_dims(inputs, axis=0)
        labels = data_pad[5:6, :, :, :]
        labels[labels != 0] = 1
        labels = np.int64(labels)
        labels = np.eye(2)[labels]
        labels = np.moveaxis(labels, -1, 1)
        labels = np.float32(labels)
        inputs = torch.from_numpy(inputs)
        labels = torch.from_numpy(labels)
        N = inputs.size(0)  # batch size
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()
        with torch.no_grad():
            outputs = net(inputs)
            loss = criterion(outputs, labels) / N
        val_loss.update(loss.data, N)
        predictions = outputs.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()
        # Crop the zero-padding off the prediction before evaluation
        p_up = predictions.shape - p_a
        predictions = predictions[p_b[0]:p_up[0], p_b[1]:p_up[1], p_b[2]:p_up[2]]
        gts_all.append(data[5:6, :, :, :].squeeze())
        predictions_all.append(predictions)
    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, N)
    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_loss_%.5f_mean-iu_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, mean_iu, optimizer.param_groups[0]['lr'])
        torch.save(net.state_dict(), os.path.join(savedir_nets1, 'bestnet.pth'))
        torch.save(optimizer.state_dict(), os.path.join(savedir_nets1, 'bestnet_opt.pth'))
        torch.save(net.state_dict(), os.path.join(savedir_nets1, snapshot_name + '.pth'))
        torch.save(optimizer.state_dict(), os.path.join(savedir_nets1, snapshot_name + '_opt.pth'))
    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'],
        train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
        train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('--------------------------------------------------------------------')
    net.train()
    return
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    net.eval()
    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []
    for vi, data in enumerate(val_loader):
        inputs, gts = data
        N = inputs.size(0)
        inputs = Variable(inputs).cuda()
        gts = Variable(gts).cuda()
        with torch.no_grad():
            outputs = net(inputs)
            predictions = outputs.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()
        val_loss.update(criterion(outputs, gts).data / N, N)
        if random.random() > train_args['val_img_sample_rate']:
            inputs_all.append(None)
        else:
            inputs_all.append(inputs.squeeze_(0).cpu())
        gts_all.append(gts.squeeze_(0).cpu().numpy())
        predictions_all.append(predictions)
    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, 21)
    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc,
            optimizer.param_groups[1]['lr'])
        # torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
        # torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))
        if train_args['val_save_to_img_file']:
            pass
            # to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
            # check_mkdir(to_save_dir)
    val_visual = []
    for idx, data in enumerate(zip(inputs_all, gts_all, predictions_all)):
        if data[0] is None:
            continue
        input_pil = restore(data[0])
        gt_pil = colorize_mask(data[1])
        predictions_pil = colorize_mask(data[2])
        if train_args['val_save_to_img_file']:
            pass
            # input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
            # predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
            # gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
        val_visual.extend([visualize(input_pil.convert('RGB')),
                           visualize(gt_pil.convert('RGB')),
                           visualize(predictions_pil.convert('RGB'))])
    val_visual = torch.stack(val_visual, 0)
    val_visual = vutils.make_grid(val_visual, nrow=3, padding=5)
    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'],
        train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
        train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('--------------------------------------------------------------------')
    net.train()
    return val_loss.avg, val_visual
# (Fragment of a pruning-aware training loop; the snippet is truncated in the source.)
if args.cuda:
    data, target = data.cuda(), target.cuda()
w_loss = loss_func(model, data, target)
if args.prune:
    l2_aug_loss = 0.0
else:
    # Quadratic augmented term over the conv-layer weights
    l2_aug_loss = 0.5 * args.rho * sum(
        [torch.sum((conv_weights[i] - conv_weights_dup[i] + conv_weights_dual[i]) ** 2)
         for i in range(n_conv_layers)])
if args.prox:
    l2_aug_loss = l2_aug_loss.data.item()
primal_loss = w_loss + l2_aug_loss

# Losses stats
losses.update(primal_loss.item(), data.size(0))

# Update network weights
optimizer.zero_grad()
primal_loss.backward()

# Apply weight_decay
apply_weight_decay(model_weights, args.weight_decay)

# Gradient norm clip
if args.gclip > 0:
    total_norm = clip_grad_norm_(model.parameters(), args.gclip, norm_type=float('inf'))
    if total_norm > args.gclip:
        gclip_time += 1

optimizer.step()

if (not args.prune) and args.prox: