# Imports assumed by the routines below; AverageMeter, ProgressMeter,
# CumMeter, accuracy, binary_dice, multi_dice, to_one_hot, select_rand and
# KurtosisWeight are project-local helpers defined elsewhere.
import logging
import time
from functools import reduce

import torch
import torch.nn as nn
import torch.nn.functional as F


def test(test_dataset, model, mapping, args, filename):
    """Write top-1 predictions for the test dataset to `filename`,
    one '<image> <class>' line per image."""
    batch_time = AverageMeter('Time', ':6.3f')
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    progress = ProgressMeter(len(test_loader), [batch_time], prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    # invert the class -> index mapping so indices map back to class names
    idx_to_class = {v: k for k, v in mapping.items()}

    image = 0
    # append results to the predictions file
    with open(filename, 'a') as results, torch.no_grad():
        end = time.time()
        for i, (images, _) in enumerate(test_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)

            # compute output and keep the top-1 prediction per image
            output = model(images)
            _, pred = output.topk(1, 1, True, True)
            pred = pred.t()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

            # write the classifications
            predictions = pred.cpu().numpy()[0]
            result = ''
            for prediction in predictions:
                test_name = 'test_{}.JPEG'.format(image)
                classification = idx_to_class[prediction]
                result += '{} {}\n'.format(test_name, classification)
                image += 1
            results.write(result)

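
# The meters and accuracy helper used throughout these routines follow the
# conventions of the PyTorch ImageNet example (pytorch/examples/imagenet).
# Minimal sketches of two of them are given here for reference; the project's
# actual versions (and ProgressMeter/CumMeter) may differ in detail.

class AverageMeter(object):
    """Computes and stores the average and current value."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for each k in topk."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
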
def validate(val_loader, model, criterion, args, filename, epoch=-1):
    """
    Validate model accuracy on the data in val_loader.

    ### Arguments:
        val_loader - torch.utils.data.DataLoader: validation dataset wrapped in a dataloader.
        model - PyTorch model: the model to validate.
        criterion - torch.nn loss: the loss function to use.
        args - argparse.Namespace: the parsed command-line arguments.
        filename - str: file to append results to.
        epoch - int: current epoch, written alongside the results.

    ### Return:
        float - top-1 average accuracy.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    # append results to the csv
    with open(filename, 'a') as results:
        results.write('{},{},{}\n'.format(epoch, top1.avg, top5.avg))

    return top1.avg

def train(train_loader, model, criterion, optimizer, epoch, args):
    """
    Train the model on train_loader data for one epoch.

    ### Arguments:
        train_loader - torch.utils.data.DataLoader: training dataset wrapped in a dataloader.
        model - PyTorch model: the model to train.
        criterion - torch.nn loss: the loss function to use.
        optimizer - torch.optim: the optimizer to use.
        epoch - int: current training epoch.
        args - argparse.Namespace: the parsed command-line arguments.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

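
# A hedged sketch of how the `train`/`validate` pair above is typically
# driven. `run_training` is hypothetical (not part of this module), and the
# args fields lr / momentum / weight_decay / epochs are assumptions about
# the argument parser set up elsewhere.

def run_training(model, train_loader, val_loader, args, results_file):
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    best_acc1 = 0.0
    for epoch in range(args.epochs):
        # one pass over the training set, then measure validation accuracy
        train(train_loader, model, criterion, optimizer, epoch, args)
        acc1 = validate(val_loader, model, criterion, args, results_file,
                        epoch)
        best_acc1 = max(acc1, best_acc1)
    return best_acc1
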
def validate(val_loader, model, criterion, args, device):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.print(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

def train(train_loader, model, criterion, optimizer, epoch, args, writer=None):
    batch_time = CumMeter('Time(s)', ':.2f')
    data_time = CumMeter('Data(s)', ':.2f')
    losses = AverageMeter('bce', ':.3e')
    dices = AverageMeter('dice', ':.3f')
    progress = ProgressMeter(
        len(train_loader), [batch_time, data_time, losses, dices],
        prefix="Epoch: [{}][trn]".format(epoch),
    )

    model.train()
    train_loader.shuffle()

    end = time.time()
    # NOTE: iterating with `for batch, (x, y1, y2) in enumerate(train_loader)`
    # raised "RuntimeError: cuDNN error: CUDNN_STATUS_BAD_PARAM", so batches
    # are fetched by explicit index instead.
    for batch in range(len(train_loader) // args.batch_size):
        x, y1, y2 = train_loader.get_batch(
            range(batch * args.batch_size, (batch + 1) * args.batch_size))
        data_time.update(time.time() - end)

        x, y1, y2 = x.cuda(), y1.cuda(), y2.cuda()
        out1, out2 = model(x)
        loss1 = criterion(out1, y1)
        loss2 = criterion(out2, y2)
        loss = loss1 + loss2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # element-wise max of the two outputs, and the union of the two masks
        out_pred = (out1 > out2).float() * out1 + (out2 > out1).float() * out2
        out_gt = y1 + y2
        dice = binary_dice(out_pred, out_gt)

        losses.update(loss.item(), 2)
        dices.update(dice.item(), 2)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch % args.print_freq == 0:
            progress.display(batch, logging.DEBUG)
            if writer is not None:
                writer.add_scalar('train_loss', loss.item(),
                                  epoch * len(train_loader) + batch)
                output_grid = select_rand(out_pred[:, 1, ...].detach(),
                                          out_gt[:, 1, ...].detach())
                writer.add_image('out_pred', output_grid,
                                 epoch * len(train_loader) + batch,
                                 dataformats='CHW')

    progress.display(batch, logging.INFO, reduce=True)

def validate(val_loader, model, criterion, epoch, args, writer=None):
    batch_time = CumMeter('Time(s)', ':.2f')
    data_time = CumMeter('Data(s)', ':.2f')
    losses = AverageMeter('bce', ':.3e')
    dices = AverageMeter('dice', ':.3f')
    progress = ProgressMeter(
        len(val_loader), [batch_time, data_time, losses, dices],
        prefix="Epoch: [{}][val]".format(epoch),
    )

    model.eval()

    end = time.time()
    with torch.no_grad():
        # batches are fetched by explicit index; see the note in train() above
        for batch in range(len(val_loader) // args.batch_size):
            x, y1, y2 = val_loader.get_batch(
                range(batch * args.batch_size, (batch + 1) * args.batch_size))
            data_time.update(time.time() - end)

            x, y1, y2 = x.cuda(), y1.cuda(), y2.cuda()
            out1, out2 = model(x)
            loss1 = criterion(out1, y1)
            loss2 = criterion(out2, y2)
            loss = loss1 + loss2

            # element-wise max of the two outputs, and the union of the masks
            out_pred = (out1 > out2).float() * out1 + (out2 > out1).float() * out2
            out_gt = y1 + y2
            dice = binary_dice(out_pred, out_gt)

            losses.update(loss.item(), 2)
            dices.update(dice.item(), 2)

            batch_time.update(time.time() - end)
            end = time.time()

            if batch % args.print_freq == 0:
                progress.display(batch, logging.DEBUG)
                if writer is not None:
                    writer.add_scalar('valid loss', loss.item(),
                                      epoch * len(val_loader) + batch)

    progress.display(batch, logging.INFO, reduce=True)
    # return the running average rather than the last batch's dice
    return dices.avg

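
# `binary_dice` is a project-local helper; a minimal sketch under the usual
# definition dice = 2*|P∩G| / (|P| + |G|), assuming `pred` and `target` are
# float tensors of the same shape. The actual implementation may threshold
# or weight differently.

def binary_dice(pred, target, eps=1e-6):
    pred = (pred > 0.5).float()      # binarize soft predictions
    target = (target > 0.5).float()
    intersection = (pred * target).sum()
    return (2.0 * intersection + eps) / (pred.sum() + target.sum() + eps)
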
def train(train_loader, model, criterion, optimizer, epoch, args, device,
          ml_logger, val_loader, mq=None):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time,
                             losses, top1, top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            ml_logger.log_metric('Train Acc1', top1.avg, step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train Loss', losses.avg, step='auto',
                                 log_to_tfboard=False)

def train(train_loader, model, criterion, optimizer, epoch, args, device,
          ml_logger, val_loader, mq=None, weight_to_hook=None, w_k_scale=0):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    w_k_losses = AverageMeter('W_K_Loss', ':.4e')
    w_k_vals = AverageMeter('W_K_Val', ':6.2f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             w_k_losses, w_k_vals, top1, top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    weight_to_hook = weight_to_hook or {}  # tolerate the None default

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # attach a kurtosis regularizer to every hooked weight tensor
        hookF_weights = {}
        for name, w_tensor in weight_to_hook.items():
            hookF_weights[name] = KurtosisWeight(
                w_tensor, name, kurtosis_target=args.w_kurtosis_target,
                k_mode=args.kurtosis_mode)

        # compute output
        output = model(images)

        w_kurtosis_regularization = 0
        if args.w_kurtosis:
            w_temp_values = []
            for w_kurt_inst in hookF_weights.values():
                w_kurt_inst.fn_regularization()
                w_temp_values.append(w_kurt_inst.kurtosis_loss)
            if args.kurtosis_mode == 'sum':
                w_kurtosis_loss = reduce(lambda a, b: a + b, w_temp_values)
            elif args.kurtosis_mode == 'avg':
                w_kurtosis_loss = reduce(lambda a, b: a + b, w_temp_values)
                # NOTE: hardcoded number of hooked layers per architecture
                if args.arch == 'resnet18':
                    w_kurtosis_loss = w_kurtosis_loss / 19
                elif args.arch == 'mobilenet_v2':
                    w_kurtosis_loss = w_kurtosis_loss / 51
                elif args.arch == 'resnet50':
                    w_kurtosis_loss = w_kurtosis_loss / 52
            elif args.kurtosis_mode == 'max':
                w_kurtosis_loss = reduce(lambda a, b: max(a, b), w_temp_values)
            w_kurtosis_regularization = (
                10 ** w_k_scale) * args.w_lambda_kurtosis * w_kurtosis_loss

        orig_loss = criterion(output, target)
        loss = orig_loss + w_kurtosis_regularization

        if args.w_kurtosis:
            # track the raw (unregularized) kurtosis value as well
            w_temp_values = []
            for w_kurt_inst in hookF_weights.values():
                w_kurt_inst.fn_regularization()
                w_temp_values.append(w_kurt_inst.kurtosis)
            w_kurtosis_val = reduce(lambda a, b: a + b, w_temp_values)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        if args.w_kurtosis:
            # only meaningful when the kurtosis term is enabled; otherwise
            # w_kurtosis_regularization is a plain 0 and has no .item()
            w_k_losses.update(w_kurtosis_regularization.item(), images.size(0))
            w_k_vals.update(w_kurtosis_val.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            ml_logger.log_metric('Train Acc1', top1.avg, step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train Loss', losses.avg, step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train weight kurtosis Loss', w_k_losses.avg,
                                 step='auto', log_to_tfboard=False)
            ml_logger.log_metric('Train weight kurtosis Val', w_k_vals.avg,
                                 step='auto', log_to_tfboard=False)

        # drop references to the per-batch kurtosis hooks
        hookF_weights.clear()

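
# `KurtosisWeight` is a project-local regularizer; a hedged sketch of the
# underlying idea (kurtosis-based weight regularization): penalize the
# distance between each weight tensor's kurtosis k(w) = E[((w - mu)/sigma)^4]
# and a target value. The actual class (fn_regularization, kurtosis_loss,
# kurtosis attributes) may compute this differently.

def weight_kurtosis(w, eps=1e-8):
    mu = w.mean()
    sigma = w.std()
    return (((w - mu) / (sigma + eps)) ** 4).mean()


def kurtosis_loss(w, target):
    # squared distance to the target kurtosis, per weight tensor
    return (weight_kurtosis(w) - target) ** 2
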
def validate(val_loader, model, criterion, epoch, args, writer=None):
    batch_time = CumMeter('Time(s)', ':.2f')
    data_time = CumMeter('Data(s)', ':.2f')
    losses = AverageMeter('bce', ':.3e')
    dices = AverageMeter('dice', ':.3f')
    progress = ProgressMeter(
        len(val_loader), [batch_time, data_time, losses, dices],
        prefix="Epoch: [{}][val]".format(epoch),
    )

    model.eval()

    end = time.time()
    with torch.no_grad():
        for batch, (volumes, label_masks) in enumerate(val_loader):
            data_time.update(time.time() - end)

            volumes = volumes.to(args.device)
            label_masks = label_masks.to(args.device)
            out_masks = model(volumes)

            # resize the labels to the output resolution
            _, _, d, h, w = out_masks.shape
            new_label_masks = F.interpolate(label_masks, size=(d, h, w))
            loss = criterion(out_masks,
                             new_label_masks.squeeze(1).to(torch.int64))

            pred_masks = F.softmax(out_masks, dim=1).permute(0, 2, 3, 4, 1)
            pred_masks = pred_masks.reshape(-1, pred_masks.shape[-1])
            new_label_masks_onehot = to_one_hot(new_label_masks, 3)
            dice = multi_dice(pred_masks, new_label_masks_onehot)

            losses.update(loss.item(), 2)
            dices.update(dice.item(), 2)

            batch_time.update(time.time() - end)
            end = time.time()

            if batch % args.print_freq == 0:
                progress.display(batch, logging.DEBUG)
                if writer is not None:
                    writer.add_scalar('Loss/val', loss.item(),
                                      epoch * len(val_loader) + batch)
                    writer.add_scalar('Dice/val', dice.item(),
                                      epoch * len(val_loader) + batch)

            # note the parentheses: every 10 * print_freq batches
            if batch % (10 * args.print_freq) == 0 and writer is not None:
                pred_masks = F.softmax(out_masks, dim=1).argmax(dim=1)
                output_grid = select_rand(pred_masks.detach(),
                                          new_label_masks[:, 0, ...].detach())
                writer.add_image('out_pred/val', output_grid,
                                 epoch * len(val_loader) + batch,
                                 dataformats='CHW')

    progress.display(batch, logging.INFO, reduce=True)
    if writer is not None:
        writer.add_scalar('Epoch/val_loss', losses.avg, epoch)
        writer.add_scalar('Epoch/val_dice', dices.avg, epoch)
    return losses.avg, dices.avg

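
# `to_one_hot` and `multi_dice` are project-local helpers; hedged sketches of
# plausible implementations, assuming the label masks hold integer class ids,
# `pred` holds per-class probabilities flattened to (N, C), and per-class
# soft-dice scores are averaged over the C classes.

def to_one_hot(labels, num_classes):
    # (B, 1, D, H, W) integer mask -> (B*D*H*W, num_classes) one-hot floats
    flat = labels.reshape(-1).to(torch.int64)
    return F.one_hot(flat, num_classes).float()


def multi_dice(pred, target_onehot, eps=1e-6):
    # pred, target_onehot: (N, C); soft dice per class, averaged over classes
    intersection = (pred * target_onehot).sum(dim=0)
    denom = pred.sum(dim=0) + target_onehot.sum(dim=0)
    return ((2.0 * intersection + eps) / (denom + eps)).mean()
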
def train(train_loader, model, criterion, optimizer, epoch, args, writer=None):
    batch_time = CumMeter('Time(s)', ':.2f')
    data_time = CumMeter('Data(s)', ':.2f')
    losses = AverageMeter('bce', ':.3e')
    dices = AverageMeter('dice', ':.3f')
    progress = ProgressMeter(
        len(train_loader), [batch_time, data_time, losses, dices],
        prefix="Epoch: [{}][trn]".format(epoch),
    )

    model.train()

    end = time.time()
    for batch, (volumes, label_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        volumes = volumes.to(args.device)
        label_masks = label_masks.to(args.device)
        out_masks = model(volumes)

        # resize the labels to the output resolution
        _, _, d, h, w = out_masks.shape
        new_label_masks = F.interpolate(label_masks, size=(d, h, w))
        loss = criterion(out_masks, new_label_masks.squeeze(1).to(torch.int64))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # TODO: check multi dice
        pred_masks = F.softmax(out_masks, dim=1).permute(0, 2, 3, 4, 1)
        pred_masks = pred_masks.reshape(-1, pred_masks.shape[-1])
        new_label_masks_onehot = to_one_hot(new_label_masks, 3)
        dice = multi_dice(pred_masks, new_label_masks_onehot)

        losses.update(loss.item(), 2)
        dices.update(dice.item(), 2)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch % args.print_freq == 0:
            progress.display(batch, logging.DEBUG)
            if writer is not None:
                writer.add_scalar('Loss/train', loss.item(),
                                  epoch * len(train_loader) + batch)
                writer.add_scalar('Dice/train', dice.item(),
                                  epoch * len(train_loader) + batch)

        # note the parentheses: every 10 * print_freq batches
        if batch % (10 * args.print_freq) == 0 and writer is not None:
            output_grid = select_rand(volumes[:, 0, ...].detach(),
                                      label_masks[:, 0, ...].detach())
            writer.add_image('Dataset/train', output_grid,
                             epoch * len(train_loader) + batch,
                             dataformats='CHW')
            pred_masks = F.softmax(out_masks, dim=1).argmax(dim=1)
            output_grid = select_rand(pred_masks.detach(),
                                      new_label_masks[:, 0, ...].detach())
            writer.add_image('out_pred/train', output_grid,
                             epoch * len(train_loader) + batch,
                             dataformats='CHW')

    progress.display(batch, logging.INFO, reduce=True)
    if writer is not None:
        writer.add_scalar('Epoch/train_loss', losses.avg, epoch)
        writer.add_scalar('Epoch/train_dice', dices.avg, epoch)