import logging
import os
import random

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from torchvision.utils import make_grid
from tqdm import tqdm

# Project-specific modules: the import paths below assume an SSL4MIS-style
# repository layout and may need to be adapted to the surrounding code base.
from dataloaders.brats2019 import (BraTS2019, RandomCrop, RandomRotFlip,
                                   ToTensor, TwoStreamBatchSampler)
from networks.unet_3D import unet_3D
from utils import losses
from val_3D import test_all_case


def train(args, snapshot_path):
    base_lr = args.base_lr
    train_data_path = args.root_path
    batch_size = args.batch_size
    max_iterations = args.max_iterations

    def create_model(ema=False):
        # Network definition
        net = unet_3D(n_classes=2, in_channels=1)
        model = net.cuda()
        if ema:
            # The teacher (EMA) model is never updated by the optimizer,
            # so its parameters are detached from the autograd graph.
            for param in model.parameters():
                param.detach_()
        return model

    # Student and teacher share the same architecture.
    model = create_model()
    ema_model = create_model(ema=True)

    db_train = BraTS2019(base_dir=train_data_path,
                         split='train',
                         num=None,
                         transform=transforms.Compose([
                             RandomRotFlip(),
                             RandomCrop(args.patch_size),
                             ToTensor(),
                         ]))

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    # Every batch draws args.labeled_bs labeled cases and
    # (batch_size - args.labeled_bs) unlabeled cases.
    labeled_idxs = list(range(0, args.labeled_num))
    unlabeled_idxs = list(range(args.labeled_num, 250))
    batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs,
                                          batch_size, batch_size - args.labeled_bs)

    trainloader = DataLoader(db_train,
                             batch_sampler=batch_sampler,
                             num_workers=4,
                             pin_memory=True,
                             worker_init_fn=worker_init_fn)

    model.train()
    ema_model.train()

    optimizer = optim.SGD(model.parameters(), lr=base_lr,
                          momentum=0.9, weight_decay=0.0001)
    ce_loss = CrossEntropyLoss()
    dice_loss = losses.DiceLoss(2)

    writer = SummaryWriter(snapshot_path + '/log')
    logging.info("{} iterations per epoch".format(len(trainloader)))

    iter_num = 0
    max_epoch = max_iterations // len(trainloader) + 1
    best_performance = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        for i_batch, sampled_batch in enumerate(trainloader):

            volume_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()
            unlabeled_volume_batch = volume_batch[args.labeled_bs:]

            # Perturb the unlabeled volumes with bounded Gaussian noise
            # before passing them to the teacher.
            noise = torch.clamp(
                torch.randn_like(unlabeled_volume_batch) * 0.1, -0.2, 0.2)
            ema_inputs = unlabeled_volume_batch + noise

            outputs = model(volume_batch)
            outputs_soft = torch.softmax(outputs, dim=1)
            with torch.no_grad():
                ema_output = ema_model(ema_inputs)

            # Supervised losses are computed on the labeled part of the batch only.
            loss_ce = ce_loss(outputs[:args.labeled_bs],
                              label_batch[:args.labeled_bs])
            loss_dice = dice_loss(outputs_soft[:args.labeled_bs],
                                  label_batch[:args.labeled_bs].unsqueeze(1))
            supervised_loss = 0.5 * (loss_dice + loss_ce)

            # Consistency between student and teacher predictions on the
            # unlabeled crops, with a weight that is ramped up over training.
            consistency_weight = get_current_consistency_weight(iter_num // 150)
            consistency_loss = F.mse_loss(outputs[args.labeled_bs:], ema_output)
            loss = supervised_loss + consistency_weight * consistency_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            update_ema_variables(model, ema_model, args.ema_decay, iter_num)

            # Polynomial learning-rate decay.
            lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_

            iter_num = iter_num + 1
            writer.add_scalar('info/lr', lr_, iter_num)
            writer.add_scalar('info/total_loss', loss, iter_num)
            writer.add_scalar('info/loss_ce', loss_ce, iter_num)
            writer.add_scalar('info/loss_dice', loss_dice, iter_num)
            writer.add_scalar('info/consistency_loss', consistency_loss, iter_num)
            writer.add_scalar('info/consistency_weight', consistency_weight, iter_num)

            logging.info(
                'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' %
                (iter_num, loss.item(), loss_ce.item(), loss_dice.item()))
            writer.add_scalar('loss/loss', loss, iter_num)

            if iter_num % 20 == 0:
                # Log a few axial slices of the input, the soft prediction
                # and the ground truth to TensorBoard.
                image = volume_batch[0, 0:1, :, :, 20:61:10].permute(
                    3, 0, 1, 2).repeat(1, 3, 1, 1)
                grid_image = make_grid(image, 5, normalize=True)
                writer.add_image('train/Image', grid_image, iter_num)

                image = outputs_soft[0, 1:2, :, :, 20:61:10].permute(
                    3, 0, 1, 2).repeat(1, 3, 1, 1)
                grid_image = make_grid(image, 5, normalize=False)
                writer.add_image('train/Predicted_label', grid_image, iter_num)

                image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute(
                    3, 0, 1, 2).repeat(1, 3, 1, 1)
                grid_image = make_grid(image, 5, normalize=False)
                writer.add_image('train/Groundtruth_label', grid_image, iter_num)

            if iter_num > 0 and iter_num % 200 == 0:
                # Validate on the held-out list and keep the best checkpoint.
                model.eval()
                avg_metric = test_all_case(model, args.root_path,
                                           test_list="val.txt",
                                           num_classes=2,
                                           patch_size=args.patch_size,
                                           stride_xy=64, stride_z=64)
                if avg_metric[:, 0].mean() > best_performance:
                    best_performance = avg_metric[:, 0].mean()
                    save_mode_path = os.path.join(
                        snapshot_path, 'iter_{}_dice_{}.pth'.format(
                            iter_num, round(best_performance, 4)))
                    save_best = os.path.join(
                        snapshot_path, '{}_best_model.pth'.format(args.model))
                    torch.save(model.state_dict(), save_mode_path)
                    torch.save(model.state_dict(), save_best)

                writer.add_scalar('info/val_dice_score', avg_metric[0, 0], iter_num)
                writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num)
                logging.info('iteration %d : dice_score : %f hd95 : %f' %
                             (iter_num, avg_metric[0, 0].mean(),
                              avg_metric[0, 1].mean()))
                model.train()

            if iter_num % 3000 == 0:
                # Periodic checkpoint regardless of validation score.
                save_mode_path = os.path.join(snapshot_path,
                                              'iter_' + str(iter_num) + '.pth')
                torch.save(model.state_dict(), save_mode_path)
                logging.info("save model to {}".format(save_mode_path))

            if iter_num >= max_iterations:
                break
        if iter_num >= max_iterations:
            iterator.close()
            break
    writer.close()
    return "Training Finished!"
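

# ---------------------------------------------------------------------------
# The training loop above relies on two helpers that are not part of this
# excerpt: get_current_consistency_weight() and update_ema_variables().
# The sketch below is one plausible Mean Teacher-style implementation
# (Tarvainen & Valpola, 2017); the ramp-up schedule and the default values of
# max_weight / rampup_length are assumptions, not the original script's
# settings, and should be replaced by the project's own definitions.
# ---------------------------------------------------------------------------
import math


def sigmoid_rampup(current, rampup_length):
    # Exponential ramp-up from 0 to 1 over `rampup_length` steps
    # (schedule assumed, following common temporal-ensembling practice).
    if rampup_length == 0:
        return 1.0
    current = min(max(current, 0.0), rampup_length)
    phase = 1.0 - current / rampup_length
    return math.exp(-5.0 * phase * phase)


def get_current_consistency_weight(epoch, max_weight=0.1, rampup_length=200.0):
    # Weight of the unsupervised consistency term; `max_weight` and
    # `rampup_length` are placeholder defaults, not values from the source.
    return max_weight * sigmoid_rampup(epoch, rampup_length)


def update_ema_variables(model, ema_model, alpha, global_step):
    # Teacher weights are an exponential moving average of the student weights.
    # Use the true average until the exponential average becomes reliable.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)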
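

# ---------------------------------------------------------------------------
# Minimal sketch of how train() might be driven. The argument names match the
# attributes accessed inside train(); the default values and the snapshot
# directory layout here are illustrative placeholders, not the original
# experiment settings.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--root_path', type=str, default='../data/BraTS2019')
    parser.add_argument('--model', type=str, default='unet_3D')
    parser.add_argument('--max_iterations', type=int, default=30000)
    parser.add_argument('--batch_size', type=int, default=4)
    parser.add_argument('--labeled_bs', type=int, default=2)
    parser.add_argument('--labeled_num', type=int, default=25)
    parser.add_argument('--base_lr', type=float, default=0.01)
    parser.add_argument('--patch_size', type=int, nargs=3, default=[96, 96, 96])
    parser.add_argument('--seed', type=int, default=1337)
    parser.add_argument('--ema_decay', type=float, default=0.99)
    args = parser.parse_args()

    # Seed Python and PyTorch RNGs for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    snapshot_path = "../model/BraTS2019_{}_labeled/{}".format(
        args.labeled_num, args.model)
    os.makedirs(snapshot_path, exist_ok=True)
    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s] %(message)s')
    train(args, snapshot_path)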