def train(args, snapshot_path): base_lr = args.base_lr num_classes = args.num_classes batch_size = args.batch_size max_iterations = args.max_iterations def worker_init_fn(worker_id): random.seed(args.seed + worker_id) model = net_factory(net_type=args.model, in_chns=1, class_num=num_classes) db_train = BaseDataSets(base_dir=args.root_path, split="train", num=None, transform=transforms.Compose( [RandomGenerator(args.patch_size)])) total_slices = len(db_train) labeled_slice = patients_to_slices(args.root_path, args.labeled_num) print("Total silices is: {}, labeled slices is: {}".format( total_slices, labeled_slice)) labeled_idxs = list(range(0, labeled_slice)) unlabeled_idxs = list(range(labeled_slice, total_slices)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=16, pin_memory=True, worker_init_fn=worker_init_fn) db_val = BaseDataSets(base_dir=args.root_path, split="val") valloader = DataLoader(db_val, batch_size=1, shuffle=False, num_workers=1) model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(num_classes) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() unlabeled_volume_batch = volume_batch[args.labeled_bs:] outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) loss_ce = ce_loss(outputs[:args.labeled_bs], label_batch[:][:args.labeled_bs].long()) loss_dice = dice_loss(outputs_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num // 150) consistency_loss = losses.entropy_loss(outputs_soft, C=4) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) if iter_num % 20 == 0: image = volume_batch[1, 0:1, :, :] writer.add_image('train/Image', image, iter_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True) writer.add_image('train/Prediction', outputs[1, ...] 
* 50, iter_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('train/GroundTruth', labs, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() metric_list = 0.0 for i_batch, sampled_batch in enumerate(valloader): metric_i = test_single_volume(sampled_batch["image"], sampled_batch["label"], model, classes=num_classes) metric_list += np.array(metric_i) metric_list = metric_list / len(db_val) for class_i in range(num_classes - 1): writer.add_scalar('info/val_{}_dice'.format(class_i + 1), metric_list[class_i, 0], iter_num) writer.add_scalar('info/val_{}_hd95'.format(class_i + 1), metric_list[class_i, 1], iter_num) performance = np.mean(metric_list, axis=0)[0] mean_hd95 = np.mean(metric_list, axis=0)[1] writer.add_scalar('info/val_mean_dice', performance, iter_num) writer.add_scalar('info/val_mean_hd95', mean_hd95, iter_num) if performance > best_performance: best_performance = performance save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) logging.info('iteration %d : mean_dice : %f mean_hd95 : %f' % (iter_num, performance, mean_hd95)) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
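# The unsupervised term above is losses.entropy_loss(outputs_soft, C=4), an entropy-
# minimization regularizer on the softmax predictions of the whole batch. A minimal
# sketch of such a regularizer is given below for reference; the actual helper in
# utils/losses.py may normalize or mask differently.
import math

import torch


def entropy_loss_sketch(probs: torch.Tensor, C: int = 4) -> torch.Tensor:
    """probs: softmax output of shape (N, C, H, W); returns a scalar in [0, 1]."""
    # Per-pixel entropy, normalized by log(C) so a uniform prediction scores 1.
    per_pixel = -torch.sum(probs * torch.log(probs + 1e-6), dim=1) / math.log(C)
    return per_pixel.mean()


# usage: consistency_loss = entropy_loss_sketch(outputs_soft, C=num_classes)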
def train(args, snapshot_path): base_lr = args.base_lr num_classes = args.num_classes batch_size = args.batch_size max_iterations = args.max_iterations model = net_factory(net_type=args.model, in_chns=1, class_num=num_classes) db_train = BaseDataSets(base_dir=args.root_path, split="train", transform=transforms.Compose( [RandomGenerator(args.patch_size)]), fold=args.fold, sup_type=args.sup_type) db_val = BaseDataSets(base_dir=args.root_path, split="val") def worker_init_fn(worker_id): random.seed(args.seed + worker_id) trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True, worker_init_fn=worker_init_fn) valloader = DataLoader(db_val, batch_size=1, shuffle=False, num_workers=1) model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss(ignore_index=4) dice_loss = losses.DiceLoss(num_classes) gatecrf_loss = ModelLossSemsegGatedCRF() writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) loss_gatedcrf_kernels_desc = [{"weight": 1, "xy": 6, "rgb": 0.1}] loss_gatedcrf_radius = 5 for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) loss_ce = ce_loss(outputs, label_batch[:].long()) out_gatedcrf = gatecrf_loss( outputs_soft, loss_gatedcrf_kernels_desc, loss_gatedcrf_radius, volume_batch, 256, 256, )["loss"] loss = loss_ce + 0.1 * out_gatedcrf optimizer.zero_grad() loss.backward() optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/out_gatedcrf', out_gatedcrf, iter_num) logging.info('iteration %d : loss : %f, loss_ce: %f' % (iter_num, loss.item(), loss_ce.item())) if iter_num % 20 == 0: image = volume_batch[1, 0:1, :, :] image = (image - image.min()) / (image.max() - image.min()) writer.add_image('train/Image', image, iter_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True) writer.add_image('train/Prediction', outputs[1, ...] 
* 50, iter_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('train/GroundTruth', labs, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() metric_list = 0.0 for i_batch, sampled_batch in enumerate(valloader): metric_i = test_single_volume(sampled_batch["image"], sampled_batch["label"], model, classes=num_classes) metric_list += np.array(metric_i) metric_list = metric_list / len(db_val) for class_i in range(num_classes - 1): writer.add_scalar('info/val_{}_dice'.format(class_i + 1), metric_list[class_i, 0], iter_num) writer.add_scalar('info/val_{}_hd95'.format(class_i + 1), metric_list[class_i, 1], iter_num) performance = np.mean(metric_list, axis=0)[0] mean_hd95 = np.mean(metric_list, axis=0)[1] writer.add_scalar('info/val_mean_dice', performance, iter_num) writer.add_scalar('info/val_mean_hd95', mean_hd95, iter_num) if performance > best_performance: best_performance = performance save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) logging.info('iteration %d : mean_dice : %f mean_hd95 : %f' % (iter_num, performance, mean_hd95)) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
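# The supervised term above is a partial cross-entropy: CrossEntropyLoss(ignore_index=4)
# skips every pixel whose label equals 4 (the "unannotated" value in this weakly
# supervised setup), so only scribbled pixels carry a gradient, and the gated CRF term
# regularizes the rest. A small self-contained illustration of that masking behaviour
# (shapes and label values here are assumptions for the demo):
import torch
from torch.nn import CrossEntropyLoss

logits = torch.randn(1, 4, 4, 4)                      # 4 classes, 4x4 image
labels = torch.full((1, 4, 4), 4, dtype=torch.long)   # everything unannotated (label 4)
labels[0, 1, 1] = 2                                   # a single scribbled pixel of class 2

partial_ce = CrossEntropyLoss(ignore_index=4)
loss = partial_ce(logits, labels)                     # only the scribbled pixel contributes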
def train(args, snapshot_path): base_lr = args.base_lr num_classes = args.num_classes batch_size = args.batch_size max_iterations = args.max_iterations def create_model(ema=False): # Network definition model = net_factory(net_type=args.model, in_chns=1, class_num=num_classes) if ema: for param in model.parameters(): param.detach_() return model model = create_model() ema_model = create_model(ema=True) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) db_train = BaseDataSets(base_dir=args.root_path, split="train", num=None, transform=transforms.Compose( [RandomGenerator(args.patch_size)])) db_val = BaseDataSets(base_dir=args.root_path, split="val") total_slices = len(db_train) labeled_slice = patients_to_slices(args.root_path, args.labeled_num) print("Total silices is: {}, labeled slices is: {}".format( total_slices, labeled_slice)) labeled_idxs = list(range(0, labeled_slice)) unlabeled_idxs = list(range(labeled_slice, total_slices)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) model.train() valloader = DataLoader(db_val, batch_size=1, shuffle=False, num_workers=1) optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(num_classes) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() unlabeled_volume_batch = volume_batch[args.labeled_bs:] noise = torch.clamp( torch.randn_like(unlabeled_volume_batch) * 0.1, -0.2, 0.2) ema_inputs = unlabeled_volume_batch + noise outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) with torch.no_grad(): ema_output = ema_model(ema_inputs) T = 8 _, _, w, h = unlabeled_volume_batch.shape volume_batch_r = unlabeled_volume_batch.repeat(2, 1, 1, 1) stride = volume_batch_r.shape[0] // 2 preds = torch.zeros([stride * T, num_classes, w, h]).cuda() for i in range(T // 2): ema_inputs = volume_batch_r + \ torch.clamp(torch.randn_like( volume_batch_r) * 0.1, -0.2, 0.2) with torch.no_grad(): preds[2 * stride * i:2 * stride * (i + 1)] = ema_model(ema_inputs) preds = F.softmax(preds, dim=1) preds = preds.reshape(T, stride, num_classes, w, h) preds = torch.mean(preds, dim=0) uncertainty = -1.0 * \ torch.sum(preds*torch.log(preds + 1e-6), dim=1, keepdim=True) loss_ce = ce_loss(outputs[:args.labeled_bs], label_batch[:args.labeled_bs][:].long()) loss_dice = dice_loss(outputs_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num // 150) consistency_dist = losses.softmax_mse_loss( outputs[args.labeled_bs:], ema_output) # (batch, 2, 112,112,80) threshold = (0.75 + 0.25 * ramps.sigmoid_rampup( iter_num, max_iterations)) * np.log(2) mask = (uncertainty < threshold).float() consistency_loss = torch.sum( mask * consistency_dist) / (2 * torch.sum(mask) + 1e-16) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() 
loss.backward() optimizer.step() update_ema_variables(model, ema_model, args.ema_decay, iter_num) lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) if iter_num % 20 == 0: image = volume_batch[1, 0:1, :, :] writer.add_image('train/Image', image, iter_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True) writer.add_image('train/Prediction', outputs[1, ...] * 50, iter_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('train/GroundTruth', labs, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() metric_list = 0.0 for i_batch, sampled_batch in enumerate(valloader): metric_i = test_single_volume(sampled_batch["image"], sampled_batch["label"], model, classes=num_classes) metric_list += np.array(metric_i) metric_list = metric_list / len(db_val) for class_i in range(num_classes - 1): writer.add_scalar('info/val_{}_dice'.format(class_i + 1), metric_list[class_i, 0], iter_num) writer.add_scalar('info/val_{}_hd95'.format(class_i + 1), metric_list[class_i, 1], iter_num) performance = np.mean(metric_list, axis=0)[0] mean_hd95 = np.mean(metric_list, axis=0)[1] writer.add_scalar('info/val_mean_dice', performance, iter_num) writer.add_scalar('info/val_mean_hd95', mean_hd95, iter_num) if performance > best_performance: best_performance = performance save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) logging.info('iteration %d : mean_dice : %f mean_hd95 : %f' % (iter_num, performance, mean_hd95)) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
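# get_current_consistency_weight(iter_num // 150) ramps the unsupervised weight up
# gradually so the consistency signal does not dominate early training. A common choice
# (a sketch; the repository's ramps.sigmoid_rampup and its constants may differ) is the
# Gaussian ramp-up from the mean-teacher paper:
import numpy as np


def sigmoid_rampup(current: float, rampup_length: float) -> float:
    if rampup_length == 0:
        return 1.0
    current = float(np.clip(current, 0.0, rampup_length))
    phase = 1.0 - current / rampup_length
    return float(np.exp(-5.0 * phase * phase))


def get_current_consistency_weight_sketch(epoch: float,
                                          consistency: float = 0.1,
                                          consistency_rampup: float = 200.0) -> float:
    # grows smoothly from ~consistency * exp(-5) to consistency over `consistency_rampup`
    return consistency * sigmoid_rampup(epoch, consistency_rampup)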
def train(args, snapshot_path): base_lr = args.base_lr num_classes = 4 batch_size = args.batch_size max_iterations = args.max_iterations def worker_init_fn(worker_id): random.seed(args.seed + worker_id) model = UNet('efficientnet-b3', encoder_weights='imagenet', in_channels=1, classes=num_classes).cuda() DAN = FCDiscriminator(num_classes=num_classes) DAN = DAN.cuda() db_train = ACDC(base_dir=args.root_path, split="train", num=None, transform=transforms.Compose( [RandomGenerator(args.patch_size)])) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, 1312)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=16, pin_memory=True, worker_init_fn=worker_init_fn) db_val = ACDC(base_dir=args.root_path, split="val") valloader = DataLoader(db_val, batch_size=1, shuffle=False, num_workers=1) model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) DAN_optimizer = optim.Adam(DAN.parameters(), lr=args.DAN_lr, betas=(0.9, 0.99)) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(num_classes) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() DAN_target = torch.tensor([0] * args.batch_size).cuda() DAN_target[:args.labeled_bs] = 1 model.train() DAN.eval() outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) loss_ce = ce_loss(outputs[:args.labeled_bs], label_batch[:][:args.labeled_bs].long()) loss_dice = dice_loss(outputs_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num // 150) DAN_outputs = DAN(outputs_soft[args.labeled_bs:], volume_batch[args.labeled_bs:]) consistency_loss = F.cross_entropy( DAN_outputs, (DAN_target[:args.labeled_bs]).long()) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() model.eval() DAN.train() with torch.no_grad(): outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) DAN_outputs = DAN(outputs_soft, volume_batch) DAN_loss = F.cross_entropy(DAN_outputs, DAN_target.long()) DAN_optimizer.zero_grad() DAN_loss.backward() DAN_optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) if iter_num % 20 == 0: image = volume_batch[1, 0:1, :, :] writer.add_image('train/Image', image, iter_num) outputs = torch.argmax(torch.softmax(outputs, dim=1), 
dim=1, keepdim=True) writer.add_image('train/Prediction', outputs[1, ...] * 50, iter_num) labs = label_batch[1, ...].unsqueeze(0) * 50 writer.add_image('train/GroundTruth', labs, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() first_total, second_total, third_total = 0.0, 0.0, 0.0 for i_batch, sampled_batch in enumerate(valloader): first, second, third = test_single_volume( sampled_batch["image"], sampled_batch["label"], model) first_total += np.asarray(first) second_total += np.asarray(second) third_total += np.asarray(third) first_total, second_total, third_total = first_total / \ len(db_val), second_total / \ len(db_val), third_total/len(db_val) writer.add_scalar('info/val_one_dice', first_total[0], iter_num) writer.add_scalar('info/val_one_hd95', first_total[1], iter_num) writer.add_scalar('info/val_two_dice', second_total[0], iter_num) writer.add_scalar('info/val_two_hd95', second_total[1], iter_num) writer.add_scalar('info/val_three_dice', third_total[0], iter_num) writer.add_scalar('info/val_three_hd95', third_total[1], iter_num) performance = (first_total[0] + second_total[0] + third_total[0]) / 3 mean_hd95 = (first_total[1] + second_total[1] + third_total[1]) / 3 writer.add_scalar('info/val_mean_dice', performance, iter_num) writer.add_scalar('info/val_mean_hd95', mean_hd95, iter_num) if performance > best_performance: best_performance = performance save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) logging.info('iteration %d : mean_dice : %f mean_hd95 : %f' % (iter_num, performance, mean_hd95)) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
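# The DAN branch above alternates two updates: (1) with the discriminator frozen via
# DAN.eval(), the segmenter is rewarded when the discriminator classifies its unlabeled
# predictions as coming from labeled data (target 1); (2) with the segmenter in eval
# mode, the discriminator learns to tell the two apart. A compact sketch of step (1)
# with a toy discriminator (FCDiscriminator's real architecture is different):
import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyDiscriminator(nn.Module):
    def __init__(self, num_classes: int = 4):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(num_classes + 1, 16, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 2))

    def forward(self, seg_probs, image):
        # the discriminator sees the predicted probabilities together with the image
        return self.net(torch.cat([seg_probs, image], dim=1))


disc = ToyDiscriminator()
probs_unlabeled = torch.softmax(torch.randn(2, 4, 32, 32), dim=1)
images_unlabeled = torch.randn(2, 1, 32, 32)

disc.eval()                                       # segmenter step: discriminator frozen
adv_target = torch.ones(2, dtype=torch.long)      # pretend these came from labeled data
consistency_loss = F.cross_entropy(disc(probs_unlabeled, images_unlabeled), adv_target)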
def train(args, snapshot_path): num_classes = 2 base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations net = unet_3D(n_classes=num_classes, in_channels=1) model = net.cuda() DAN = FC3DDiscriminator(num_classes=num_classes) DAN = DAN.cuda() db_train = BraTS2019(base_dir=train_data_path, split='train', num=None, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, 250)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) DAN_optimizer = optim.Adam(DAN.parameters(), lr=args.DAN_lr, betas=(0.9, 0.99)) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(2) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() DAN_target = torch.tensor([1, 1, 0, 0]).cuda() model.train() DAN.eval() outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) loss_ce = ce_loss(outputs, label_batch[:]) loss_dice = dice_loss(outputs_soft, label_batch.unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num // 150) DAN_outputs = DAN(outputs_soft[args.labeled_bs:], volume_batch[args.labeled_bs:]) consistency_loss = F.cross_entropy( DAN_outputs, (DAN_target[:args.labeled_bs]).long()) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() model.eval() DAN.train() with torch.no_grad(): outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) DAN_outputs = DAN(outputs_soft, volume_batch) DAN_loss = F.cross_entropy(DAN_outputs, DAN_target.long()) DAN_optimizer.zero_grad() DAN_loss.backward() DAN_optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) if iter_num % 20 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = outputs_soft[0, 1:2, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Predicted_label', 
grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() avg_metric = test_all_case(model, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric[:, 0].mean() > best_performance: best_performance = avg_metric[:, 0].mean() save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) writer.add_scalar('info/val_dice_score', avg_metric[0, 0], iter_num) writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num) logging.info('iteration %d : dice_score : %f hd95 : %f' % (iter_num, avg_metric[0, 0].mean(), avg_metric[0, 1].mean())) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
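# Every training loop above decays the learning rate with the same "poly" schedule,
# lr = base_lr * (1 - iter / max_iter) ** 0.9, written inline and applied to the
# optimizer's param groups by hand. Pulled out as a helper for readability (a sketch):
def poly_lr(base_lr: float, iter_num: int, max_iterations: int, power: float = 0.9) -> float:
    return base_lr * (1.0 - iter_num / max_iterations) ** power


# usage inside the training loop:
#   lr_ = poly_lr(base_lr, iter_num, max_iterations)
#   for param_group in optimizer.param_groups:
#       param_group['lr'] = lr_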
def train(args, snapshot_path): base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations num_classes = 2 def create_model(ema=False): # Network definition net = net_factory_3d(net_type=args.model, in_chns=1, class_num=num_classes) model = net.cuda() if ema: for param in model.parameters(): param.detach_() return model model = create_model() ema_model = create_model(ema=True) db_train = BraTS2019(base_dir=train_data_path, split='train', num=None, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, 250)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) model.train() ema_model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(2) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() unlabeled_volume_batch = volume_batch[args.labeled_bs:] noise = torch.clamp( torch.randn_like(unlabeled_volume_batch) * 0.1, -0.2, 0.2) ema_inputs = unlabeled_volume_batch + noise outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) with torch.no_grad(): ema_output = ema_model(ema_inputs) T = 8 _, _, d, w, h = unlabeled_volume_batch.shape volume_batch_r = unlabeled_volume_batch.repeat(2, 1, 1, 1, 1) stride = volume_batch_r.shape[0] // 2 preds = torch.zeros([stride * T, 2, d, w, h]).cuda() for i in range(T // 2): ema_inputs = volume_batch_r + \ torch.clamp(torch.randn_like( volume_batch_r) * 0.1, -0.2, 0.2) with torch.no_grad(): preds[2 * stride * i:2 * stride * (i + 1)] = ema_model(ema_inputs) preds = torch.softmax(preds, dim=1) preds = preds.reshape(T, stride, 2, d, w, h) preds = torch.mean(preds, dim=0) uncertainty = -1.0 * \ torch.sum(preds*torch.log(preds + 1e-6), dim=1, keepdim=True) loss_ce = ce_loss(outputs[:args.labeled_bs], label_batch[:args.labeled_bs][:]) loss_dice = dice_loss(outputs_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num // 150) consistency_dist = losses.softmax_mse_loss( outputs[args.labeled_bs:], ema_output) # (batch, 2, 112,112,80) threshold = (0.75 + 0.25 * ramps.sigmoid_rampup( iter_num, max_iterations)) * np.log(2) mask = (uncertainty < threshold).float() consistency_loss = torch.sum( mask * consistency_dist) / (2 * torch.sum(mask) + 1e-16) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() update_ema_variables(model, ema_model, args.ema_decay, iter_num) lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 
writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) writer.add_scalar('loss/loss', loss, iter_num) if iter_num % 20 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = outputs_soft[0, 1:2, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Predicted_label', grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() avg_metric = test_all_case(model, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric[:, 0].mean() > best_performance: best_performance = avg_metric[:, 0].mean() save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) writer.add_scalar('info/val_dice_score', avg_metric[0, 0], iter_num) writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num) logging.info('iteration %d : dice_score : %f hd95 : %f' % (iter_num, avg_metric[0, 0].mean(), avg_metric[0, 1].mean())) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
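# update_ema_variables(model, ema_model, args.ema_decay, iter_num) keeps the teacher as
# an exponential moving average of the student after each optimizer step. The canonical
# mean-teacher update is sketched below; the repository's helper may differ slightly in
# how it clamps the decay.
import torch


@torch.no_grad()
def update_ema_variables_sketch(model, ema_model, alpha: float, global_step: int) -> None:
    # Use a smaller effective decay early on so the teacher tracks the student quickly.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)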
def train(args, snapshot_path): num_classes = 3 base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations net = unet_3D_dv_semi(n_classes=num_classes, in_channels=1) model = net.cuda() db_train = BraTS2019(base_dir=train_data_path, split='train', num=None, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, args.total_labeled_num)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(num_classes) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) kl_distance = nn.KLDivLoss(reduction='none') for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() unlabeled_volume_batch = volume_batch[args.labeled_bs:] outputs_aux1, outputs_aux2, outputs_aux3, outputs_aux4, = model( volume_batch) outputs_aux1_soft = torch.softmax(outputs_aux1, dim=1) outputs_aux2_soft = torch.softmax(outputs_aux2, dim=1) outputs_aux3_soft = torch.softmax(outputs_aux3, dim=1) outputs_aux4_soft = torch.softmax(outputs_aux4, dim=1) loss_ce_aux1 = ce_loss(outputs_aux1[:args.labeled_bs], label_batch[:args.labeled_bs]) loss_ce_aux2 = ce_loss(outputs_aux2[:args.labeled_bs], label_batch[:args.labeled_bs]) loss_ce_aux3 = ce_loss(outputs_aux3[:args.labeled_bs], label_batch[:args.labeled_bs]) loss_ce_aux4 = ce_loss(outputs_aux4[:args.labeled_bs], label_batch[:args.labeled_bs]) loss_dice_aux1 = dice_loss( outputs_aux1_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) loss_dice_aux2 = dice_loss( outputs_aux2_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) loss_dice_aux3 = dice_loss( outputs_aux3_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) loss_dice_aux4 = dice_loss( outputs_aux4_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = (loss_ce_aux1 + loss_ce_aux2 + loss_ce_aux3 + loss_ce_aux4 + loss_dice_aux1 + loss_dice_aux2 + loss_dice_aux3 + loss_dice_aux4) / 8 preds = (outputs_aux1_soft + outputs_aux2_soft + outputs_aux3_soft + outputs_aux4_soft) / 4 variance_aux1 = torch.sum(kl_distance( torch.log(outputs_aux1_soft[args.labeled_bs:]), preds[args.labeled_bs:]), dim=1, keepdim=True) exp_variance_aux1 = torch.exp(-variance_aux1) variance_aux2 = torch.sum(kl_distance( torch.log(outputs_aux2_soft[args.labeled_bs:]), preds[args.labeled_bs:]), dim=1, keepdim=True) exp_variance_aux2 = torch.exp(-variance_aux2) variance_aux3 = torch.sum(kl_distance( torch.log(outputs_aux3_soft[args.labeled_bs:]), preds[args.labeled_bs:]), dim=1, keepdim=True) exp_variance_aux3 = torch.exp(-variance_aux3) variance_aux4 = torch.sum(kl_distance( torch.log(outputs_aux4_soft[args.labeled_bs:]), preds[args.labeled_bs:]), 
dim=1, keepdim=True) exp_variance_aux4 = torch.exp(-variance_aux4) consistency_weight = get_current_consistency_weight(iter_num // 150) consistency_dist_aux1 = (preds[args.labeled_bs:] - outputs_aux1_soft[args.labeled_bs:])**2 consistency_loss_aux1 = torch.mean( consistency_dist_aux1 * exp_variance_aux1) / (torch.mean(exp_variance_aux1) + 1e-8) + torch.mean(variance_aux1) consistency_dist_aux2 = (preds[args.labeled_bs:] - outputs_aux2_soft[args.labeled_bs:])**2 consistency_loss_aux2 = torch.mean( consistency_dist_aux2 * exp_variance_aux2) / (torch.mean(exp_variance_aux2) + 1e-8) + torch.mean(variance_aux2) consistency_dist_aux3 = (preds[args.labeled_bs:] - outputs_aux3_soft[args.labeled_bs:])**2 consistency_loss_aux3 = torch.mean( consistency_dist_aux3 * exp_variance_aux3) / (torch.mean(exp_variance_aux3) + 1e-8) + torch.mean(variance_aux3) consistency_dist_aux4 = (preds[args.labeled_bs:] - outputs_aux4_soft[args.labeled_bs:])**2 consistency_loss_aux4 = torch.mean( consistency_dist_aux4 * exp_variance_aux4) / (torch.mean(exp_variance_aux4) + 1e-8) + torch.mean(variance_aux4) consistency_loss = (consistency_loss_aux1 + consistency_loss_aux2 + consistency_loss_aux3 + consistency_loss_aux4) / 4 loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/supervised_loss', supervised_loss, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info('iteration %d : loss : %f, supervised_loss: %f' % (iter_num, loss.item(), supervised_loss.item())) if iter_num % 20 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = torch.argmax( outputs_aux1_soft, dim=1, keepdim=True)[0, 0:1, :, :, 20:61:10].permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) * 100 grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Predicted_label', grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) * 100 grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() avg_metric = test_all_case(model, args.root_path, test_list="val.txt", num_classes=num_classes, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric[:, 0].mean() > best_performance: best_performance = avg_metric[:, 0].mean() save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) for cls in range(1, num_classes): writer.add_scalar('info/val_cls_{}_dice_score'.format(cls), avg_metric[cls - 1, 0], iter_num) writer.add_scalar('info/val_cls_{}_hd95'.format(cls), avg_metric[cls - 1, 1], iter_num) writer.add_scalar('info/val_mean_dice_score', avg_metric[:, 0].mean(), iter_num) writer.add_scalar('info/val_mean_hd95', avg_metric[:, 1].mean(), iter_num) logging.info('iteration %d : 
dice_score : %f hd95 : %f' % (iter_num, avg_metric[:, 0].mean(), avg_metric[:, 1].mean())) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
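# In the block above each auxiliary decoder is pulled toward the averaged prediction
# `preds`, with the squared error re-weighted by exp(-KL(branch || preds)) so voxels
# where that branch is already far from the consensus contribute less, and the KL term
# itself is added back as a regularizer. One branch of that computation, as a sketch:
import torch
import torch.nn as nn

kl_distance = nn.KLDivLoss(reduction='none')


def branch_consistency(branch_soft: torch.Tensor, mean_soft: torch.Tensor) -> torch.Tensor:
    # branch_soft, mean_soft: softmax outputs of shape (N, C, D, H, W)
    variance = torch.sum(kl_distance(torch.log(branch_soft + 1e-6), mean_soft),
                         dim=1, keepdim=True)
    exp_variance = torch.exp(-variance)
    dist = (mean_soft - branch_soft) ** 2
    return (torch.mean(dist * exp_variance) / (torch.mean(exp_variance) + 1e-8)
            + torch.mean(variance))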
def train(args, snapshot_path): base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations def create_model(ema=False): # Network definition net = unet_3D(n_classes=2, in_channels=1) model = net.cuda() if ema: for param in model.parameters(): param.detach_() return model model = create_model() ema_model = create_model(ema=True) db_train = BraTS2019(base_dir=train_data_path, split='train', num=None, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, args.total_labeled_num)) batch_sampler = TwoStreamBatchSampler( labeled_idxs, unlabeled_idxs, batch_size, batch_size-args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) model.train() ema_model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(2) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch['label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() labeled_volume_batch = volume_batch[:args.labeled_bs] unlabeled_volume_batch = volume_batch[args.labeled_bs:] # ICT mix factors ict_mix_factors = np.random.beta( args.ict_alpha, args.ict_alpha, size=(args.labeled_bs//2, 1, 1, 1, 1)) ict_mix_factors = torch.tensor( ict_mix_factors, dtype=torch.float).cuda() unlabeled_volume_batch_0 = unlabeled_volume_batch[0:1, ...] unlabeled_volume_batch_1 = unlabeled_volume_batch[1:2, ...] 
# Mix images batch_ux_mixed = unlabeled_volume_batch_0 * \ (1.0 - ict_mix_factors) + \ unlabeled_volume_batch_1 * ict_mix_factors input_volume_batch = torch.cat( [labeled_volume_batch, batch_ux_mixed], dim=0) outputs = model(input_volume_batch) outputs_soft = torch.softmax(outputs, dim=1) with torch.no_grad(): ema_output_ux0 = torch.softmax( ema_model(unlabeled_volume_batch_0), dim=1) ema_output_ux1 = torch.softmax( ema_model(unlabeled_volume_batch_1), dim=1) batch_pred_mixed = ema_output_ux0 * \ (1.0 - ict_mix_factors) + ema_output_ux1 * ict_mix_factors loss_ce = ce_loss(outputs[:args.labeled_bs], label_batch[:args.labeled_bs][:]) loss_dice = dice_loss( outputs_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num//150) consistency_loss = torch.mean( (outputs_soft[args.labeled_bs:] - batch_pred_mixed)**2) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() update_ema_variables(model, ema_model, args.ema_decay, iter_num) lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) writer.add_scalar('loss/loss', loss, iter_num) if iter_num % 20 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = outputs_soft[0, 1:2, :, :, 20:61:10].permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Predicted_label', grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze( 0).permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() avg_metric = test_all_case( model, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=32, stride_z=32) if avg_metric[:, 0].mean() > best_performance: best_performance = avg_metric[:, 0].mean() save_mode_path = os.path.join(snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join(snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) writer.add_scalar('info/val_dice_score', avg_metric[0, 0], iter_num) writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num) logging.info( 'iteration %d : dice_score : %f hd95 : %f' % (iter_num, avg_metric[0, 0].mean(), avg_metric[0, 1].mean())) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join( snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break 
writer.close() return "Training Finished!"
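# Interpolation consistency training (ICT): two unlabeled volumes are blended with a
# Beta-sampled factor and the student must match the same blend of the teacher's
# predictions on the un-mixed inputs. The loop above draws one factor per mixed pair;
# this sketch uses a single scalar factor to keep the shape handling out of the way.
import numpy as np
import torch


def ict_mix(u0: torch.Tensor, u1: torch.Tensor, teacher: torch.nn.Module,
            ict_alpha: float = 0.2):
    lam = float(np.random.beta(ict_alpha, ict_alpha))
    mixed_input = (1.0 - lam) * u0 + lam * u1
    with torch.no_grad():                         # teacher targets carry no gradient
        p0 = torch.softmax(teacher(u0), dim=1)
        p1 = torch.softmax(teacher(u1), dim=1)
    mixed_target = (1.0 - lam) * p0 + lam * p1    # consistency target for the student
    return mixed_input, mixed_target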
def train(args, snapshot_path): base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations num_classes = 2 model = net_factory_3d(net_type=args.model, in_chns=1, class_num=num_classes) db_train = BraTS2019(base_dir=train_data_path, split='train', num=args.labeled_num, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True, worker_init_fn=worker_init_fn) model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(2) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch['label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) loss_ce = ce_loss(outputs, label_batch[:]) loss_dice = dice_loss(outputs_soft, label_batch.unsqueeze(1)) loss = 0.5 * (loss_dice + loss_ce) optimizer.zero_grad() loss.backward() optimizer.step() lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) writer.add_scalar('loss/loss', loss, iter_num) if iter_num % 20 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = outputs_soft[0, 1:2, :, :, 20:61:10].permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Predicted_label', grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze( 0).permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() avg_metric = test_all_case( model, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric[:, 0].mean() > best_performance: best_performance = avg_metric[:, 0].mean() save_mode_path = os.path.join(snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join(snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) writer.add_scalar('info/val_dice_score', avg_metric[0, 0], iter_num) writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num) logging.info( 'iteration %d : dice_score : %f hd95 : %f' % (iter_num, avg_metric[0, 0].mean(), avg_metric[0, 1].mean())) model.train() if iter_num % 3000 == 0: save_mode_path = 
os.path.join( snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
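# The supervised objective throughout is 0.5 * (Dice + cross-entropy), where
# losses.DiceLoss(n) takes softmax probabilities and the integer label map unsqueezed to
# (N, 1, ...). A minimal multi-class soft Dice with that interface is sketched below;
# the repository's version may smooth or weight classes differently.
import torch
import torch.nn as nn
import torch.nn.functional as F


class SoftDiceLossSketch(nn.Module):
    def __init__(self, n_classes: int, smooth: float = 1e-5):
        super().__init__()
        self.n_classes = n_classes
        self.smooth = smooth

    def forward(self, probs: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # probs: (N, C, ...) softmax scores; target: (N, 1, ...) integer labels
        one_hot = F.one_hot(target.squeeze(1).long(), self.n_classes)
        one_hot = one_hot.movedim(-1, 1).float()
        dims = tuple(range(2, probs.dim()))
        inter = torch.sum(probs * one_hot, dim=dims)
        union = torch.sum(probs, dim=dims) + torch.sum(one_hot, dim=dims)
        dice = (2.0 * inter + self.smooth) / (union + self.smooth)
        return 1.0 - dice.mean()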
import argparse

from torch import nn
from torch.utils.data import DataLoader, ConcatDataset
from torch.utils.tensorboard import SummaryWriter

from nets.unet import AttentionUNet, UNet
from nets import MODEL_DICT
from utils import losses, metrics
from utils.plot import plot_prediction
from utils.loaders import denormalize, get_datasets
import config
from config import MODEL_KWARGS, PATCH_SIZE, parser as base_parser

LOSSES_DICT = {
    "crossentropy": nn.CrossEntropyLoss(),
    "dice": losses.DiceLoss(variant='soft'),  # we use the soft Dice by default
    "iou": losses.soft_iou_loss,
    "focal": losses.focal_loss,
    "combined": losses.CombinedLoss()
}

parser = argparse.ArgumentParser(parents=[base_parser])
parser.add_argument("--model", type=str,
                    choices=list(MODEL_DICT.keys()), required=True)
parser.add_argument("--loss", type=str,
                    choices=list(LOSSES_DICT.keys()), default="crossentropy")
parser.add_argument("--dataset",
def train(args, snapshot_path): base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations num_classes = 2 net1 = net_factory_3d(net_type=args.model, in_chns=1, class_num=num_classes).cuda() net2 = net_factory_3d(net_type=args.model, in_chns=1, class_num=num_classes).cuda() model1 = kaiming_normal_init_weight(net1) model2 = xavier_normal_init_weight(net2) model1.train() model2.train() db_train = BraTS2019(base_dir=train_data_path, split='train', num=None, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, 250)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) optimizer1 = optim.SGD(model1.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) optimizer2 = optim.SGD(model2.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) best_performance1 = 0.0 best_performance2 = 0.0 iter_num = 0 ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(num_classes) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) max_epoch = max_iterations // len(trainloader) + 1 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() outputs1 = model1(volume_batch) outputs_soft1 = torch.softmax(outputs1, dim=1) outputs2 = model2(volume_batch) outputs_soft2 = torch.softmax(outputs2, dim=1) consistency_weight = get_current_consistency_weight(iter_num // 150) loss1 = 0.5 * ( ce_loss(outputs1[:args.labeled_bs], label_batch[:][:args.labeled_bs].long()) + dice_loss(outputs_soft1[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1))) loss2 = 0.5 * ( ce_loss(outputs2[:args.labeled_bs], label_batch[:][:args.labeled_bs].long()) + dice_loss(outputs_soft2[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1))) pseudo_outputs1 = torch.argmax( outputs_soft1[args.labeled_bs:].detach(), dim=1, keepdim=False) pseudo_outputs2 = torch.argmax( outputs_soft2[args.labeled_bs:].detach(), dim=1, keepdim=False) pseudo_supervision1 = ce_loss(outputs1[args.labeled_bs:], pseudo_outputs2) pseudo_supervision2 = ce_loss(outputs2[args.labeled_bs:], pseudo_outputs1) model1_loss = loss1 + consistency_weight * pseudo_supervision1 model2_loss = loss2 + consistency_weight * pseudo_supervision2 loss = model1_loss + model2_loss optimizer1.zero_grad() optimizer2.zero_grad() loss.backward() optimizer1.step() optimizer2.step() iter_num = iter_num + 1 lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group1 in optimizer1.param_groups: param_group1['lr'] = lr_ for param_group2 in optimizer2.param_groups: param_group2['lr'] = lr_ writer.add_scalar('lr', lr_, iter_num) writer.add_scalar('consistency_weight/consistency_weight', consistency_weight, iter_num) writer.add_scalar('loss/model1_loss', model1_loss, iter_num) writer.add_scalar('loss/model2_loss', model2_loss, iter_num) logging.info('iteration %d : model1 loss : %f model2 loss : %f' % (iter_num, model1_loss.item(), model2_loss.item())) if iter_num 
% 50 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = outputs_soft1[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Model1_Predicted_label', grid_image, iter_num) image = outputs_soft2[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Model2_Predicted_label', grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model1.eval() avg_metric1 = test_all_case(model1, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric1[:, 0].mean() > best_performance1: best_performance1 = avg_metric1[:, 0].mean() save_mode_path = os.path.join( snapshot_path, 'model1_iter_{}_dice_{}.pth'.format( iter_num, round(best_performance1, 4))) save_best = os.path.join( snapshot_path, '{}_best_model1.pth'.format(args.model)) torch.save(model1.state_dict(), save_mode_path) torch.save(model1.state_dict(), save_best) writer.add_scalar('info/model1_val_dice_score', avg_metric1[0, 0], iter_num) writer.add_scalar('info/model1_val_hd95', avg_metric1[0, 1], iter_num) logging.info( 'iteration %d : model1_dice_score : %f model1_hd95 : %f' % (iter_num, avg_metric1[0, 0].mean(), avg_metric1[0, 1].mean())) model1.train() model2.eval() avg_metric2 = test_all_case(model2, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric2[:, 0].mean() > best_performance2: best_performance2 = avg_metric2[:, 0].mean() save_mode_path = os.path.join( snapshot_path, 'model2_iter_{}_dice_{}.pth'.format( iter_num, round(best_performance2, 4))) save_best = os.path.join( snapshot_path, '{}_best_model2.pth'.format(args.model)) torch.save(model2.state_dict(), save_mode_path) torch.save(model2.state_dict(), save_best) writer.add_scalar('info/model2_val_dice_score', avg_metric2[0, 0], iter_num) writer.add_scalar('info/model2_val_hd95', avg_metric2[0, 1], iter_num) logging.info( 'iteration %d : model2_dice_score : %f model2_hd95 : %f' % (iter_num, avg_metric2[0, 0].mean(), avg_metric2[0, 1].mean())) model2.train() if iter_num % 3000 == 0: save_mode_path = os.path.join( snapshot_path, 'model1_iter_' + str(iter_num) + '.pth') torch.save(model1.state_dict(), save_mode_path) logging.info("save model1 to {}".format(save_mode_path)) save_mode_path = os.path.join( snapshot_path, 'model2_iter_' + str(iter_num) + '.pth') torch.save(model2.state_dict(), save_mode_path) logging.info("save model2 to {}".format(save_mode_path)) if iter_num >= max_iterations: break time1 = time.time() if iter_num >= max_iterations: iterator.close() break writer.close()
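# Cross pseudo supervision (CPS): two independently initialized networks supervise each
# other on the unlabeled half of the batch through hard pseudo-labels, and the cross
# term is ramped by the consistency weight. The unlabeled part of one step, as a sketch:
import torch
from torch.nn import CrossEntropyLoss

ce_loss = CrossEntropyLoss()


def cps_unsupervised(outputs1: torch.Tensor, outputs2: torch.Tensor):
    # outputs1 / outputs2: raw logits of the two models on the same unlabeled batch
    pseudo1 = torch.argmax(torch.softmax(outputs1, dim=1).detach(), dim=1)
    pseudo2 = torch.argmax(torch.softmax(outputs2, dim=1).detach(), dim=1)
    loss1 = ce_loss(outputs1, pseudo2)   # model1 learns from model2's pseudo-labels
    loss2 = ce_loss(outputs2, pseudo1)   # and vice versa
    return loss1, loss2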

def train(args, snapshot_path):
    base_lr = args.base_lr
    num_classes = 4
    batch_size = args.batch_size
    max_iterations = args.max_iterations

    def create_model(ema=False):
        # Network definition
        net = UNet('efficientnet-b3', encoder_weights='imagenet',
                   in_channels=1, classes=num_classes)
        model = net.cuda()
        if ema:
            # The EMA (teacher) model is updated manually, not by the optimizer.
            for param in model.parameters():
                param.detach_()
        return model

    model = create_model()
    ema_model = create_model(ema=True)

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    db_train = ACDC(base_dir=args.root_path, split="train", num=None,
                    transform=transforms.Compose(
                        [RandomGenerator(args.patch_size)]))
    db_val = ACDC(base_dir=args.root_path, split="val")

    labeled_idxs = list(range(0, args.labeled_num))
    unlabeled_idxs = list(range(args.labeled_num, 1312))
    batch_sampler = TwoStreamBatchSampler(
        labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs)

    trainloader = DataLoader(db_train, batch_sampler=batch_sampler,
                             num_workers=4, pin_memory=True,
                             worker_init_fn=worker_init_fn)

    model.train()
    ema_model.train()

    valloader = DataLoader(db_val, batch_size=1, shuffle=False,
                           num_workers=1)

    optimizer = optim.SGD(model.parameters(), lr=base_lr,
                          momentum=0.9, weight_decay=0.0001)
    ce_loss = CrossEntropyLoss()
    dice_loss = losses.DiceLoss(num_classes)

    writer = SummaryWriter(snapshot_path + '/log')
    logging.info("{} iterations per epoch".format(len(trainloader)))

    iter_num = 0
    max_epoch = max_iterations // len(trainloader) + 1
    best_performance = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        for i_batch, sampled_batch in enumerate(trainloader):
            volume_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()
            unlabeled_volume_batch = volume_batch[args.labeled_bs:]
            labeled_volume_batch = volume_batch[:args.labeled_bs]

            # ICT mix factors
            ict_mix_factors = np.random.beta(
                args.ict_alpha, args.ict_alpha,
                size=(args.labeled_bs // 2, 1, 1, 1))
            ict_mix_factors = torch.tensor(
                ict_mix_factors, dtype=torch.float).cuda()
            unlabeled_volume_batch_0 = unlabeled_volume_batch[
                0:args.labeled_bs // 2, ...]
            unlabeled_volume_batch_1 = unlabeled_volume_batch[
                args.labeled_bs // 2:, ...]

            # Mix images
            batch_ux_mixed = unlabeled_volume_batch_0 * \
                (1.0 - ict_mix_factors) + \
                unlabeled_volume_batch_1 * ict_mix_factors
            input_volume_batch = torch.cat(
                [labeled_volume_batch, batch_ux_mixed], dim=0)
            outputs = model(input_volume_batch)
            outputs_soft = torch.softmax(outputs, dim=1)
            with torch.no_grad():
                ema_output_ux0 = torch.softmax(
                    ema_model(unlabeled_volume_batch_0), dim=1)
                ema_output_ux1 = torch.softmax(
                    ema_model(unlabeled_volume_batch_1), dim=1)
                batch_pred_mixed = ema_output_ux0 * \
                    (1.0 - ict_mix_factors) + ema_output_ux1 * ict_mix_factors

            loss_ce = ce_loss(outputs[:args.labeled_bs],
                              label_batch[:args.labeled_bs].long())
            loss_dice = dice_loss(outputs_soft[:args.labeled_bs],
                                  label_batch[:args.labeled_bs].unsqueeze(1))
            supervised_loss = 0.5 * (loss_dice + loss_ce)
            consistency_weight = get_current_consistency_weight(iter_num // 150)
            consistency_loss = torch.mean(
                (outputs_soft[args.labeled_bs:] - batch_pred_mixed) ** 2)
            loss = supervised_loss + consistency_weight * consistency_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            update_ema_variables(model, ema_model, args.ema_decay, iter_num)

            lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_

            iter_num = iter_num + 1
            writer.add_scalar('info/lr', lr_, iter_num)
            writer.add_scalar('info/total_loss', loss, iter_num)
            writer.add_scalar('info/loss_ce', loss_ce, iter_num)
            writer.add_scalar('info/loss_dice', loss_dice, iter_num)
            writer.add_scalar('info/consistency_loss',
                              consistency_loss, iter_num)
            writer.add_scalar('info/consistency_weight',
                              consistency_weight, iter_num)
            logging.info(
                'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' %
                (iter_num, loss.item(), loss_ce.item(), loss_dice.item()))

            if iter_num % 20 == 0:
                image = volume_batch[1, 0:1, :, :]
                writer.add_image('train/Image', image, iter_num)
                outputs = torch.argmax(torch.softmax(outputs, dim=1),
                                       dim=1, keepdim=True)
                writer.add_image('train/Prediction',
                                 outputs[1, ...] * 50, iter_num)
                image = batch_ux_mixed[1, 0:1, :, :]
                writer.add_image('train/Mixed_Unlabeled', image, iter_num)
                labs = label_batch[1, ...].unsqueeze(0) * 50
                writer.add_image('train/GroundTruth', labs, iter_num)

            if iter_num > 0 and iter_num % 200 == 0:
                model.eval()
                first_total, second_total, third_total = 0.0, 0.0, 0.0
                for i_batch, sampled_batch in enumerate(valloader):
                    first, second, third = test_single_volume(
                        sampled_batch["image"], sampled_batch["label"], model)
                    first_total += np.asarray(first)
                    second_total += np.asarray(second)
                    third_total += np.asarray(third)
                first_total, second_total, third_total = first_total / \
                    len(db_val), second_total / \
                    len(db_val), third_total / len(db_val)
                writer.add_scalar('info/val_one_dice', first_total[0], iter_num)
                writer.add_scalar('info/val_one_hd95', first_total[1], iter_num)
                writer.add_scalar('info/val_two_dice', second_total[0], iter_num)
                writer.add_scalar('info/val_two_hd95', second_total[1], iter_num)
                writer.add_scalar('info/val_three_dice', third_total[0], iter_num)
                writer.add_scalar('info/val_three_hd95', third_total[1], iter_num)
                performance = (first_total[0] +
                               second_total[0] + third_total[0]) / 3
                mean_hd95 = (first_total[1] +
                             second_total[1] + third_total[1]) / 3
                writer.add_scalar('info/val_mean_dice', performance, iter_num)
                writer.add_scalar('info/val_mean_hd95', mean_hd95, iter_num)

                if performance > best_performance:
                    best_performance = performance
                    save_mode_path = os.path.join(
                        snapshot_path, 'iter_{}_dice_{}.pth'.format(
                            iter_num, round(best_performance, 4)))
                    save_best = os.path.join(
                        snapshot_path, '{}_best_model.pth'.format(args.model))
                    torch.save(model.state_dict(), save_mode_path)
                    torch.save(model.state_dict(), save_best)

                logging.info('iteration %d : mean_dice : %f mean_hd95 : %f' %
                             (iter_num, performance, mean_hd95))
                model.train()

            if iter_num % 3000 == 0:
                save_mode_path = os.path.join(
                    snapshot_path, 'iter_' + str(iter_num) + '.pth')
                torch.save(model.state_dict(), save_mode_path)
                logging.info("save model to {}".format(save_mode_path))

            if iter_num >= max_iterations:
                break
        if iter_num >= max_iterations:
            iterator.close()
            break
    writer.close()
    return "Training Finished!"
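
# --- Hedged sketch (not taken from this file) --------------------------------
# The ICT loop above calls get_current_consistency_weight() and
# update_ema_variables(), which are not defined in this excerpt. Below is a
# minimal sketch assuming the standard Mean Teacher-style sigmoid ramp-up and
# EMA update; the defaults (consistency=0.1, consistency_rampup=200.0) are
# illustrative assumptions, not values read from this codebase.
import numpy as np


def sigmoid_rampup(current, rampup_length):
    # Exponential sigmoid ramp-up commonly used by consistency-based methods.
    if rampup_length == 0:
        return 1.0
    current = np.clip(current, 0.0, rampup_length)
    phase = 1.0 - current / rampup_length
    return float(np.exp(-5.0 * phase * phase))


def get_current_consistency_weight(epoch, consistency=0.1,
                                   consistency_rampup=200.0):
    # Ramp the consistency weight up from 0 to `consistency` over training.
    return consistency * sigmoid_rampup(epoch, consistency_rampup)


def update_ema_variables(model, ema_model, alpha, global_step):
    # Use a smaller decay early on so the EMA teacher tracks the student quickly.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)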

def __init__(self, model, loss, n_classes, learning_rate=0.001,
             decay_after=1000000, weight_decay=0., model_file=None,
             *args, **kwargs):
    self.model = model
    self.model.cuda()
    self.init_lr = learning_rate
    self.n_classes = n_classes
    self.evaluate_with_dropout = kwargs.get("use_mc", False)
    self.lr_schedule_type = kwargs.get("lr_schedule_type", 'default')
    self.loss_name = loss
    print("INFO - Trainer ", model.__class__, loss, n_classes)
    self.loss_mask = None
    if loss == 'ce':
        self.criterion = nn.NLLLoss()
        self.criterion_key = 'log_softmax'
    elif loss == 'dice':
        self.criterion = losses.DiceLoss(n_classes)
        self.criterion_key = 'softmax'
    elif loss == 'dicev2':
        self.criterion = losses.DiceLossv2(n_classes)
        self.criterion_key = 'softmax'
    elif loss == "dicev3":
        self.criterion = losses.soft_dice_loss
        self.criterion_key = 'softmax'
    elif loss == "brier":
        self.criterion = losses.BrierLoss(n_classes)
        self.criterion_key = 'softmax'
    elif loss == 'brierv2':
        self.criterion = losses.brier_score_loss
        self.criterion_key = 'softmax'
    else:
        raise ValueError(
            "ERROR - Trainer - loss function unknown {}".format(loss))

    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=learning_rate, amsgrad=True,
                                      weight_decay=weight_decay)
    if self.lr_schedule_type == "default":
        print("INFO - using default lr schedule!")
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer, step_size=decay_after)
    else:
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=learning_rate, amsgrad=False,
                                          weight_decay=weight_decay)
        self.scheduler = CycleLR(self.optimizer, alpha_zero=self.init_lr,
                                 cycle_length=10000)
    self.training_losses = list()
    self.validation_losses = list()
    self._train_iter = 0
    self.current_training_loss = 0.
    self.current_validation_loss = 0.
    self.current_aleatoric_loss = 0.
    if model_file:
        self.load(model_file)
    if kwargs.get('verbose', False):
        torchsummary.summary(self.model,
                             (kwargs.get('n_channels'), 256, 256))
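
# --- Hedged usage example (hypothetical) --------------------------------------
# How the constructor above might be called; the class name "Trainer", the
# net_factory settings, and n_channels=1 are illustrative assumptions and are
# not confirmed by this fragment.
net = net_factory(net_type='unet', in_chns=1, class_num=4)
trainer = Trainer(net, loss='dice', n_classes=4, learning_rate=1e-3,
                  lr_schedule_type='default', n_channels=1, verbose=False)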

def train(args, snapshot_path):
    base_lr = args.base_lr
    num_classes = args.num_classes
    batch_size = args.batch_size
    max_iterations = args.max_iterations

    def create_model(ema=False):
        # Network definition
        model = net_factory(net_type=args.model, in_chns=1,
                            class_num=num_classes)
        if ema:
            for param in model.parameters():
                param.detach_()
        return model

    # Two differently initialized networks for cross pseudo supervision
    model1 = kaiming_normal_init_weight(create_model())
    model2 = xavier_normal_init_weight(create_model())

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    db_train = BaseDataSets(base_dir=args.root_path, split="train", num=None,
                            transform=transforms.Compose(
                                [RandomGenerator(args.patch_size)]))
    db_val = BaseDataSets(base_dir=args.root_path, split="val")

    total_slices = len(db_train)
    labeled_slice = patients_to_slices(args.root_path, args.labeled_num)
    print("Total slices is: {}, labeled slices is: {}".format(
        total_slices, labeled_slice))
    labeled_idxs = list(range(0, labeled_slice))
    unlabeled_idxs = list(range(labeled_slice, total_slices))
    batch_sampler = TwoStreamBatchSampler(
        labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs)

    trainloader = DataLoader(db_train, batch_sampler=batch_sampler,
                             num_workers=4, pin_memory=True,
                             worker_init_fn=worker_init_fn)

    model1.train()
    model2.train()

    valloader = DataLoader(db_val, batch_size=1, shuffle=False,
                           num_workers=1)

    optimizer1 = optim.SGD(model1.parameters(), lr=base_lr,
                           momentum=0.9, weight_decay=0.0001)
    optimizer2 = optim.SGD(model2.parameters(), lr=base_lr,
                           momentum=0.9, weight_decay=0.0001)
    ce_loss = CrossEntropyLoss()
    dice_loss = losses.DiceLoss(num_classes)

    writer = SummaryWriter(snapshot_path + '/log')
    logging.info("{} iterations per epoch".format(len(trainloader)))

    iter_num = 0
    max_epoch = max_iterations // len(trainloader) + 1
    best_performance1 = 0.0
    best_performance2 = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        for i_batch, sampled_batch in enumerate(trainloader):
            volume_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()

            outputs1 = model1(volume_batch)
            outputs_soft1 = torch.softmax(outputs1, dim=1)

            outputs2 = model2(volume_batch)
            outputs_soft2 = torch.softmax(outputs2, dim=1)
            consistency_weight = get_current_consistency_weight(iter_num // 150)

            loss1 = 0.5 * (
                ce_loss(outputs1[:args.labeled_bs],
                        label_batch[:args.labeled_bs].long()) +
                dice_loss(outputs_soft1[:args.labeled_bs],
                          label_batch[:args.labeled_bs].unsqueeze(1)))
            loss2 = 0.5 * (
                ce_loss(outputs2[:args.labeled_bs],
                        label_batch[:args.labeled_bs].long()) +
                dice_loss(outputs_soft2[:args.labeled_bs],
                          label_batch[:args.labeled_bs].unsqueeze(1)))

            # Each model is supervised on unlabeled data by the other's
            # hard pseudo labels (cross pseudo supervision).
            pseudo_outputs1 = torch.argmax(
                outputs_soft1[args.labeled_bs:].detach(), dim=1, keepdim=False)
            pseudo_outputs2 = torch.argmax(
                outputs_soft2[args.labeled_bs:].detach(), dim=1, keepdim=False)

            pseudo_supervision1 = ce_loss(outputs1[args.labeled_bs:],
                                          pseudo_outputs2)
            pseudo_supervision2 = ce_loss(outputs2[args.labeled_bs:],
                                          pseudo_outputs1)

            model1_loss = loss1 + consistency_weight * pseudo_supervision1
            model2_loss = loss2 + consistency_weight * pseudo_supervision2
            loss = model1_loss + model2_loss

            optimizer1.zero_grad()
            optimizer2.zero_grad()
            loss.backward()
            optimizer1.step()
            optimizer2.step()

            iter_num = iter_num + 1

            lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9
            for param_group in optimizer1.param_groups:
                param_group['lr'] = lr_
            for param_group in optimizer2.param_groups:
                param_group['lr'] = lr_

            writer.add_scalar('lr', lr_, iter_num)
            writer.add_scalar('consistency_weight/consistency_weight',
                              consistency_weight, iter_num)
            writer.add_scalar('loss/model1_loss', model1_loss, iter_num)
            writer.add_scalar('loss/model2_loss', model2_loss, iter_num)
            logging.info('iteration %d : model1 loss : %f model2 loss : %f' %
                         (iter_num, model1_loss.item(), model2_loss.item()))

            if iter_num % 50 == 0:
                image = volume_batch[1, 0:1, :, :]
                writer.add_image('train/Image', image, iter_num)
                outputs = torch.argmax(torch.softmax(outputs1, dim=1),
                                       dim=1, keepdim=True)
                writer.add_image('train/model1_Prediction',
                                 outputs[1, ...] * 50, iter_num)
                outputs = torch.argmax(torch.softmax(outputs2, dim=1),
                                       dim=1, keepdim=True)
                writer.add_image('train/model2_Prediction',
                                 outputs[1, ...] * 50, iter_num)
                labs = label_batch[1, ...].unsqueeze(0) * 50
                writer.add_image('train/GroundTruth', labs, iter_num)

            if iter_num > 0 and iter_num % 200 == 0:
                model1.eval()
                metric_list = 0.0
                for i_batch, sampled_batch in enumerate(valloader):
                    metric_i = test_single_volume(
                        sampled_batch["image"], sampled_batch["label"],
                        model1, classes=num_classes)
                    metric_list += np.array(metric_i)
                metric_list = metric_list / len(db_val)
                for class_i in range(num_classes - 1):
                    writer.add_scalar(
                        'info/model1_val_{}_dice'.format(class_i + 1),
                        metric_list[class_i, 0], iter_num)
                    writer.add_scalar(
                        'info/model1_val_{}_hd95'.format(class_i + 1),
                        metric_list[class_i, 1], iter_num)

                performance1 = np.mean(metric_list, axis=0)[0]
                mean_hd951 = np.mean(metric_list, axis=0)[1]
                writer.add_scalar('info/model1_val_mean_dice',
                                  performance1, iter_num)
                writer.add_scalar('info/model1_val_mean_hd95',
                                  mean_hd951, iter_num)

                if performance1 > best_performance1:
                    best_performance1 = performance1
                    save_mode_path = os.path.join(
                        snapshot_path, 'model1_iter_{}_dice_{}.pth'.format(
                            iter_num, round(best_performance1, 4)))
                    save_best = os.path.join(
                        snapshot_path, '{}_best_model1.pth'.format(args.model))
                    torch.save(model1.state_dict(), save_mode_path)
                    torch.save(model1.state_dict(), save_best)

                logging.info(
                    'iteration %d : model1_mean_dice : %f model1_mean_hd95 : %f' %
                    (iter_num, performance1, mean_hd951))
                model1.train()

                model2.eval()
                metric_list = 0.0
                for i_batch, sampled_batch in enumerate(valloader):
                    metric_i = test_single_volume(
                        sampled_batch["image"], sampled_batch["label"],
                        model2, classes=num_classes)
                    metric_list += np.array(metric_i)
                metric_list = metric_list / len(db_val)
                for class_i in range(num_classes - 1):
                    writer.add_scalar(
                        'info/model2_val_{}_dice'.format(class_i + 1),
                        metric_list[class_i, 0], iter_num)
                    writer.add_scalar(
                        'info/model2_val_{}_hd95'.format(class_i + 1),
                        metric_list[class_i, 1], iter_num)

                performance2 = np.mean(metric_list, axis=0)[0]
                mean_hd952 = np.mean(metric_list, axis=0)[1]
                writer.add_scalar('info/model2_val_mean_dice',
                                  performance2, iter_num)
                writer.add_scalar('info/model2_val_mean_hd95',
                                  mean_hd952, iter_num)

                if performance2 > best_performance2:
                    best_performance2 = performance2
                    save_mode_path = os.path.join(
                        snapshot_path, 'model2_iter_{}_dice_{}.pth'.format(
                            iter_num, round(best_performance2, 4)))
                    save_best = os.path.join(
                        snapshot_path, '{}_best_model2.pth'.format(args.model))
                    torch.save(model2.state_dict(), save_mode_path)
                    torch.save(model2.state_dict(), save_best)

                logging.info(
                    'iteration %d : model2_mean_dice : %f model2_mean_hd95 : %f' %
                    (iter_num, performance2, mean_hd952))
                model2.train()

            # change lr
            if iter_num % 2500 == 0:
                lr_ = base_lr * 0.1 ** (iter_num // 2500)
                for param_group in optimizer1.param_groups:
                    param_group['lr'] = lr_
                for param_group in optimizer2.param_groups:
                    param_group['lr'] = lr_

            if iter_num % 3000 == 0:
                save_mode_path = os.path.join(
                    snapshot_path, 'model1_iter_' + str(iter_num) + '.pth')
                torch.save(model1.state_dict(), save_mode_path)
                logging.info("save model1 to {}".format(save_mode_path))

                save_mode_path = os.path.join(
                    snapshot_path, 'model2_iter_' + str(iter_num) + '.pth')
                torch.save(model2.state_dict(), save_mode_path)
                logging.info("save model2 to {}".format(save_mode_path))

            if iter_num >= max_iterations:
                break
            time1 = time.time()
        if iter_num >= max_iterations:
            iterator.close()
            break
    writer.close()
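
# --- Hedged sketch (not taken from this file) --------------------------------
# model1/model2 above are produced by kaiming_normal_init_weight() and
# xavier_normal_init_weight(), which are not defined in this excerpt. One
# plausible implementation, assuming the networks are built from Conv2d and
# BatchNorm2d layers:
import torch
from torch import nn


def kaiming_normal_init_weight(model):
    # He-normal init for convolutions, constant init for batch norm.
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            torch.nn.init.kaiming_normal_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
    return model


def xavier_normal_init_weight(model):
    # Xavier-normal init so the second model starts from different weights.
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_normal_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
    return model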

    encoder_name="mobilenetv1",
    encoder_weights=False,
    encoder_depth=5,
    psp_out_channels=512,  # PSP out channels after concat, not yet final
    psp_use_batchnorm=True,
    psp_dropout=0.2,
    in_channels=3,
    classes=len(CLASSES),
    activation='sigmoid',  # Optional[Union[str, callable]]
    dilated=False,
    # aux_params={'classes': 1, 'height': 320,
    #             'width': 320, 'dropout': 0.2},  # Opt
)

# Define parameters
loss = losses.DiceLoss()
metrics = [
    metrics.IoU(threshold=0.5),
    metrics.Accuracy(),
    metrics.Recall()
]
optimizer = torch.optim.Adam([
    dict(params=model.parameters(), lr=FLAGS.learning_rate),
])
# optimizer = adam_optimizer(model, FLAGS.learning_rate, weight_decay=1e-4)
lr_step_size = FLAGS.step_size
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_step_size,