else: raise Exception('Unknown models') if load_model: model_path = os.path.join(args.output_dir, 'checkpoint', args.initial_checkpoint) print_to_log('loading models from file', args.initial_checkpoint, log_file) model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage)) # model.load_state_dict(torch.load(model_path)) # torch.load(model_path) print_to_log('models loaded!', '', log_file) print("Running models: " + args.model) cuda = torch.cuda.is_available() and args.use_gpu if cuda: model = model.cuda() else: model = model.cpu() print_to_log('gpu', cuda, log_file) log_file.close() if not predict: print("training on train set") train_set = SemanticSegmentationDataset(args.train_dir, os.path.join('image_sets/', args.train_set), image_size, mode='train') val_set = SemanticSegmentationDataset(args.validation_dir, os.path.join('image_sets/', args.valid_set), image_size, mode='valid') loss = torch.nn.CrossEntropyLoss() train_loader = DataLoader(train_set, batch_size, shuffle=True, drop_last=True) val_loader = DataLoader(val_set, batch_size, drop_last=False)
def main() -> None:
    """Create the model and start the training.

    Builds a student/teacher UNet pair (mean-teacher self-ensembling) plus
    one FCDiscriminator per prediction head, then runs adversarial
    domain-adaptation training from the REFUGE source domain to the REFUGE
    target domain.  Hyper-parameters are read from the command line via
    ``get_arguments()``.  Side effects: writes teacher-model snapshots
    (``.pth``) into ``args.snapshot_dir`` and prints a loss log each step.
    """
    args = get_arguments()
    cudnn.enabled = True
    # Five heads are supervised/discriminated — matches the five
    # unsup_weight5..9 and lambda_adv_tgt5..9 argument groups below.
    n_discriminators = 5

    # create teacher & student
    student_net = UNet(3, n_classes=args.num_classes)
    teacher_net = UNet(3, n_classes=args.num_classes)
    student_params = list(student_net.parameters())
    # teacher doesn't need gradient as it's just a EMA of the student
    teacher_params = list(teacher_net.parameters())
    for param in teacher_params:
        param.requires_grad = False

    student_net.train()
    student_net.cuda(args.gpu)
    teacher_net.train()
    teacher_net.cuda(args.gpu)
    cudnn.benchmark = True

    # Per-head weights: unsup_weights scale both the supervised and the
    # consistency terms; lambda_adv_tgts scale the adversarial terms.
    unsup_weights = [
        args.unsup_weight5, args.unsup_weight6, args.unsup_weight7,
        args.unsup_weight8, args.unsup_weight9
    ]
    lambda_adv_tgts = [
        args.lambda_adv_tgt5, args.lambda_adv_tgt6, args.lambda_adv_tgt7,
        args.lambda_adv_tgt8, args.lambda_adv_tgt9
    ]

    # create a list of discriminators
    discriminators = []
    for dis_idx in range(n_discriminators):
        discriminators.append(FCDiscriminator(num_classes=args.num_classes))
        discriminators[dis_idx].train()
        discriminators[dis_idx].cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    # Pad the dataset length so the loader iterators never run out over
    # num_steps * iter_size * batch_size draws.
    max_iters = args.num_steps * args.iter_size * args.batch_size
    src_set = REFUGE(True, domain='REFUGE_SRC', is_transform=True,
                     augmentations=aug_student, aug_for_target=aug_teacher,
                     max_iters=max_iters)
    src_loader = data.DataLoader(src_set, batch_size=args.batch_size,
                                 shuffle=True, num_workers=args.num_workers,
                                 pin_memory=True)
    src_loader_iter = enumerate(src_loader)

    tgt_set = REFUGE(True, domain='REFUGE_DST', is_transform=True,
                     augmentations=aug_student, aug_for_target=aug_teacher,
                     max_iters=max_iters)
    tgt_loader = data.DataLoader(tgt_set, batch_size=args.batch_size,
                                 shuffle=True, num_workers=args.num_workers,
                                 pin_memory=True)
    tgt_loader_iter = enumerate(tgt_loader)

    # SGD updates the student; the teacher is updated as an exponential
    # moving average of the student weights (WeightEMA, no gradients).
    student_optimizer = optim.SGD(student_params, lr=args.learning_rate,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
    teacher_optimizer = optim_weight_ema.WeightEMA(
        teacher_params, student_params, alpha=args.teacher_alpha)

    d_optimizers = []
    for idx in range(n_discriminators):
        optimizer = optim.Adam(discriminators[idx].parameters(),
                               lr=args.learning_rate_D, betas=(0.9, 0.99))
        d_optimizers.append(optimizer)

    # BCEWithLogitsLoss: discriminator outputs are raw logits.
    calc_bce_loss = torch.nn.BCEWithLogitsLoss()

    # labels for adversarial training
    source_label, tgt_label = 0, 1

    for i_iter in range(args.num_steps):
        total_seg_loss = 0
        # Per-head running loss values, averaged over args.iter_size.
        seg_loss_vals = [0] * n_discriminators
        adv_tgt_loss_vals = [0] * n_discriminators
        d_loss_vals = [0] * n_discriminators
        unsup_loss_vals = [0] * n_discriminators

        # Gradients are cleared once per outer step; learning rates follow
        # the poly schedules in adjust_learning_rate(_D).
        for d_optimizer in d_optimizers:
            d_optimizer.zero_grad()
            adjust_learning_rate_D(d_optimizer, i_iter, args)
        student_optimizer.zero_grad()
        adjust_learning_rate(student_optimizer, i_iter, args)

        for sub_i in range(args.iter_size):
            # ******** Optimize source network with segmentation loss ********
            # As we don't change the discriminators, their parameters are fixed
            for discriminator in discriminators:
                for param in discriminator.parameters():
                    param.requires_grad = False

            _, src_batch = src_loader_iter.__next__()
            _, _, src_images, src_labels, _ = src_batch
            src_images = Variable(src_images).cuda(args.gpu)

            # calculate the segmentation losses
            sup_preds = list(student_net(src_images))
            # seg_losses collects the per-head terms (kept for inspection;
            # not read again below).
            seg_losses, total_seg_loss = [], 0
            for idx, sup_pred in enumerate(sup_preds):
                sup_interp_pred = (sup_pred)
                # you also can use dice loss like: dice_loss(src_labels, sup_interp_pred)
                seg_loss = Weighted_Jaccard_loss(src_labels, sup_interp_pred,
                                                 args.class_weights, args.gpu)
                seg_losses.append(seg_loss)
                total_seg_loss += seg_loss * unsup_weights[idx]
                seg_loss_vals[idx] += seg_loss.item() / args.iter_size
            # NOTE(review): unlike total_adv_loss / total_mse_loss below,
            # total_seg_loss is not divided by args.iter_size — confirm the
            # asymmetry is intended.

            _, tgt_batch = tgt_loader_iter.__next__()
            # images0 / images1 are two differently-augmented views of the
            # same target batch (teacher view vs. student view); the label
            # tensors are unpacked but unused here.
            tgt_images0, tgt_lbl0, tgt_images1, tgt_lbl1, _ = tgt_batch
            tgt_images0 = Variable(tgt_images0).cuda(args.gpu)
            tgt_images1 = Variable(tgt_images1).cuda(args.gpu)

            # calculate ensemble losses
            stu_unsup_preds = list(student_net(tgt_images1))
            tea_unsup_preds = teacher_net(tgt_images0)
            total_mse_loss = 0
            for idx in range(n_discriminators):
                # NOTE(review): softmax over dim=-1 normalizes the last
                # axis; for NCHW prediction maps the class axis would be
                # dim=1 — confirm the intended axis.
                stu_unsup_probs = F.softmax(stu_unsup_preds[idx], dim=-1)
                tea_unsup_probs = F.softmax(tea_unsup_preds[idx], dim=-1)
                unsup_loss = calc_mse_loss(stu_unsup_probs, tea_unsup_probs,
                                           args.batch_size)
                unsup_loss_vals[idx] += unsup_loss.item() / args.iter_size
                total_mse_loss += unsup_loss * unsup_weights[idx]
            total_mse_loss = total_mse_loss / args.iter_size

            # As the requires_grad is set to False in the discriminator, the
            # gradients are only accumulated in the generator, the target
            # student network is optimized to make the outputs of target domain
            # images close to the outputs of source domain images
            stu_unsup_preds = list(student_net(tgt_images0))
            d_outs, total_adv_loss = [], 0
            for idx in range(n_discriminators):
                stu_unsup_interp_pred = (stu_unsup_preds[idx])
                d_outs.append(discriminators[idx](stu_unsup_interp_pred))
                label_size = d_outs[idx].data.size()
                # Fool the discriminator: target-domain outputs are labelled
                # as "source".
                labels = torch.FloatTensor(label_size).fill_(source_label)
                labels = Variable(labels).cuda(args.gpu)
                adv_tgt_loss = calc_bce_loss(d_outs[idx], labels)
                total_adv_loss += lambda_adv_tgts[idx] * adv_tgt_loss
                adv_tgt_loss_vals[idx] += adv_tgt_loss.item() / args.iter_size
            total_adv_loss = total_adv_loss / args.iter_size

            # requires_grad is set to True in the discriminator, we only
            # accumulate gradients in the discriminators, the discriminators are
            # optimized to make true predictions
            d_losses = []
            for idx in range(n_discriminators):
                discriminator = discriminators[idx]
                for param in discriminator.parameters():
                    param.requires_grad = True
                # detach so discriminator gradients do not flow into the
                # student network
                sup_preds[idx] = sup_preds[idx].detach()
                d_outs[idx] = discriminators[idx](sup_preds[idx])
                label_size = d_outs[idx].data.size()
                labels = torch.FloatTensor(label_size).fill_(source_label)
                labels = Variable(labels).cuda(args.gpu)
                d_losses.append(calc_bce_loss(d_outs[idx], labels))
                # divided by 2 because the source pass and the target pass
                # below together make one discriminator update
                d_losses[idx] = d_losses[idx] / args.iter_size / 2
                d_losses[idx].backward()
                d_loss_vals[idx] += d_losses[idx].item()

            # Second discriminator pass: target-domain predictions labelled
            # as "target".
            for idx in range(n_discriminators):
                stu_unsup_preds[idx] = stu_unsup_preds[idx].detach()
                d_outs[idx] = discriminators[idx](stu_unsup_preds[idx])
                label_size = d_outs[idx].data.size()
                labels = torch.FloatTensor(label_size).fill_(tgt_label)
                labels = Variable(labels).cuda(args.gpu)
                d_losses[idx] = calc_bce_loss(d_outs[idx], labels)
                d_losses[idx] = d_losses[idx] / args.iter_size / 2
                d_losses[idx].backward()
                d_loss_vals[idx] += d_losses[idx].item()

            # NOTE(review): the steps below run every sub-iteration while
            # zero_grad() happens once per i_iter, so gradients accumulate
            # across sub-iterations between steps — confirm iter_size is
            # meant to be 1 (the two are equivalent in that case).
            for d_optimizer in d_optimizers:
                d_optimizer.step()

            total_loss = total_seg_loss + total_adv_loss + total_mse_loss
            total_loss.backward()
            student_optimizer.step()
            # EMA step copies the freshly updated student weights into the
            # teacher.
            teacher_optimizer.step()

        # Per-step loss log (values already averaged over iter_size).
        log_str = 'iter = {0:7d}/{1:7d}'.format(i_iter, args.num_steps)
        log_str += ', total_seg_loss = {0:.3f} '.format(total_seg_loss)
        templ = 'seg_losses = [' + ', '.join(['%.2f'] * len(seg_loss_vals))
        log_str += templ % tuple(seg_loss_vals) + '] '
        templ = 'ens_losses = [' + ', '.join(['%.5f'] * len(unsup_loss_vals))
        log_str += templ % tuple(unsup_loss_vals) + '] '
        templ = 'adv_losses = [' + ', '.join(['%.2f'] * len(adv_tgt_loss_vals))
        log_str += templ % tuple(adv_tgt_loss_vals) + '] '
        templ = 'd_losses = [' + ', '.join(['%.2f'] * len(d_loss_vals))
        log_str += templ % tuple(d_loss_vals) + '] '
        print(log_str)

        # Final save: only the teacher (EMA) weights are kept.
        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            filename = 'UNet' + str(
                args.num_steps_stop) + '_v18_weightedclass.pth'
            torch.save(teacher_net.cpu().state_dict(),
                       os.path.join(args.snapshot_dir, filename))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            filename = 'UNet' + str(i_iter) + '_v18_weightedclass.pth'
            # state_dict is taken on the CPU, so move the teacher back to
            # the GPU before training continues
            torch.save(teacher_net.cpu().state_dict(),
                       os.path.join(args.snapshot_dir, filename))
            teacher_net.cuda(args.gpu)
help="Visualize the images as they are processed", default=False) parser.add_argument('--no-save', '-n', action='store_false', help="Do not save the output masks", default=False) args = parser.parse_args() print("Using model file : {}".format(args.model)) net = UNet(3, 1) if not args.cpu: print("Using CUDA version of the net, prepare your GPU !") net.cuda() else: net.cpu() print("Using CPU version of the net, this may be very slow") in_files = args.input print("in_files=", in_files) out_files = [] if not args.output: for f in in_files: pathsplit = os.path.splitext(f) out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1])) elif len(in_files) != len(args.output): print( "Error : Input files and output files are not of the same length") raise SystemExit() else: out_files = args.output