else:
        raise Exception('Unknown models')
    if load_model:
        model_path = os.path.join(args.output_dir, 'checkpoint', args.initial_checkpoint)
        print_to_log('loading models from file', args.initial_checkpoint, log_file)
        model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
        # model.load_state_dict(torch.load(model_path))
        # torch.load(model_path)
        print_to_log('models loaded!', '', log_file)

    print("Running models: " + args.model)
    cuda = torch.cuda.is_available() and args.use_gpu
    if cuda:
        model = model.cuda()
    else:
        model = model.cpu()
    print_to_log('gpu', cuda, log_file)
    log_file.close()

    if not predict:
        print("training on train set")
        train_set = SemanticSegmentationDataset(args.train_dir,
                                                os.path.join('image_sets/', args.train_set),
                                                image_size, mode='train')
        val_set = SemanticSegmentationDataset(args.validation_dir,
                                              os.path.join('image_sets/', args.valid_set),
                                              image_size, mode='valid')

        loss = torch.nn.CrossEntropyLoss()
        train_loader = DataLoader(train_set, batch_size, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_set, batch_size, drop_last=False)
Example #2
0
def _set_requires_grad(modules, flag):
    """Set ``requires_grad`` to *flag* on every parameter of *modules*."""
    for module in modules:
        for param in module.parameters():
            param.requires_grad = flag


def _domain_labels(d_out, domain_label):
    """Return a tensor shaped like *d_out* filled with *domain_label*.

    The result lives on the same device and has the same dtype as *d_out*,
    so it can be passed directly as the target of the BCE loss.
    """
    return torch.full_like(d_out, float(domain_label))


def _format_loss_list(name, values, fmt):
    """Render ``name = [v0, v1, ...] `` for the per-iteration log line."""
    return name + ' = [' + ', '.join(fmt % value for value in values) + '] '


def main():
    """Create the model and start the training.

    Builds a student/teacher pair of ``UNet`` models (the teacher is updated
    by ``optim_weight_ema.WeightEMA`` from the student's parameters) and five
    ``FCDiscriminator`` networks, one per student output. Each training
    iteration accumulates gradients over ``args.iter_size`` sub-iterations:

    * the student receives a weighted segmentation loss on source images, a
      weighted student/teacher consistency (MSE) loss on target images and a
      weighted adversarial loss that labels target predictions as source;
    * every discriminator receives a BCE loss on detached source predictions
      (label 0) and detached target predictions (label 1).

    All configuration is read from ``get_arguments()``. Side effects: creates
    ``args.snapshot_dir`` if missing, prints one log line per iteration and
    writes teacher checkpoints every ``args.save_pred_every`` iterations and
    once more when ``args.num_steps_stop`` is reached (which ends training).
    """
    args = get_arguments()

    cudnn.enabled = True
    n_discriminators = 5

    # create teacher & student
    student_net = UNet(3, n_classes=args.num_classes)
    teacher_net = UNet(3, n_classes=args.num_classes)
    student_params = list(student_net.parameters())

    # teacher doesn't need gradient as it's just a EMA of the student
    teacher_params = list(teacher_net.parameters())
    for param in teacher_params:
        param.requires_grad = False

    student_net.train()
    student_net.cuda(args.gpu)
    teacher_net.train()
    teacher_net.cuda(args.gpu)

    cudnn.benchmark = True
    # Per-output weights; index i belongs to the i-th student output.
    unsup_weights = [
        args.unsup_weight5, args.unsup_weight6, args.unsup_weight7,
        args.unsup_weight8, args.unsup_weight9
    ]
    lambda_adv_tgts = [
        args.lambda_adv_tgt5, args.lambda_adv_tgt6, args.lambda_adv_tgt7,
        args.lambda_adv_tgt8, args.lambda_adv_tgt9
    ]

    # create a list of discriminators
    discriminators = []
    for _ in range(n_discriminators):
        discriminator = FCDiscriminator(num_classes=args.num_classes)
        discriminator.train()
        discriminator.cuda(args.gpu)
        discriminators.append(discriminator)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    max_iters = args.num_steps * args.iter_size * args.batch_size
    src_set = REFUGE(True,
                     domain='REFUGE_SRC',
                     is_transform=True,
                     augmentations=aug_student,
                     aug_for_target=aug_teacher,
                     max_iters=max_iters)
    src_loader = data.DataLoader(src_set,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 pin_memory=True)
    src_loader_iter = iter(src_loader)

    tgt_set = REFUGE(True,
                     domain='REFUGE_DST',
                     is_transform=True,
                     augmentations=aug_student,
                     aug_for_target=aug_teacher,
                     max_iters=max_iters)
    tgt_loader = data.DataLoader(tgt_set,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 pin_memory=True)
    tgt_loader_iter = iter(tgt_loader)

    student_optimizer = optim.SGD(student_params,
                                  lr=args.learning_rate,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
    teacher_optimizer = optim_weight_ema.WeightEMA(teacher_params,
                                                   student_params,
                                                   alpha=args.teacher_alpha)

    d_optimizers = [
        optim.Adam(discriminator.parameters(),
                   lr=args.learning_rate_D,
                   betas=(0.9, 0.99))
        for discriminator in discriminators
    ]

    calc_bce_loss = torch.nn.BCEWithLogitsLoss()

    # labels for adversarial training
    source_label, tgt_label = 0, 1
    for i_iter in range(args.num_steps):

        # Running values used only for the log line of this iteration.
        total_seg_loss_val = 0.0
        seg_loss_vals = [0] * n_discriminators
        adv_tgt_loss_vals = [0] * n_discriminators
        d_loss_vals = [0] * n_discriminators
        unsup_loss_vals = [0] * n_discriminators

        for d_optimizer in d_optimizers:
            d_optimizer.zero_grad()
            adjust_learning_rate_D(d_optimizer, i_iter, args)

        student_optimizer.zero_grad()
        adjust_learning_rate(student_optimizer, i_iter, args)

        for _ in range(args.iter_size):

            # ******** Optimize source network with segmentation loss ********
            # As we don't change the discriminators, their parameters are fixed
            _set_requires_grad(discriminators, False)

            _, _, src_images, src_labels, _ = next(src_loader_iter)
            src_images = src_images.cuda(args.gpu)

            # calculate the segmentation losses
            sup_preds = list(student_net(src_images))
            total_seg_loss = 0
            for idx, sup_pred in enumerate(sup_preds):
                # you also can use dice loss like: dice_loss(src_labels, sup_pred)
                seg_loss = Weighted_Jaccard_loss(src_labels, sup_pred,
                                                 args.class_weights, args.gpu)
                total_seg_loss += seg_loss * unsup_weights[idx]
                seg_loss_vals[idx] += seg_loss.item() / args.iter_size

            # Scale like the other two terms so that gradients accumulated
            # over iter_size sub-iterations form an average, not a sum.
            total_seg_loss = total_seg_loss / args.iter_size

            tgt_images0, _, tgt_images1, _, _ = next(tgt_loader_iter)
            tgt_images0 = tgt_images0.cuda(args.gpu)
            tgt_images1 = tgt_images1.cuda(args.gpu)

            # calculate ensemble losses
            stu_unsup_preds = list(student_net(tgt_images1))
            tea_unsup_preds = teacher_net(tgt_images0)
            total_mse_loss = 0
            for idx in range(n_discriminators):
                # NOTE(review): softmax is taken over the LAST dimension. If
                # the network outputs are (N, C, H, W) the class axis would be
                # dim=1 -- confirm against UNet before changing.
                stu_unsup_probs = F.softmax(stu_unsup_preds[idx], dim=-1)
                tea_unsup_probs = F.softmax(tea_unsup_preds[idx], dim=-1)

                unsup_loss = calc_mse_loss(stu_unsup_probs, tea_unsup_probs,
                                           args.batch_size)
                unsup_loss_vals[idx] += unsup_loss.item() / args.iter_size
                total_mse_loss += unsup_loss * unsup_weights[idx]

            total_mse_loss = total_mse_loss / args.iter_size

            # As the requires_grad is set to False in the discriminator, the
            # gradients are only accumulated in the generator, the target
            # student network is optimized to make the outputs of target domain
            # images close to the outputs of source domain images
            stu_unsup_preds = list(student_net(tgt_images0))
            total_adv_loss = 0
            for idx in range(n_discriminators):
                d_out = discriminators[idx](stu_unsup_preds[idx])
                adv_tgt_loss = calc_bce_loss(
                    d_out, _domain_labels(d_out, source_label))

                total_adv_loss += lambda_adv_tgts[idx] * adv_tgt_loss
                adv_tgt_loss_vals[idx] += adv_tgt_loss.item() / args.iter_size

            total_adv_loss = total_adv_loss / args.iter_size

            # Back-propagate the student loss inside the accumulation loop so
            # every sub-iteration contributes, and before any discriminator
            # weight is updated: the adversarial term differentiates through
            # the discriminators, and stepping their optimizers first would
            # invalidate the tensors autograd saved for this backward pass.
            total_loss = total_seg_loss + total_adv_loss + total_mse_loss
            total_loss.backward()
            total_seg_loss_val += float(total_seg_loss)

            # requires_grad is set to True in the discriminator,  we only
            # accumulate gradients in the discriminators, the discriminators are
            # optimized to make true predictions
            _set_requires_grad(discriminators, True)

            # Predictions are detached so that no gradient reaches the
            # student; source predictions first, then target predictions.
            for preds, domain_label in ((sup_preds, source_label),
                                        (stu_unsup_preds, tgt_label)):
                for idx, discriminator in enumerate(discriminators):
                    d_out = discriminator(preds[idx].detach())
                    d_loss = calc_bce_loss(
                        d_out, _domain_labels(d_out, domain_label))
                    # Halved because each discriminator sees two domains.
                    d_loss = d_loss / args.iter_size / 2
                    d_loss.backward()
                    d_loss_vals[idx] += d_loss.item()

        for d_optimizer in d_optimizers:
            d_optimizer.step()

        student_optimizer.step()
        # The teacher follows the freshly updated student weights.
        teacher_optimizer.step()

        log_str = 'iter = {0:7d}/{1:7d}'.format(i_iter, args.num_steps)
        log_str += ', total_seg_loss = {0:.3f} '.format(total_seg_loss_val)
        log_str += _format_loss_list('seg_losses', seg_loss_vals, '%.2f')
        log_str += _format_loss_list('ens_losses', unsup_loss_vals, '%.5f')
        log_str += _format_loss_list('adv_losses', adv_tgt_loss_vals, '%.2f')
        log_str += _format_loss_list('d_losses', d_loss_vals, '%.2f')

        print(log_str)
        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            filename = 'UNet' + str(
                args.num_steps_stop) + '_v18_weightedclass.pth'
            torch.save(teacher_net.cpu().state_dict(),
                       os.path.join(args.snapshot_dir, filename))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            filename = 'UNet' + str(i_iter) + '_v18_weightedclass.pth'
            torch.save(teacher_net.cpu().state_dict(),
                       os.path.join(args.snapshot_dir, filename))
            # .cpu() moved the module; put it back on the GPU to keep training
            teacher_net.cuda(args.gpu)
                        help="Visualize the images as they are processed",
                        default=False)
    parser.add_argument('--no-save',
                        '-n',
                        action='store_false',
                        help="Do not save the output masks",
                        default=False)

    args = parser.parse_args()
    print("Using model file : {}".format(args.model))
    net = UNet(3, 1)
    if not args.cpu:
        print("Using CUDA version of the net, prepare your GPU !")
        net.cuda()
    else:
        net.cpu()
        print("Using CPU version of the net, this may be very slow")

    in_files = args.input
    print("in_files=", in_files)
    out_files = []
    if not args.output:
        for f in in_files:
            pathsplit = os.path.splitext(f)
            out_files.append("{}_OUT{}".format(pathsplit[0], pathsplit[1]))
    elif len(in_files) != len(args.output):
        print(
            "Error : Input files and output files are not of the same length")
        raise SystemExit()
    else:
        out_files = args.output