Example #1
def main():
    """Create the model and start the training."""

    # Sizes arrive as comma-separated "width,height" strings.
    w, h = map(int, args.input_size.split(','))
    args.input_size = (w, h)

    # Note: crop_size is deliberately stored as (height, width).
    w, h = map(int, args.crop_size.split(','))
    args.crop_size = (h, w)

    w, h = map(int, args.input_size_target.split(','))
    args.input_size_target = (w, h)

    # Enable the cuDNN auto-tuner; fixed input sizes make benchmark worthwhile.
    cudnn.enabled = True
    cudnn.benchmark = True

    # Parse the comma-separated GPU id list, keeping only non-negative ids.
    str_ids = args.gpu_ids.split(',')
    gpu_ids = []
    for str_id in str_ids:
        gid = int(str_id)
        if gid >= 0:
            gpu_ids.append(gid)

    # Build the trainer once; wrap its networks in DataParallel when more
    # than one GPU id was given.
    num_gpu = len(gpu_ids)
    args.multi_gpu = num_gpu > 1
    Trainer = AD_Trainer(args)
    if args.multi_gpu:
        Trainer.G = torch.nn.DataParallel(Trainer.G, gpu_ids)
        Trainer.D1 = torch.nn.DataParallel(Trainer.D1, gpu_ids)
        Trainer.D2 = torch.nn.DataParallel(Trainer.D2, gpu_ids)

    print(Trainer)
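
    # AD_Trainer is defined elsewhere in this repository; the attributes used
    # below (G, D1, D2, gen_opt, dis1_opt, dis2_opt, gen_update, dis_update)
    # are inferred from how this script calls them, not shown here.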

    # Source-domain loader: max_iters pads the dataset so one "epoch" spans
    # the whole schedule (num_steps * iter_size * batch_size samples).
    trainloader = data.DataLoader(
        cityscapesDataSet(args.data_dir, args.data_list,
                          max_iters=args.num_steps * args.iter_size * args.batch_size,
                          resize_size=args.input_size,
                          crop_size=args.crop_size,
                          scale=True, mirror=True, mean=IMG_MEAN, autoaug=args.autoaug),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    trainloader_iter = enumerate(trainloader)

    # Target-domain loader, padded the same way.
    targetloader = data.DataLoader(
        robotDataSet(args.data_dir_target, args.data_list_target,
                     max_iters=args.num_steps * args.iter_size * args.batch_size,
                     resize_size=args.input_size_target,
                     crop_size=args.crop_size,
                     scale=False, mirror=args.random_mirror, mean=IMG_MEAN,
                     set=args.set, autoaug=args.autoaug_target),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    targetloader_iter = enumerate(targetloader)
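
    # Because max_iters pads both datasets, each enumerate(...) iterator
    # yields enough batches for the full run without being re-created.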

    # set up TensorBoard logging
    if args.tensorboard:
        args.log_dir += '/'+ os.path.basename(args.snapshot_dir)
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)

        writer = SummaryWriter(args.log_dir)

    for i_iter in range(args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0


        # Step the learning-rate schedules (helpers defined elsewhere; see
        # the sketch after this example).
        adjust_learning_rate(Trainer.gen_opt, i_iter, args)
        adjust_learning_rate_D(Trainer.dis1_opt, i_iter, args)
        adjust_learning_rate_D(Trainer.dis2_opt, i_iter, args)

        for sub_i in range(args.iter_size):

            # --- Train G: draw one source batch and one target batch ---
            _, batch = next(trainloader_iter)
            _, batch_t = next(targetloader_iter)

            # Each batch presumably unpacks as (image, label, size, name);
            # the last two fields are unused here.
            images, labels, _, _ = batch
            images = images.cuda()
            labels = labels.long().cuda()
            images_t, labels_t, _, _ = batch_t
            images_t = images_t.cuda()
            labels_t = labels_t.long().cuda()

            with Timer("Elapsed time in update: %f"):
                (loss_seg1, loss_seg2, loss_adv_target1, loss_adv_target2,
                 loss_me, loss_kl, pred1, pred2, pred_target1, pred_target2,
                 val_loss) = Trainer.gen_update(images, images_t, labels,
                                                labels_t, i_iter)
                loss_seg_value1 += loss_seg1.item() / args.iter_size
                loss_seg_value2 += loss_seg2.item() / args.iter_size
                loss_adv_target_value1 += loss_adv_target1 / args.iter_size
                loss_adv_target_value2 += loss_adv_target2 / args.iter_size
                loss_me_value = loss_me

                # Update the discriminators only when both adversarial terms
                # are active.
                if args.lambda_adv_target1 > 0 and args.lambda_adv_target2 > 0:
                    loss_D1, loss_D2 = Trainer.dis_update(pred1, pred2, pred_target1, pred_target2)
                    loss_D_value1 += loss_D1.item()
                    loss_D_value2 += loss_D2.item()
                else:
                    loss_D_value1 = 0
                    loss_D_value2 = 0

        del pred1, pred2, pred_target1, pred_target2  # free prediction tensors

        if args.tensorboard and i_iter % 100 == 0:
            scalar_info = {
                'loss_seg1': loss_seg_value1,
                'loss_seg2': loss_seg_value2,
                'loss_adv_target1': loss_adv_target_value1,
                'loss_adv_target2': loss_adv_target_value2,
                'loss_me_target': loss_me_value,
                'loss_kl_target': loss_kl,
                'loss_D1': loss_D_value1,
                'loss_D2': loss_D_value2,
                'val_loss': val_loss,
            }
            for key, val in scalar_info.items():
                writer.add_scalar(key, val, i_iter)

        print('exp = {}'.format(args.snapshot_dir))
        print('\033[1m iter = %8d/%8d \033[0m loss_seg1 = %.3f loss_seg2 = %.3f '
              'loss_me = %.3f loss_kl = %.3f loss_adv1 = %.3f loss_adv2 = %.3f '
              'loss_D1 = %.3f loss_D2 = %.3f val_loss = %.3f'
              % (i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
                 loss_me_value, loss_kl, loss_adv_target_value1,
                 loss_adv_target_value2, loss_D_value1, loss_D_value2, val_loss))

        # Release per-iteration loss tensors.
        del loss_seg1, loss_seg2, loss_adv_target1, loss_adv_target2, loss_me, loss_kl, val_loss

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
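            # Note: under args.multi_gpu these state_dicts carry DataParallel's
            # 'module.' key prefix, which any loading code must strip or match.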
            torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '.pth'))
            torch.save(Trainer.D1.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '_D1.pth'))
            torch.save(Trainer.D2.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '_D2.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(Trainer.D1.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))
            torch.save(Trainer.D2.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D2.pth'))

    if args.tensorboard:
        writer.close()
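
# --- Hedged sketch of helpers the examples above assume ---
# `Timer`, `adjust_learning_rate`, and `adjust_learning_rate_D` are imported
# from elsewhere in the repository and are not shown above. A minimal sketch,
# assuming the poly learning-rate schedule common in DeepLab-style training
# scripts; the exact decay and attribute names in the real helpers may differ.
import time


class Timer:
    """Context manager that prints its format string with the elapsed seconds."""

    def __init__(self, msg):
        self.msg = msg

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        print(self.msg % (time.time() - self.start))


def lr_poly(base_lr, i_iter, max_iter, power):
    # Polynomial decay: lr = base_lr * (1 - iter/max_iter) ** power
    return base_lr * ((1 - float(i_iter) / max_iter) ** power)


def adjust_learning_rate(optimizer, i_iter, args):
    # Assumes args provides learning_rate, num_steps, and power (hypothetical
    # names chosen for this sketch).
    lr = lr_poly(args.learning_rate, i_iter, args.num_steps, args.power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def adjust_learning_rate_D(optimizer, i_iter, args):
    # Same schedule, driven by the discriminator base rate.
    lr = lr_poly(args.learning_rate_D, i_iter, args.num_steps, args.power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
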
Example #2
def main():
    """Create the model and start the training."""
    print("NUMBER OF CLASSES: ", str(args.num_classes))

    # Sizes arrive as comma-separated "width,height" strings; as in
    # Example #1, crop_size is stored as (height, width).
    w, h = map(int, args.input_size.split(','))
    args.input_size = (w, h)

    w, h = map(int, args.crop_size.split(','))
    args.crop_size = (h, w)

    w, h = map(int, args.gt_size.split(','))
    args.gt_size = (w, h)

    # Enable the cuDNN auto-tuner.
    cudnn.enabled = True
    cudnn.benchmark = True
    
    # create result dir
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    # Parse the comma-separated GPU id list, keeping only non-negative ids.
    str_ids = args.gpu_ids.split(',')
    gpu_ids = []
    for str_id in str_ids:
        gid = int(str_id)
        if gid >= 0:
            gpu_ids.append(gid)

    # Both branches built the same trainer here; only the multi_gpu flag
    # differs (no DataParallel wrapping in this example).
    num_gpu = len(gpu_ids)
    args.multi_gpu = num_gpu > 1
    Trainer = AD_Trainer(args)
        
    # Caffe-style BGR channel means, matching the pretrained backbone.
    TARGET_IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32)
    
    # Loader over the labeled set; max_iters pads the dataset to cover
    # num_steps * batch_size samples.
    trainloader = data.DataLoader(
        cityscapesDataSet(args.data_dir, args.data_list,
                          max_iters=args.num_steps * args.batch_size,
                          resize_size=args.input_size,
                          crop_size=args.crop_size,
                          set=args.set, scale=False, mirror=args.random_mirror,
                          mean=TARGET_IMG_MEAN, autoaug=args.autoaug_target,
                          source_domain=args.source_domain),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)
    trainloader_iter = enumerate(trainloader)
    '''
    trainloader = data.DataLoader(
        gta5DataSet(args.data_dir, args.data_list,
                    max_iters=args.num_steps * args.batch_size,
                    resize_size=args.input_size,
                    crop_size=args.crop_size,
                    scale=True, mirror=True, mean=TARGET_IMG_MEAN, autoaug=args.autoaug),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)
    trainloader_iter = enumerate(trainloader)

    trainloader = data.DataLoader(
        synthiaDataSet(args.data_dir, args.data_list,
                       max_iters=args.num_steps * args.batch_size,
                       resize_size=args.input_size,
                       crop_size=args.crop_size,
                       scale=True, mirror=True, mean=TARGET_IMG_MEAN, autoaug=args.autoaug),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)
    trainloader_iter = enumerate(trainloader)
    '''
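    # The triple-quoted block above preserves drop-in replacements for the
    # source dataset (GTA5, SYNTHIA); swap one in place of cityscapesDataSet.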
    
    # set up TensorBoard logging
    if args.tensorboard:
        args.log_dir += '/'+ os.path.basename(args.snapshot_dir)
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)

        writer = SummaryWriter(args.log_dir)
    
    # Baseline mIoU; only checkpoints that beat it get saved.
    best_mIoUs = args.mIOU

    for i_iter in range(args.num_steps):

        loss_seg_value = 0

        adjust_learning_rate(Trainer.gen_opt, i_iter, args)

        _, batch = next(trainloader_iter)
        images, labels, _, _ = batch
        images = images.cuda()
        labels = labels.long().cuda()

        with Timer("Elapsed time in update: %f"):
            loss_seg = Trainer.gen_update(images, labels, i_iter)
            loss_seg_value += loss_seg.item()

        if args.tensorboard:
            scalar_info = {
                'loss_seg': loss_seg_value
            }

            if i_iter % 100 == 0:
                for key, val in scalar_info.items():
                    writer.add_scalar(key, val, i_iter)

        print('\033[1m iter = %8d/%8d \033[0m loss_seg = %.3f' %(i_iter, args.num_steps, loss_seg_value))

        del loss_seg

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            # Evaluate on the held-out set; keep only checkpoints that
            # improve on the best mIoU seen so far.
            mIoUs, _ = evaluate(args, args.gt_dir, args.gt_list, args.result_dir, Trainer.G)
            cur_mIoU = round(np.nanmean(mIoUs) * 100, 2)
            if args.tensorboard:  # guard: writer only exists when tensorboard is on
                writer.add_scalar('mIOU', cur_mIoU, int(i_iter / args.save_pred_every))
            if cur_mIoU > best_mIoUs:
                print('save model ...')
                best_mIoUs = cur_mIoU
                torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'supervised_seg_' + str(i_iter) + '.pth'))

    if args.tensorboard:
        writer.close()
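
# A minimal entry point, assuming `args` is produced at module level by an
# argparse-based get_arguments() helper, as is typical for these training
# scripts (the helper itself is hypothetical and not shown in either example):
if __name__ == '__main__':
    main()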