Example #1
from collections import OrderedDict

import util.util as util  # project-local helpers (import path assumed)


def save_all_tensors(opt, output_list, model):
    fake_image, fake_raw_image, warped_image, flow, weight, atn_score, \
        target_label, target_image, flow_gt, conf_gt, ref_label, ref_image = output_list

    visual_list = [('target_label', util.visualize_label(opt, target_label, model)),                   
                   ('synthesized_image', util.tensor2im(fake_image)),
                   ('target_image', util.tensor2im(target_image)),
                   ('ref_image', util.tensor2im(ref_image)),
                   ('raw_image', util.tensor2im(fake_raw_image)),
                   ('warped_images', util.tensor2im(warped_image, tile=True)),
                   ('flows', util.tensor2flow(flow, tile=True)),
                   ('weights', util.tensor2im(weight, normalize=False, tile=True))]
    visuals = OrderedDict(visual_list)
    return visuals
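Example #1 assumes util.tensor2im, util.tensor2flow, and util.visualize_label from the repo's util package. As a rough sketch of what a tensor2im-style helper does (the name tensor2im_sketch and the simplifications are mine; the repo version also handles None inputs, image lists, and the tile option):

import numpy as np

def tensor2im_sketch(image_tensor, normalize=True):
    # Minimal sketch only: map a CHW float tensor in [-1, 1] (normalize=True)
    # or [0, 1] (normalize=False) to an HWC uint8 numpy image.
    image = image_tensor.detach().cpu().float().numpy()
    image = np.transpose(image, (1, 2, 0))
    image = (image + 1) / 2.0 * 255.0 if normalize else image * 255.0
    return np.clip(image, 0, 255).astype(np.uint8)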
Example #2
import os
import time
from collections import OrderedDict
from subprocess import call

import numpy as np
import torch
from torch.autograd import Variable

# Project-local modules (import paths assumed from the repo layout).
from options.train_options import TrainOptions
from data.data_loader import CreateDataLoader
from models.models import create_model
from util.visualizer import Visualizer
import util.util as util


def train():
    opt = TrainOptions().parse()
    if opt.debug:
        opt.display_freq = 1
        opt.print_freq = 1
        opt.nThreads = 1

    ### initialize dataset
    data_loader = CreateDataLoader(opt)
    dataset = data_loader.load_data()
    dataset_size = len(data_loader)
    if opt.dataset_mode == 'pose':
        print('#training frames = %d' % dataset_size)
    else:
        print('#training videos = %d' % dataset_size)

    ### initialize models
    modelG, modelD, flowNet = create_model(opt)
    visualizer = Visualizer(opt)

    iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
    ### if continue training, recover previous states
    if opt.continue_train:
        try:
            start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int)
        except Exception:  # missing or unreadable iter.txt: start from scratch
            start_epoch, epoch_iter = 1, 0
        print('Resuming from epoch %d at iteration %d' %
              (start_epoch, epoch_iter))
        if start_epoch > opt.niter:
            modelG.module.update_learning_rate(start_epoch - 1)
            modelD.module.update_learning_rate(start_epoch - 1)
        if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and \
                (start_epoch > opt.niter_fix_global):
            modelG.module.update_fixed_params()
        if start_epoch > opt.niter_step:
            data_loader.dataset.update_training_batch((start_epoch - 1) // opt.niter_step)
            modelG.module.update_training_batch((start_epoch - 1) // opt.niter_step)
    else:
        start_epoch, epoch_iter = 1, 0

    ### set parameters
    n_gpus = opt.n_gpus_gen // opt.batchSize  # number of gpus used for generator for each batch
    tG, tD = opt.n_frames_G, opt.n_frames_D
    tDB = tD * opt.output_nc
    s_scales = opt.n_scales_spatial
    t_scales = opt.n_scales_temporal
    input_nc = 1 if opt.label_nc != 0 else opt.input_nc
    output_nc = opt.output_nc

    opt.print_freq = lcm(opt.print_freq, opt.batchSize)
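    # Resume the global step counter, rounded down to a multiple of print_freq
    # so the per-iteration timer below starts exactly on a print step.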
    total_steps = (start_epoch - 1) * dataset_size + epoch_iter
    total_steps = total_steps // opt.print_freq * opt.print_freq

    ### real training starts here
    for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1):
        epoch_start_time = time.time()
        for idx, data in enumerate(dataset, start=epoch_iter):
            if total_steps % opt.print_freq == 0:
                iter_start_time = time.time()
            total_steps += opt.batchSize
            epoch_iter += opt.batchSize

            # whether to collect output images
            save_fake = total_steps % opt.display_freq == 0

            _, n_frames_total, height, width = data['B'].size()  # n_frames_total = n_frames_load * n_loadings + tG - 1
            n_frames_total = n_frames_total // opt.output_nc
            n_frames_load = opt.max_frames_per_gpu * n_gpus  # number of total frames loaded into GPU at a time for each batch
            n_frames_load = min(n_frames_load, n_frames_total - tG + 1)
            t_len = n_frames_load + tG - 1  # number of loaded frames plus previous frames

            fake_B_last = None  # last generated frame from the previous batch (fed into the next batch)
            real_B_all, fake_B_all, flow_ref_all, conf_ref_all = None, None, None, None  # all real/generated frames so far
            real_B_skipped, fake_B_skipped = [None] * t_scales, [None] * t_scales  # temporally subsampled frames
            flow_ref_skipped, conf_ref_skipped = [None] * t_scales, [None] * t_scales  # temporally subsampled flows

            for i in range(0, n_frames_total - t_len + 1, n_frames_load):
                # 5D tensors: (batchSize, n_frames, n_channels, height, width)
                input_A = Variable(data['A'][:, i * input_nc:(i + t_len) * input_nc, ...]) \
                    .view(-1, t_len, input_nc, height, width)
                input_B = Variable(data['B'][:, i * output_nc:(i + t_len) * output_nc, ...]) \
                    .view(-1, t_len, output_nc, height, width)
                inst_A = Variable(data['inst'][:, i:i + t_len, ...]).view(-1, t_len, 1, height, width) \
                    if len(data['inst'].size()) > 2 else None

                ###################################### Forward Pass ######################################
                ####### generator
                fake_B, fake_B_raw, flow, weight, real_A, real_Bp, fake_B_last = modelG(
                    input_A, input_B, inst_A, fake_B_last)

                if i == 0:
                    fake_B_first = fake_B[0, 0]  # the first generated image in this sequence
                real_B_prev, real_B = real_Bp[:, :-1], real_Bp[:, 1:]  # previous and current real frames

                ####### discriminator
                ### individual frame discriminator
                flow_ref, conf_ref = flowNet(real_B, real_B_prev)  # reference flows and confidences
                fake_B_prev = real_B_prev[:, 0:1] if fake_B_last is None else fake_B_last[0][:, -1:]
                if fake_B.size()[1] > 1:
                    fake_B_prev = torch.cat([fake_B_prev, fake_B[:, :-1].detach()], dim=1)

                losses = modelD(0, reshape([real_B, fake_B, fake_B_raw, real_A, real_B_prev,
                                            fake_B_prev, flow, weight, flow_ref, conf_ref]))
                losses = [torch.mean(x) if x is not None else 0 for x in losses]
                loss_dict = dict(zip(modelD.module.loss_names, losses))

                ### temporal discriminator
                loss_dict_T = []
                # get skipped frames for each temporal scale
                if t_scales > 0:
                    real_B_all, real_B_skipped = get_skipped_frames(real_B_all, real_B, t_scales, tD)
                    fake_B_all, fake_B_skipped = get_skipped_frames(fake_B_all, fake_B, t_scales, tD)
                    flow_ref_all, conf_ref_all, flow_ref_skipped, conf_ref_skipped = get_skipped_flows(
                        flowNet, flow_ref_all, conf_ref_all, real_B_skipped, flow_ref, conf_ref, t_scales, tD)

                # run discriminator for each temporal scale
                for s in range(t_scales):
                    if real_B_skipped[s] is not None and real_B_skipped[s].size()[1] == tD:
                        losses = modelD(s + 1, [real_B_skipped[s], fake_B_skipped[s],
                                                flow_ref_skipped[s], conf_ref_skipped[s]])
                        losses = [torch.mean(x) if not isinstance(x, int) else x for x in losses]
                        loss_dict_T.append(dict(zip(modelD.module.loss_names_T, losses)))

                # collect losses
                loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5
                loss_G = loss_dict['G_GAN'] + loss_dict['G_GAN_Feat'] + loss_dict['G_VGG']
                loss_G += loss_dict['G_Warp'] + loss_dict['F_Flow'] + loss_dict['F_Warp'] + loss_dict['W']
                if opt.add_face_disc:
                    loss_G += loss_dict['G_f_GAN'] + loss_dict['G_f_GAN_Feat']
                    loss_D += (loss_dict['D_f_fake'] + loss_dict['D_f_real']) * 0.5

                # collect temporal losses
                loss_D_T = []
                t_scales_act = min(t_scales, len(loss_dict_T))
                for s in range(t_scales_act):
                    loss_G += (loss_dict_T[s]['G_T_GAN'] + loss_dict_T[s]['G_T_GAN_Feat'] +
                               loss_dict_T[s]['G_T_Warp'])
                    loss_D_T.append((loss_dict_T[s]['D_T_fake'] + loss_dict_T[s]['D_T_real']) * 0.5)

                ###################################### Backward Pass #################################
                optimizer_G = modelG.module.optimizer_G
                optimizer_D = modelD.module.optimizer_D
                # update generator weights
                optimizer_G.zero_grad()
                loss_G.backward()
                optimizer_G.step()

                # update discriminator weights
                # individual frame discriminator
                optimizer_D.zero_grad()
                loss_D.backward()
                optimizer_D.step()
                # temporal discriminator
                for s in range(t_scales_act):
                    optimizer_D_T = getattr(modelD.module, 'optimizer_D_T' + str(s))
                    optimizer_D_T.zero_grad()
                    loss_D_T[s].backward()
                    optimizer_D_T.step()

            if opt.debug:
                call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"])

            ############## Display results and errors ##########
            ### print out errors
            if total_steps % opt.print_freq == 0:
                t = (time.time() - iter_start_time) / opt.print_freq
                errors = {k: v.data.item() if not isinstance(v, int) else v
                          for k, v in loss_dict.items()}
                for s in range(len(loss_dict_T)):
                    errors.update({k + str(s): v.data.item() if not isinstance(v, int) else v
                                   for k, v in loss_dict_T[s].items()})
                visualizer.print_current_errors(epoch, epoch_iter, errors, t)
                visualizer.plot_current_errors(errors, total_steps)

            ### display output images
            if save_fake:
                if opt.label_nc != 0:
                    input_image = util.tensor2label(real_A[0, -1], opt.label_nc)
                elif opt.dataset_mode == 'pose':
                    input_image = util.tensor2im(real_A[0, -1, :3], normalize=False)
                    if real_A.size()[2] == 6:
                        input_image2 = util.tensor2im(real_A[0, -1, 3:], normalize=False)
                        input_image[input_image2 != 0] = input_image2[input_image2 != 0]
                else:
                    c = 3 if opt.input_nc == 3 else 1
                    input_image = util.tensor2im(real_A[0, -1, :c], normalize=False)
                if opt.use_instance:
                    edges = util.tensor2im(real_A[0, -1, -1:, ...], normalize=False)
                    input_image += edges[:, :, np.newaxis]

                if opt.add_face_disc:
                    ys, ye, xs, xe = modelD.module.get_face_region(real_A[0, -1:])
                    if ys is not None:
                        # draw a white rectangle around the detected face region
                        input_image[ys, xs:xe, :] = input_image[ye, xs:xe, :] = \
                            input_image[ys:ye, xs, :] = input_image[ys:ye, xe, :] = 255

                visual_list = [('input_image', input_image),
                               ('fake_image', util.tensor2im(fake_B[0, -1])),
                               ('fake_first_image', util.tensor2im(fake_B_first)),
                               ('fake_raw_image', util.tensor2im(fake_B_raw[0, -1])),
                               ('real_image', util.tensor2im(real_B[0, -1])),
                               ('flow_ref', util.tensor2flow(flow_ref[0, -1])),
                               ('conf_ref', util.tensor2im(conf_ref[0, -1], normalize=False))]
                if flow is not None:
                    visual_list += [('flow', util.tensor2flow(flow[0, -1])),
                                    ('weight', util.tensor2im(weight[0, -1], normalize=False))]
                visuals = OrderedDict(visual_list)
                visualizer.display_current_results(visuals, epoch, total_steps)

            ### save latest model
            if total_steps % opt.save_latest_freq == 0:
                visualizer.vis_print('saving the latest model (epoch %d, total_steps %d)' %
                                     (epoch, total_steps))
                modelG.module.save('latest')
                modelD.module.save('latest')
                np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d')

            if epoch_iter > dataset_size - opt.batchSize:
                epoch_iter = 0
                break

        # end of epoch
        visualizer.vis_print('End of epoch %d / %d \t Time Taken: %d sec' %
                             (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))

        ### save model for this epoch
        if epoch % opt.save_epoch_freq == 0:
            visualizer.vis_print(
                'saving the model at the end of epoch %d, iters %d' %
                (epoch, total_steps))
            modelG.module.save('latest')
            modelD.module.save('latest')
            modelG.module.save(epoch)
            modelD.module.save(epoch)
            np.savetxt(iter_path, (epoch + 1, 0), delimiter=',', fmt='%d')

        ### linearly decay learning rate after certain iterations
        if epoch > opt.niter:
            modelG.module.update_learning_rate(epoch)
            modelD.module.update_learning_rate(epoch)

        ### gradually grow training sequence length
        if (epoch % opt.niter_step) == 0:
            data_loader.dataset.update_training_batch(epoch // opt.niter_step)
            modelG.module.update_training_batch(epoch // opt.niter_step)

        ### finetune all scales
        if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and \
                (epoch == opt.niter_fix_global):
            modelG.module.update_fixed_params()
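train() relies on a few module-level helpers that are not shown above: lcm (used to align print_freq with batchSize), reshape (folds the time axis into the batch axis before the single-frame discriminator), and get_skipped_frames / get_skipped_flows (build the temporally subsampled sequences for each scale). A minimal sketch of the first two, assuming 5D (batch, time, channel, height, width) tensors; the subsampling helpers are omitted because their exact windowing is repo-specific:

import math
import torch

def lcm(a, b):
    # Least common multiple; 0 if either argument is 0.
    return abs(a * b) // math.gcd(a, b) if a and b else 0

def reshape(tensors):
    # Fold the time axis into the batch axis: (B, T, C, H, W) -> (B*T, C, H, W).
    # Lists are mapped element-wise and None entries pass through unchanged.
    if tensors is None:
        return None
    if isinstance(tensors, list):
        return [reshape(t) for t in tensors]
    return tensors.contiguous().view(-1, *tensors.size()[2:])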
Example #3
from collections import OrderedDict

import torch
import util.util as util  # project-local helpers (import path assumed)


def save_all_tensors(opt, output_list, model):
    prevs, ref_images, warping_ref_lmark, warping_ref, ori_warping_refs, ani_lmark, ani_image,\
        target_label, target_image, tgt_template, cropped_images, flow_gt, conf_gt, tgt_mask_image = output_list

    # in prevs
    fake_image = torch.cat(prevs['synthesized_images'], dim=0)
    ref_warped_images = handle_cat(prevs['ref_warp_images'])
    ref_weights = handle_cat(prevs['ref_weights'])
    prev_warped_images = handle_cat(prevs['prev_warp_images'])
    prev_weights = handle_cat(prevs['prev_weights'])
    fake_raw_image = (torch.cat(prevs['raw_images'], dim=0)
                      if prevs['raw_images'][0] is not None else None)
    ani_warped_images = handle_cat(prevs['ani_warp_images'])
    ani_weights = handle_cat(prevs['ani_weights'])
    ani_flow = handle_cat(prevs['ani_flows'])
    ref_flow = handle_cat(prevs['ref_flows'])
    prev_flow = handle_cat(prevs['prev_flows'])
    img_ani = torch.cat(prevs['ani_syn'], dim=0) if prevs['ani_syn'][0] is not None else None
    # Resolve crop_template once, regardless of how the model is wrapped
    # (DataParallel module, bare module, or a trainer that holds the model);
    # this replaces the original nested bare try/except fallbacks.
    if hasattr(model, 'module'):
        crop_template = model.module.crop_template
    elif hasattr(model, 'crop_template'):
        crop_template = model.crop_template
    else:
        crop_template = model.model.module.crop_template

    if tgt_template is not None:
        atten_img = crop_template(target_image, tgt_template)
        atten_fake_img = crop_template(fake_image.unsqueeze(1), tgt_template[-1:])
        atten_raw_img = crop_template(fake_raw_image.unsqueeze(1), tgt_template[-1:])
    else:
        atten_img = atten_fake_img = atten_raw_img = None

    visual_list = []
    for i in range(opt.n_shot):
        visual_list += [('ref_img_{}'.format(i),
                         util.tensor2im(ref_images[:, i:i + 1]))]
    visual_list += [
        ('warping_ref_lmark', util.tensor2im(warping_ref_lmark, tile=True)),
        ('warping_ref_img', util.tensor2im(warping_ref, tile=True)),
        ('ori_warping_ref_img', util.tensor2im(ori_warping_refs, tile=True)),
        ('warping_target_img', util.tensor2im(tgt_mask_image, tile=True)),
        ('target_label', util.tensor2im(target_label, tile=True)),
        ('target_image', util.tensor2im(target_image, tile=True)),
        ('target_atten_image', util.tensor2im(atten_img, tile=True)
         if atten_img is not None else None),
        ('synthesized_image', util.tensor2im(fake_image, tile=True)),
        ('synthesized_atten_image', util.tensor2im(atten_fake_img, tile=True)),
        ('ani_syn_image', util.tensor2im(img_ani, tile=True)),
        ('ref_warped_images', util.tensor2im(ref_warped_images, tile=True)),
        ('ref_weights', util.tensor2im(ref_weights, normalize=False,
                                       tile=True)),
        ('prev_warped_images', util.tensor2im(prev_warped_images, tile=True)),
        ('prev_weights', util.tensor2im(prev_weights, tile=True)),
        ('raw_image', util.tensor2im(fake_raw_image, tile=True)),
        ('raw_atten_image', util.tensor2im(atten_raw_img, tile=True)),
        ('ani_warped_images', util.tensor2im(ani_warped_images, tile=True)),
        ('ani_weights', util.tensor2im(ani_weights, tile=True)),
        ('ani_flow', util.tensor2flow(ani_flow, tile=True)),
        ('ref_flow', util.tensor2flow(ref_flow, tile=True)),
        ('prev_flow', util.tensor2flow(prev_flow, tile=True)),
        ('ani_image', util.tensor2im(ani_image, tile=True)),
        ('ani_lmark', util.tensor2im(ani_lmark, tile=True)),
        ('cropped_image', util.tensor2im(cropped_images, tile=True)),
        ('flow_ref_gt', util.tensor2flow(flow_gt[0][-1], tile=True)
         if flow_gt[0] is not None else None),
        ('flow_prev_gt', util.tensor2flow(flow_gt[1][-1], tile=True)
         if flow_gt[1] is not None else None),
        ('flow_ani_gt', util.tensor2flow(flow_gt[2][-1], tile=True)
         if flow_gt[2] is not None else None),
    ]
    visuals = OrderedDict(visual_list)
    return visuals
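handle_cat is not defined in this example; from its call sites it evidently concatenates a list of per-step tensors while tolerating branches that produced nothing. A minimal sketch under that assumption:

import torch

def handle_cat(tensor_list):
    # Concatenate per-frame outputs along the batch axis; propagate None when
    # a branch (e.g. animation or previous-frame warping) was disabled.
    if tensor_list is None or len(tensor_list) == 0 or tensor_list[0] is None:
        return None
    return torch.cat(tensor_list, dim=0)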