def test():
    """Extract audio features with a pretrained AT_net encoder and dump them to disk.

    Loads the checkpoint named by ``config.at_model``, runs the first audio file
    found under ``<config.in_file>/audio/`` through the encoder, and saves one
    ``.pt`` tensor per frame into ``<config.in_file>/feature/``.

    NOTE(review): requires CUDA and a module-level ``config``; assumes the
    ``feature/`` directory already exists — TODO confirm with callers.
    """
    # Pretrained audio encoder (checkpoint saved from DataParallel, hence multi2single).
    encoder = AT_net().cuda()
    weights = multi2single(config.at_model, 1)
    encoder.load_state_dict(weights)
    encoder.eval()

    # Take the first audio file in the input directory, resampled to 16 kHz mono.
    audio_path = glob.glob(os.path.join(config.in_file, 'audio/*'))[0]
    speech, sr = librosa.load(audio_path, sr=16000, mono=True)

    # Pad 1920 samples (0.12 s at 16 kHz) of silence at both ends before MFCC.
    speech = np.insert(speech, 0, np.zeros(1920))
    speech = np.append(speech, np.zeros(1920))
    mfcc = python_speech_features.mfcc(speech, 16000, winstep=0.01)

    with torch.no_grad():
        # One window per video frame: 28 MFCC rows (7 frames x 4 rows), first
        # cepstral coefficient dropped. Window at frame f spans [f-3, f+4).
        last_frame = int(mfcc.shape[0] / 4) - 4
        windows = []
        for frame_idx in range(3, last_frame + 1):
            chunk = mfcc[(frame_idx - 3) * 4:(frame_idx + 4) * 4, 1:]
            windows.append(torch.FloatTensor(chunk).cuda())
        batch = torch.stack(windows, dim=0).unsqueeze(0)

        features = encoder(batch)

        # Truncate to a multiple of 25 (presumably 25 fps video — TODO confirm).
        usable = (len(features) // 25) * 25
        for i in tqdm(range(usable)):
            out_path = os.path.join(config.in_file, 'feature/%05d.pt' % (i + 1))
            torch.save(features[i][0].detach().cpu(), out_path)
        print('feature length:', usable)
def __init__(self, config):
    """Build the landmark-generator trainer: model, losses, optimizer, data loader.

    Args:
        config: parsed options object; fields read here include ``lstm``,
            ``cuda``, ``device_ids``, ``load_model``, ``start_epoch``,
            ``pretrained_dir``, ``pretrained_epoch``, ``lr``, ``beta1``,
            ``beta2``, ``dataset``, ``dataset_dir``, ``is_train``,
            ``batch_size`` and ``num_thread``.

    NOTE(review): order matters below — weights are initialized before any
    checkpoint load so a loaded model is not clobbered, and the optimizer is
    created after loading so it sees the final parameters.
    """
    # LSTM variant consumes a sequence of audio windows; the single variant
    # maps one window to one landmark frame.
    if config.lstm == True:
        self.generator = AT_net()
    else:
        self.generator = AT_single()
    self.l1_loss_fn = nn.L1Loss()
    self.mse_loss_fn = nn.MSELoss()
    self.config = config
    if config.cuda:
        # Multi-GPU path: wrap in DataParallel across the configured devices.
        device_ids = [int(i) for i in config.device_ids.split(',')]
        self.generator = nn.DataParallel(self.generator, device_ids=device_ids).cuda()
        # self.generator = self.generator.cuda()
        self.mse_loss_fn = self.mse_loss_fn.cuda()
        self.l1_loss_fn = self.l1_loss_fn.cuda()
    # #########single GPU#######################
    # if config.cuda:
    #     self.generator = self.generator.cuda()
    #     self.mse_loss_fn = self.mse_loss_fn.cuda()
    #     self.l1_loss_fn = nn.L1Loss().cuda()
    initialize_weights(self.generator)
    self.start_epoch = 0
    if config.load_model:
        # Resume training from a saved checkpoint.
        self.start_epoch = config.start_epoch
        self.load(config.pretrained_dir, config.pretrained_epoch)
    self.opt_g = torch.optim.Adam(self.generator.parameters(),
                                  lr=config.lr,
                                  betas=(config.beta1, config.beta2))
    # Dataset choice: LRW vs. GRID, each with an LSTM (sequence) or
    # single-frame landmark-PCA variant.
    if config.lstm:
        if config.dataset == 'lrw':
            self.dataset = LRW_1D_lstm_landmark_pca(config.dataset_dir, train=config.is_train)
        else:
            self.dataset = GRID_1D_lstm_landmark_pca(config.dataset_dir, train=config.is_train)
    else:
        if config.dataset == 'lrw':
            self.dataset = LRW_1D_single_landmark_pca(config.dataset_dir, train=config.is_train)
        else:
            self.dataset = GRID_1D_single_landmark_pca(config.dataset_dir, train=config.is_train)
    self.data_loader = DataLoader(self.dataset,
                                  batch_size=config.batch_size,
                                  num_workers=config.num_thread,
                                  shuffle=True,
                                  drop_last=True)
def test():
    """Generate a talking-face video: audio -> landmarks (AT_net) -> frames (VG_net).

    Reads the driving audio from ``config.in_file`` and the identity image from
    ``config.person``; writes intermediate frames under ``../temp/{img,motion,attention}``
    and the final video into ``config.sample_dir``.

    Fix: the MFCC matrix was computed once from the *unpadded* speech and then
    immediately recomputed after padding — the first computation was dead work
    and has been removed.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.device_ids
    # Fresh scratch directories for generated frames / motion maps / attention maps.
    if os.path.exists('../temp'):
        shutil.rmtree('../temp')
    os.mkdir('../temp')
    os.mkdir('../temp/img')
    os.mkdir('../temp/motion')
    os.mkdir('../temp/attention')

    # PCA basis (first 6 components) and mean landmark for the LRW model.
    pca = torch.FloatTensor(np.load('../basics/U_lrw1.npy')[:, :6]).cuda()
    mean = torch.FloatTensor(np.load('../basics/mean_lrw1.npy')).cuda()

    decoder = VG_net()
    encoder = AT_net()
    if config.cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # Checkpoints were saved from DataParallel models; strip the module prefix.
    state_dict2 = multi2single(config.vg_model, 1)
    decoder.load_state_dict(state_dict2)
    state_dict = multi2single(config.at_model, 1)
    encoder.load_state_dict(state_dict)
    encoder.eval()
    decoder.eval()

    test_file = config.in_file

    # Identity image and its detected landmarks.
    example_image, example_landmark = generator_demo_example_lips(config.person)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    example_image = cv2.cvtColor(example_image, cv2.COLOR_BGR2RGB)
    example_image = transform(example_image)
    example_landmark = example_landmark.reshape(
        (1, example_landmark.shape[0] * example_landmark.shape[1]))
    if config.cuda:
        example_image = Variable(example_image.view(1, 3, 128, 128)).cuda()
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float))).cuda()
    else:
        example_image = Variable(example_image.view(1, 3, 128, 128))
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float)))

    # Project the example landmarks into PCA space (scale, center, project).
    example_landmark = example_landmark * 5.0
    example_landmark = example_landmark - mean.expand_as(example_landmark)
    example_landmark = torch.mm(example_landmark, pca)

    # Load speech and extract MFCC features from the silence-padded signal.
    # (Previously the MFCC was also computed before padding — dead code, removed.)
    speech, sr = librosa.load(test_file, sr=16000)
    speech = np.insert(speech, 0, np.zeros(1920))
    speech = np.append(speech, np.zeros(1920))
    mfcc = python_speech_features.mfcc(speech, 16000, winstep=0.01)
    # High-rate copy of the audio for muxing into the output video.
    sound, _ = librosa.load(test_file, sr=44100)

    print('=======================================')
    print('Start to generate images')
    t = time.time()
    ind = 3
    with torch.no_grad():
        fake_lmark = []
        input_mfcc = []
        # One 28-row MFCC window (coefficients 1..12) per output video frame.
        while ind <= int(mfcc.shape[0] / 4) - 4:
            t_mfcc = mfcc[(ind - 3) * 4:(ind + 4) * 4, 1:]
            t_mfcc = torch.FloatTensor(t_mfcc).cuda()
            input_mfcc.append(t_mfcc)
            ind += 1
        input_mfcc = torch.stack(input_mfcc, dim=0)
        input_mfcc = input_mfcc.unsqueeze(0)

        fake_lmark = encoder(example_landmark, input_mfcc)
        fake_lmark = fake_lmark.view(fake_lmark.size(0) * fake_lmark.size(1), 6)

        # Back-project the example landmarks from PCA space for the decoder.
        example_landmark = torch.mm(example_landmark, pca.t())
        example_landmark = example_landmark + mean.expand_as(example_landmark)

        # Exaggerate the higher PCA components of the predicted landmarks.
        fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
            np.array([1.1, 1.2, 1.3, 1.4, 1.5])).cuda()
        fake_lmark = torch.mm(fake_lmark, pca.t())
        fake_lmark = fake_lmark + mean.expand_as(fake_lmark)
        fake_lmark = fake_lmark.unsqueeze(0)

        fake_ims, atts, ms, _ = decoder(example_image, fake_lmark, example_landmark)

        # Dump every generated frame plus its motion and attention maps.
        for indx in range(fake_ims.size(1)):
            fake_im = fake_ims[:, indx]
            fake_store = fake_im.permute(0, 2, 3, 1).data.cpu().numpy()[0]
            scipy.misc.imsave(
                "{}/{:05d}.png".format(os.path.join('../', 'temp', 'img'), indx),
                fake_store)
            m = ms[:, indx]
            att = atts[:, indx]
            m = m.permute(0, 2, 3, 1).data.cpu().numpy()[0]
            att = att.data.cpu().numpy()[0, 0]
            scipy.misc.imsave(
                "{}/{:05d}.png".format(os.path.join('../', 'temp', 'motion'), indx), m)
            scipy.misc.imsave(
                "{}/{:05d}.png".format(os.path.join('../', 'temp', 'attention'), indx), att)

        print('In total, generate {:d} images, cost time: {:03f} seconds'.format(
            fake_ims.size(1), time.time() - t))

        fake_lmark = fake_lmark.data.cpu().numpy()
        np.save(os.path.join(config.sample_dir, 'obama_fake.npy'), fake_lmark)
        fake_lmark = np.reshape(fake_lmark, (fake_lmark.shape[1], 68, 2))
        utils.write_video_wpts_wsound(fake_lmark, sound, 44100, config.sample_dir,
                                      'fake', [-1.0, 1.0], [-1.0, 1.0])

        video_name = os.path.join(config.sample_dir, 'results.mp4')
        utils.image_to_video(os.path.join('../', 'temp', 'img'), video_name)
        utils.add_audio(video_name, config.in_file)
        # NOTE(review): add_audio presumably writes a .mov next to the .mp4
        # (same pattern elsewhere in this file) — confirm against utils.add_audio.
        print('The generated video is: {}'.format(
            os.path.join(config.sample_dir, 'results.mov')))
def test():
    """TTS-driven pipeline: news text/URL -> speech -> landmarks -> landmark video.

    Synthesizes speech for ``config.text_tts`` (or scraped ``config.news_url``
    content), predicts landmark sequences with AT_net, renders them with
    ``mark_paint`` and muxes audio via ffmpeg.

    Returns:
        Path of the saved ``.npy`` landmark file.

    Fixes: removed the unreachable ``return False`` after the main ``return``;
    removed the dead MFCC computation performed before silence padding.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.device_ids
    result_dir = 'temp/' + config.in_file
    motion_dir = result_dir + '/motion/'
    os.mkdir(result_dir)
    os.mkdir(motion_dir)

    # CPU tensors throughout this pipeline (no .cuda() calls below).
    pca = torch.FloatTensor(np.load('basics/pca.npy')[:, :6])
    mean = torch.FloatTensor(np.load('basics/mean.npy'))

    decoder = VG_net()
    encoder = AT_net()
    state_dict2 = multi2single(config.vg_model, 1)
    decoder.load_state_dict(state_dict2)
    state_dict = multi2single(config.at_model, 1)
    encoder.load_state_dict(state_dict)
    encoder.eval()
    decoder.eval()

    test_file = result_dir + "/" + config.in_file + ".wav"
    test_file_old = result_dir + "/old_" + config.in_file + ".wav"

    # Prefer explicit text; fall back to scraping the news URL.
    if config.text_tts == "" and config.news_url != "":
        parse_news_content = get_info(config.news_url)['news_content']
    else:
        parse_news_content = config.text_tts

    tts = TTS(config.name_tts, "wav", "000000-0000-0000-0000-00000000",
              config.lang_tts, emotion="neutral", speed=1)
    # TTS backend caps input length; truncate to 1999 characters.
    tts.generate(parse_news_content[:1999])
    if config.shift == 1:
        # Optional pitch/tempo shift of the synthesized audio.
        tts.save(test_file_old)
        audio_shift(test_file_old, test_file)
    else:
        tts.save(test_file)

    example_image, example_landmark = generator_demo_example_lips(config.person)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    example_image = cv2.cvtColor(example_image, cv2.COLOR_BGR2RGB)
    example_image = transform(example_image)
    example_landmark = example_landmark.reshape(
        (1, example_landmark.shape[0] * example_landmark.shape[1]))
    if config.cuda == True:
        example_image = Variable(example_image.view(1, 3, 128, 128)).cuda()
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float))).cuda()
    else:
        example_image = Variable(example_image.view(1, 3, 128, 128))
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float)))

    # Project example landmarks into PCA space.
    example_landmark = example_landmark * 5.0
    example_landmark = example_landmark - mean.expand_as(example_landmark)
    example_landmark = torch.mm(example_landmark, pca)

    # MFCC from the silence-padded speech only (pre-padding compute was dead code).
    speech, sr = librosa.load(test_file, sr=16000)
    speech = np.insert(speech, 0, np.zeros(1920))
    speech = np.append(speech, np.zeros(1920))
    mfcc = python_speech_features.mfcc(speech, 16000, winstep=0.01)
    sound, _ = librosa.load(test_file, sr=44100)

    print('=======================================')
    print('Generate images')
    t = time.time()
    ind = 3
    with torch.no_grad():
        fake_lmark = []
        input_mfcc = []
        while ind <= int(mfcc.shape[0] / 4) - 4:
            t_mfcc = mfcc[(ind - 3) * 4:(ind + 4) * 4, 1:]
            t_mfcc = torch.FloatTensor(t_mfcc)
            input_mfcc.append(t_mfcc)
            ind += 1
        input_mfcc = torch.stack(input_mfcc, dim=0)
        input_mfcc = input_mfcc.unsqueeze(0)

        fake_lmark = encoder(example_landmark, input_mfcc)
        fake_lmark = fake_lmark.view(fake_lmark.size(0) * fake_lmark.size(1), 6)

        example_landmark = torch.mm(example_landmark, pca.t())
        example_landmark = example_landmark + mean.expand_as(example_landmark)

        # Exaggerate the higher PCA components of the predicted landmarks.
        fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
            np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
        fake_lmark = torch.mm(fake_lmark, pca.t())
        fake_lmark = fake_lmark + mean.expand_as(fake_lmark)
        fake_lmark = fake_lmark.unsqueeze(0)

        fake_lmark = fake_lmark.data.cpu().numpy()
        file_mark = result_dir + "/" + config.in_file + ".npy"
        file_mp4 = result_dir + "/" + config.in_file  # extension appended below
        np.save(file_mark, fake_lmark)

        # Render landmark frames, then encode and mux audio with ffmpeg.
        mark_paint.mark_video(fake_lmark, motion_dir)
        cmd = ('ffmpeg -framerate 25 -i ' + motion_dir +
               '%d.png -filter:v scale=512:-1 -c:v libx264 -pix_fmt yuv420p ' +
               file_mp4 + '.mp4')
        subprocess.call(cmd, shell=True)
        print('video done')
        cmd = ('ffmpeg -i ' + file_mp4 + '.mp4 -i ' + test_file +
               ' -c:v copy -c:a aac -strict experimental ' +
               file_mp4 + '_result.mp4')
        subprocess.call(cmd, shell=True)
        print('video+audio done')
        return file_mark
def test():
    """Batch-generate videos for voxceleb2 clips listed in a CSV manifest.

    Each CSV row supplies (video_path, audio_path, start_frame, end_frame);
    only MFCC windows inside [start, end) drive the generator. Output videos
    are written to ``config.sample_dir``.

    Fixes: bare ``except:`` narrowed to ``except Exception`` (was swallowing
    KeyboardInterrupt/SystemExit) and now logs the failure; removed the dead
    pre-padding MFCC computation; final path message no longer prefixes
    ``config.sample_dir`` twice.
    """
    data_root = '/home/cxu-serve/p1/common/voxceleb2/unzip/test_video/'
    audios = []
    videos = []
    start_ids = []
    end_ids = []
    # Manifest columns: 0=video path, 1=audio path, 2=start frame, 3=end frame.
    with open(
            '/home/cxu-serve/p1/common/degree/degree_store/vox/new_extra_data.csv',
            'r') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            audios.append(row[1])
            videos.append(row[0])
            start_ids.append(int(row[2]))
            end_ids.append(int(row[3]))

    os.environ["CUDA_VISIBLE_DEVICES"] = config.device_ids
    if os.path.exists('../temp'):
        shutil.rmtree('../temp')
    os.mkdir('../temp')

    pca = torch.FloatTensor(np.load('../basics/U_lrw1.npy')[:, :6]).cuda()
    mean = torch.FloatTensor(np.load('../basics/mean_lrw1.npy')).cuda()
    decoder = VG_net()
    encoder = AT_net()
    if config.cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    state_dict2 = multi2single(config.vg_model, 1)
    decoder.load_state_dict(state_dict2)
    state_dict = multi2single(config.at_model, 1)
    encoder.load_state_dict(state_dict)
    encoder.eval()
    decoder.eval()

    for i in range(len(audios)):
        try:
            audio_file = audios[i]
            video_file = videos[i]
            test_file = audio_file
            image_path = video_file
            video_name = video_file.split('/')[-1][:-4] + '_' + str(start_ids[i])

            example_image, example_landmark = generator_demo_example_lips(image_path)
            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ])
            example_image = cv2.cvtColor(example_image, cv2.COLOR_BGR2RGB)
            example_image = transform(example_image)
            example_landmark = example_landmark.reshape(
                (1, example_landmark.shape[0] * example_landmark.shape[1]))
            if config.cuda:
                example_image = Variable(example_image.view(1, 3, 128, 128)).cuda()
                example_landmark = Variable(
                    torch.FloatTensor(example_landmark.astype(float))).cuda()
            else:
                example_image = Variable(example_image.view(1, 3, 128, 128))
                example_landmark = Variable(
                    torch.FloatTensor(example_landmark.astype(float)))

            # Project example landmarks into PCA space.
            example_landmark = example_landmark * 5.0
            example_landmark = example_landmark - mean.expand_as(example_landmark)
            example_landmark = torch.mm(example_landmark, pca)

            # MFCC from silence-padded speech (pre-padding compute was dead code).
            speech, sr = librosa.load(audio_file, sr=16000)
            speech = np.insert(speech, 0, np.zeros(1920))
            speech = np.append(speech, np.zeros(1920))
            mfcc = python_speech_features.mfcc(speech, 16000, winstep=0.01)

            print('=======================================')
            print('Start to generate images')
            t = time.time()
            ind = 3
            with torch.no_grad():
                fake_lmark = []
                input_mfcc = []
                while ind <= int(mfcc.shape[0] / 4) - 4:
                    t_mfcc = mfcc[(ind - 3) * 4:(ind + 4) * 4, 1:]
                    t_mfcc = torch.FloatTensor(t_mfcc).cuda()
                    # Keep only the windows inside the requested frame range.
                    if ind >= start_ids[i] and ind < end_ids[i]:
                        input_mfcc.append(t_mfcc)
                    ind += 1
                input_mfcc = torch.stack(input_mfcc, dim=0)
                input_mfcc = input_mfcc.unsqueeze(0)
                print(input_mfcc.shape)

                fake_lmark = encoder(example_landmark, input_mfcc)
                fake_lmark = fake_lmark.view(
                    fake_lmark.size(0) * fake_lmark.size(1), 6)

                example_landmark = torch.mm(example_landmark, pca.t())
                example_landmark = example_landmark + mean.expand_as(example_landmark)

                # Exaggerate the higher PCA components of the prediction.
                fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
                    np.array([1.1, 1.2, 1.3, 1.4, 1.5])).cuda()
                fake_lmark = torch.mm(fake_lmark, pca.t())
                fake_lmark = fake_lmark + mean.expand_as(fake_lmark)
                fake_lmark = fake_lmark.unsqueeze(0)

                fake_ims, _, _, _ = decoder(example_image, fake_lmark,
                                            example_landmark)

                # Clear scratch frames from the previous clip.
                os.system('rm ../temp/*')
                for indx in range(fake_ims.size(1)):
                    fake_im = fake_ims[:, indx]
                    fake_store = fake_im.permute(0, 2, 3, 1).data.cpu().numpy()[0]
                    scipy.misc.imsave(
                        "{}/{:05d}.png".format(os.path.join('../', 'temp'), indx),
                        fake_store)
                print(time.time() - t)

                fake_lmark = fake_lmark.data.cpu().numpy()
                video_name = os.path.join(config.sample_dir, video_name)
                utils.image_to_video(os.path.join('../', 'temp'),
                                     video_name + '.mp4')
                utils.add_audio(video_name + '.mp4', audio_file)
                # video_name already contains sample_dir; don't join it again.
                # NOTE(review): '.mov' assumes add_audio writes a .mov — confirm.
                print('The generated video is: {}'.format(video_name + '.mov'))
        except Exception as e:
            # Best-effort batch: skip clips that fail (bad faces, short audio, ...),
            # but say why instead of failing silently.
            print('skip clip {}: {}'.format(i, e))
            continue
def test():
    """Evaluate the landmark generator on a few batches and plot real vs. fake landmarks.

    Loads ``config.model_name``, draws up to 5 batches from the chosen dataset
    (LRW or GRID, LSTM or single-frame), back-projects predictions from PCA
    space, and saves landmark plots under ``config.sample_dir``.

    Fix: ``data_iter.next()`` is Python-2-only and raises AttributeError on
    Python 3 — replaced with the builtin ``next(data_iter)``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.device_ids
    config.is_train = 'test'
    if config.lstm == True:
        generator = AT_net()
    else:
        generator = AT_single()

    # PCA basis / mean matching the dataset the model was trained on.
    if config.dataset == 'grid':
        pca = torch.FloatTensor(np.load('../basics/U_grid.npy')[:, :6]).cuda()
        mean = torch.FloatTensor(np.load('../basics/mean_grid.npy')).cuda()
    elif config.dataset == 'lrw':
        pca = torch.FloatTensor(np.load('../basics/U_lrw1.npy')[:, :6]).cuda()
        mean = torch.FloatTensor(np.load('../basics/mean_lrw1.npy')).cuda()
    else:
        raise Exception('wrong key word for the dataset input')

    # Plot axis limits/labels shared by all figures.
    xLim = (-1.0, 1.0)
    yLim = (-1.0, 1.0)
    xLab = 'x'
    yLab = 'y'

    state_dict = multi2single(config.model_name, 1)
    generator.load_state_dict(state_dict)
    print('load pretrained [{}]'.format(config.model_name))

    if config.lstm:
        if config.dataset == 'lrw':
            dataset = LRW_1D_lstm_landmark_pca(config.dataset_dir, train=config.is_train)
        else:
            dataset = GRID_1D_lstm_landmark_pca(config.dataset_dir, train=config.is_train)
    else:
        if config.dataset == 'lrw':
            dataset = LRW_1D_single_landmark_pca(config.dataset_dir, train=config.is_train)
        else:
            dataset = GRID_1D_single_landmark_pca(config.dataset_dir, train=config.is_train)
    data_loader = DataLoader(dataset,
                             batch_size=config.batch_size,
                             num_workers=config.num_thread,
                             shuffle=False,
                             drop_last=True)
    # Skip the first batch (was `data_iter.next()`, which breaks on Python 3).
    data_iter = iter(data_loader)
    next(data_iter)

    if not os.path.exists(config.sample_dir):
        os.mkdir(config.sample_dir)
    if not os.path.exists(os.path.join(config.sample_dir, 'fake')):
        os.mkdir(os.path.join(config.sample_dir, 'fake'))
    if not os.path.exists(os.path.join(config.sample_dir, 'real')):
        os.mkdir(os.path.join(config.sample_dir, 'real'))

    if config.cuda:
        generator = generator.cuda()
    generator.eval()

    for step, (example_landmark, example_audio, lmark, audio) in enumerate(data_loader):
        with torch.no_grad():
            print(step)
            if step == 5:
                break
            if config.cuda:
                example_audio = Variable(example_audio.float()).cuda()
                lmark = Variable(lmark.float()).cuda()
                audio = Variable(audio.float()).cuda()
                example_landmark = Variable(example_landmark.float()).cuda()
            if config.lstm:
                # Sequence model: 16 frames per sample, 6 PCA coefficients each.
                fake_lmark = generator(example_landmark, audio)
                fake_lmark = fake_lmark.view(
                    fake_lmark.size(0) * fake_lmark.size(1), 6)
                fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
                    np.array([1.2, 1.4, 1.6, 1.8, 2.0])).cuda()
                fake_lmark = torch.mm(fake_lmark, pca.t())
                fake_lmark = fake_lmark + mean.expand_as(fake_lmark)
                fake_lmark = fake_lmark.view(config.batch_size, 16, 136)
                fake_lmark = fake_lmark.data.cpu().numpy()

                lmark = lmark.view(lmark.size(0) * lmark.size(1), 6)
                lmark = torch.mm(lmark, pca.t())
                lmark = lmark + mean.expand_as(lmark)
                lmark = lmark.view(config.batch_size, 16, 136)
                lmark = lmark.data.cpu().numpy()

                # NOTE(review): paths assume config.sample_dir ends with '/' —
                # confirm; plots land beside, not inside, the real/fake dirs.
                for indx in range(config.batch_size):
                    for jj in range(16):
                        name = "{}real_{}_{}_{}.png".format(
                            config.sample_dir, step, indx, jj)
                        utils.plot_flmarks(lmark[indx, jj], name, xLim, yLim,
                                           xLab, yLab, figsize=(10, 10))
                        name = "{}fake_{}_{}_{}.png".format(
                            config.sample_dir, step, indx, jj)
                        utils.plot_flmarks(fake_lmark[indx, jj], name, xLim, yLim,
                                           xLab, yLab, figsize=(10, 10))
            else:
                # Single-frame model.
                fake_lmark = generator(example_landmark, audio)
                fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
                    np.array([1.2, 1.4, 1.6, 1.8, 2.0])).cuda()
                fake_lmark = torch.mm(fake_lmark, pca.t())
                fake_lmark = fake_lmark + mean.expand_as(fake_lmark)
                fake_lmark = fake_lmark.data.cpu().numpy()

                lmark = torch.mm(lmark, pca.t())
                lmark = lmark + mean.expand_as(lmark)
                lmark = lmark.data.cpu().numpy()

                for indx in range(config.batch_size):
                    name = '{}real/real_{}.png'.format(
                        config.sample_dir, step * config.batch_size + indx)
                    utils.plot_flmarks(lmark[indx], name, xLim, yLim,
                                       xLab, yLab, figsize=(10, 10))
                    name = '{}fake/fake_{}.png'.format(
                        config.sample_dir, step * config.batch_size + indx)
                    utils.plot_flmarks(fake_lmark[indx], name, xLim, yLim,
                                       xLab, yLab, figsize=(10, 10))