Code Example #1
File: video_dataset.py  Project: lichnost/head2head
    def __getitem__(self, index):
        # Get sequence paths.
        seq_idx = self.update_frame_index(self.nmfc_video_paths, index)
        nmfc_video_paths = self.nmfc_video_paths[seq_idx]
        nmfc_len = len(nmfc_video_paths)
        rgb_video_paths = self.rgb_video_paths[seq_idx]
        if not self.opt.no_eye_gaze or (not self.opt.no_mouth_D and self.opt.isTrain) or (self.opt.use_eyes_D and self.opt.isTrain):
            landmark_video_paths = self.landmark_video_paths[seq_idx]

        # Get parameters and transforms.
        n_frames_total, start_idx = get_video_parameters(self.opt, self.n_frames_total, nmfc_len, self.frame_idx)
        first_nmfc_image = Image.open(nmfc_video_paths[0]).convert('RGB')
        params = get_params(self.opt, first_nmfc_image.size)
        # Do not normalize NMFC images; augment them only during training.
        transform_scale_nmfc_video = get_transform(self.opt, params, normalize=False,
            augment=not self.opt.no_augment_input and self.opt.isTrain)
        # Eye-gaze images reuse the same (unnormalized) NMFC transform.
        transform_scale_eye_gaze_video = transform_scale_nmfc_video
        transform_scale_rgb_video = get_transform(self.opt, params)
        change_seq = False if self.opt.isTrain else self.change_seq

        # Read data.
        A_paths = []
        rgb_video = nmfc_video = eye_video = mouth_centers = eyes_centers = 0
        for i in range(n_frames_total):
            # NMFC
            nmfc_video_path = nmfc_video_paths[start_idx + i]
            nmfc_video_i = self.get_image(nmfc_video_path, transform_scale_nmfc_video)
            nmfc_video = nmfc_video_i if i == 0 else torch.cat([nmfc_video, nmfc_video_i], dim=0)
            # RGB
            rgb_video_path = rgb_video_paths[start_idx + i]
            rgb_video_i = self.get_image(rgb_video_path, transform_scale_rgb_video)
            rgb_video = rgb_video_i if i == 0 else torch.cat([rgb_video, rgb_video_i], dim=0)
            A_paths.append(nmfc_video_path)
            if not self.opt.no_eye_gaze:
                landmark_video_path = landmark_video_paths[start_idx + i]
                eye_video_i = create_eyes_image(landmark_video_path, first_nmfc_image.size,
                                                transform_scale_eye_gaze_video,
                                                add_noise=self.opt.isTrain)
                eye_video = eye_video_i if i == 0 else torch.cat([eye_video, eye_video_i], dim=0)
            if not self.opt.no_mouth_D and self.opt.isTrain:
                landmark_video_path = landmark_video_paths[start_idx + i]
                mouth_centers_i = self.get_mouth_center(landmark_video_path)
                mouth_centers = mouth_centers_i if i == 0 else torch.cat([mouth_centers, mouth_centers_i], dim=0)
            if self.opt.use_eyes_D and self.opt.isTrain:
                landmark_video_path = landmark_video_paths[start_idx + i]
                eyes_centers_i = self.get_eyes_center(landmark_video_path)
                eyes_centers = eyes_centers_i if i == 0 else torch.cat([eyes_centers, eyes_centers_i], dim=0)

        return_list = {'nmfc_video': nmfc_video, 'rgb_video': rgb_video,
                       'eye_video': eye_video, 'mouth_centers': mouth_centers,
                       'eyes_centers': eyes_centers, 'change_seq': change_seq,
                       'A_paths': A_paths}
        return return_list
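
A minimal usage sketch of the dataset above, assuming it is exposed as a standard PyTorch Dataset subclass; the class name NMFCDataset, the initialize() call, and the opt object are illustrative stand-ins, not names confirmed by the file above:

from torch.utils.data import DataLoader

dataset = NMFCDataset()      # hypothetical subclass containing the __getitem__ above
dataset.initialize(opt)      # hypothetical: opt holds isTrain, no_eye_gaze, n_frames_total, ...
loader = DataLoader(dataset, batch_size=1, shuffle=opt.isTrain)

for data in loader:
    nmfc_clip = data['nmfc_video']   # (1, n_frames_total * 3, H, W): NMFC frames stacked along channels
    rgb_clip = data['rgb_video']     # corresponding RGB frames used as the generation target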
Code Example #2
def compute_fake_video(input_queue, output_queue, modelG, opt):
    input_A_all = None
    while True:
        # Read input.
        conditional_input = input_queue.get()
        nmfc, eye_landmarks, real_frame = conditional_input
        width, height = nmfc.shape[0:2]  # note: shape[0:2] of a NumPy image is (rows, cols); the two coincide only for square frames
        # Create tensors
        params = get_params(opt, (width, height))
        transform_scale_nmfc_video = get_transform(opt,
                                                   params,
                                                   normalize=False,
                                                   augment=False)
        nmfc = transform_scale_nmfc_video(Image.fromarray(nmfc))
        transform_scale_eye_gaze_video = get_transform(opt,
                                                       params,
                                                       normalize=False)
        eye_gaze = create_eyes_image(None, (width, height),
                                     transform_scale_eye_gaze_video,
                                     add_noise=False,
                                     pts=eye_landmarks)
        # Concat conditional inputs.
        input_A = torch.cat([nmfc, eye_gaze], dim=0)
        if input_A_all is None:
            # No previously generated frames yet: pad the conditioning history with zeros.
            input_A_all = torch.cat(
                [torch.zeros((opt.n_frames_G - 1) * opt.input_nc, width, height),
                 input_A], dim=0)
        else:
            # Discard the oldest conditional input and append the new one.
            input_A_all = torch.cat(
                [input_A_all[opt.input_nc:, :, :], input_A], dim=0)
        input_A_final = input_A_all.view(1, -1, opt.input_nc, width, height)
        # Forward pass through Generator.
        generated = modelG.inference(input_A_final, None)
        fake_frame = util.tensor2im(generated[0].data[0])
        # Write results to Queue.
        output_queue.put((fake_frame, real_frame))
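
A minimal sketch of how this worker loop could be driven, assuming it runs as a consumer process fed through multiprocessing queues; modelG, opt and the conditional_inputs iterable are assumed to be built elsewhere and are not defined in the snippet above:

import torch.multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn')             # safer when modelG lives on the GPU
    input_queue, output_queue = mp.Queue(), mp.Queue()
    worker = mp.Process(target=compute_fake_video,
                        args=(input_queue, output_queue, modelG, opt))
    worker.start()
    for nmfc, eye_landmarks, real_frame in conditional_inputs:   # assumed per-frame inputs
        input_queue.put((nmfc, eye_landmarks, real_frame))
        fake_frame, real_frame = output_queue.get()              # synthesized frame plus its ground truth
    worker.terminate()                        # the loop above never exits on its own, so stop it explicitly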