Example #1
def find_best_frame(source, driving, cpu=False):
    import face_alignment

    def normalize_kp(kp):
        kp = kp - kp.mean(axis=0, keepdims=True)
        area = ConvexHull(kp[:, :2]).volume
        area = np.sqrt(area)
        kp[:, :2] = kp[:, :2] / area
        return kp

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=True,
                                      device='cpu' if cpu else 'cuda')
    kp_source = fa.get_landmarks(255 * source)[0]
    kp_source = normalize_kp(kp_source)
    norm = float('inf')
    frame_num = 0
    for i, image in tqdm(enumerate(driving)):
        kp_driving = fa.get_landmarks(255 * image)[0]
        kp_driving = normalize_kp(kp_driving)
        new_norm = (np.abs(kp_source - kp_driving)**2).sum()
        if new_norm < norm:
            norm = new_norm
            frame_num = i
    return frame_num
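A minimal usage sketch for find_best_frame, following the first-order-model demo conventions: frames are float arrays in [0, 1] resized to 256x256. The file paths here are hypothetical, and the excerpt also relies on module-level imports (numpy as np, tqdm, scipy's ConvexHull) that it does not show.

import imageio
from skimage.transform import resize

source_image = resize(imageio.imread('source.png'), (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3]
                 for frame in imageio.get_reader('driving.mp4')]

best_idx = find_best_frame(source_image, driving_video, cpu=True)
print('best starting frame:', best_idx)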
Example #2
def single_pass(source, target_image, generator, kp_detector, kp_source, kp_driving_initial, relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        # Convert the driving frame to a (1, C, H, W) float tensor; `source`
        # is expected to arrive already batched, with its keypoints precomputed.
        target_image = torch.tensor(target_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)

        driving_frame = target_image
        if not cpu:
            driving_frame = driving_frame.cuda()
        kp_driving = kp_detector(driving_frame)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial, use_relative_movement=relative,
                               use_relative_jacobian=relative, adapt_movement_scale=adapt_movement_scale)
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

    return out['prediction'].data
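single_pass expects source already as a (1, C, H, W) tensor and the keypoints precomputed. A sketch of driving it frame by frame; the to_input_tensor helper and the frames list are assumptions, not part of the original code.

import numpy as np
import torch

def to_input_tensor(img, cpu=False):
    # Hypothetical helper: HWC float image in [0, 1] -> (1, C, H, W) tensor.
    t = torch.tensor(img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
    return t if cpu else t.cuda()

source = to_input_tensor(source_image)
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(to_input_tensor(frames[0]))
outputs = [single_pass(source, frame, generator, kp_detector, kp_source, kp_driving_initial)
           for frame in frames]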
Example #3
def make_animation(source_image,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2).cuda()
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3).cuda()
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
    return predictions
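This variant hardcodes .cuda(), so it needs a GPU. An end-to-end sketch, assuming the first-order-model demo's load_checkpoints and hypothetical file paths:

import imageio
from skimage import img_as_ubyte
from skimage.transform import resize
from demo import load_checkpoints  # assumption: the first-order-model demo module

generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml',
                                          checkpoint_path='vox-cpk.pth.tar')
source_image = resize(imageio.imread('source.png'), (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3]
                 for frame in imageio.get_reader('driving.mp4')]
predictions = make_animation(source_image, driving_video, generator, kp_detector)
imageio.mimsave('result.mp4', [img_as_ubyte(frame) for frame in predictions], fps=25)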
Example #4
def predict(driving_frame, source_image, relative, adapt_movement_scale, fa, device='cuda'):
    global start_frame
    global start_frame_kp
    global kp_driving_initial
    global kp_source

    with torch.no_grad():
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        driving = torch.tensor(driving_frame[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)

        if kp_driving_initial is None:
            kp_driving_initial = kp_detector(driving)
            start_frame = driving_frame.copy()
            start_frame_kp = get_frame_kp(fa, driving_frame)

        if kp_source is None:
            kp_source = kp_detector(source)

        kp_driving = kp_detector(driving)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial, use_relative_movement=relative,
                               use_relative_jacobian=relative, adapt_movement_scale=adapt_movement_scale)

        if opt.enc_downscale > 1:
            h, w = int(source.shape[2] / opt.enc_downscale), int(source.shape[3] / opt.enc_downscale)
            source_enc = torch.nn.functional.interpolate(source, size=(h, w), mode='bilinear')
        else:
            source_enc = None

        out = generator(source, kp_source=kp_source, kp_driving=kp_norm, source_image_enc=source_enc, optim_ret=True)

        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        out = (np.clip(out, 0, 1) * 255).astype(np.uint8)

        return out
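The function above reads generator, kp_detector, and opt from module scope and mutates four globals. A minimal setup sketch; the None defaults match the `is None` checks in the code:

# Module-level state expected by predict(); reset these between streams so a
# new driving video establishes a fresh initial frame.
start_frame = None
start_frame_kp = None
kp_driving_initial = None
kp_source = None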
Example #5
def predict(driving_frame, source_image, relative, adapt_movement_scale, fa,
            generator, kp_detector, kp_driving_initial, device='cuda'):
    global start_frame
    global start_frame_kp
    # global kp_driving_initial

    with torch.no_grad():
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        driving = torch.tensor(driving_frame[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        kp_source = kp_detector(source)

        if kp_driving_initial is None:
            kp_driving_initial = kp_detector(driving)
            start_frame = driving_frame.copy()
            start_frame_kp = get_frame_kp(fa, driving_frame)

        kp_driving = kp_detector(driving)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial, use_relative_movement=relative,
                               use_relative_jacobian=relative, adapt_movement_scale=adapt_movement_scale)
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        out = (np.clip(out, 0, 1) * 255).astype(np.uint8)

        return out
Example #6
def kp_animation(source_image,
                 kp,
                 generator,
                 relative=True,
                 adapt_movement_scale=True,
                 cpu=False):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        kp_source = kp_driving_initial = kp[0]

        for kp_driving in kp:
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
    return predictions
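Here kp is expected to be a sequence of keypoint-detector outputs (dicts with 'value' and, when present, 'jacobian' tensors) already on the generator's device. A sketch of building it from a driving tensor shaped as in Example #3; driving and kp_detector are assumptions:

kp = [kp_detector(driving[:, :, i]) for i in range(driving.shape[2])]
predictions = kp_animation(source_image, kp, generator)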
Example #7
def generate_morphed_video(image, sub_video):
    with torch.no_grad():
        source = (
            torch.tensor(np.float32(image), device="cuda").permute(2, 0, 1).unsqueeze(0)
        )
        kp_source = kp_detector(source)

        for i, driving_frame in enumerate(sub_video):
            driving_frame = (
                torch.tensor(np.float32(driving_frame))
                .cuda()
                .permute(2, 0, 1)
                .unsqueeze(0)
            )

            if i == 0:
                kp_driving_initial = kp_detector(driving_frame)

            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(
                kp_source=kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=kp_driving_initial,
                use_relative_movement=True,
                use_relative_jacobian=True,
                adapt_movement_scale=True,
            )
            yield generator(source, kp_source=kp_source, kp_driving=kp_norm)[
                "prediction"
            ].squeeze().permute(1, 2, 0).cpu().numpy()
Example #8
def make_animation(source_image_true,source_image_fake, driving_video, generator, kp_detector, relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        sparse_d = []
        occlusion = []
        
        source_true = torch.tensor(source_image_true[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source_fake = torch.tensor(source_image_fake[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source_true = source_true.cuda()
            source_fake = source_fake.cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source_true)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial, use_relative_movement=relative,
                                   use_relative_jacobian=relative, adapt_movement_scale=adapt_movement_scale)
            out = generator(source_true, source_fake, kp_source=kp_source, kp_driving=kp_norm)

            print(out['sparse_deformed'].data.cpu().numpy().shape)
            sparse_d.append(np.transpose(out['sparse_deformed'].data.cpu().numpy(), [0, 1, 3, 4, 2])[0][0])
            occlusion.append(np.transpose(out['occlusion_map'].data.cpu().numpy(), [0, 2, 3, 1])[0])
    return sparse_d, occlusion
Example #9
    def predict(self, driving_frame):
        assert self.kp_source is not None, "call set_source_image()"

        with torch.no_grad():
            driving = to_tensor(driving_frame).to(self.device)

            if self.kp_driving_initial is None:
                self.kp_driving_initial = self.kp_detector(driving)
                self.start_frame = driving_frame.copy()
                self.start_frame_kp = self.get_frame_kp(driving_frame)

            kp_driving = self.kp_detector(driving)
            kp_norm = normalize_kp(
                kp_source=self.kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=self.kp_driving_initial,
                use_relative_movement=self.relative,
                use_relative_jacobian=self.relative,
                adapt_movement_scale=self.adapt_movement_scale)

            out = self.generator(self.source,
                                 kp_source=self.kp_source,
                                 kp_driving=kp_norm)

            out = np.transpose(out['prediction'].data.cpu().numpy(),
                               [0, 2, 3, 1])[0]
            out = (np.clip(out, 0, 1) * 255).astype(np.uint8)

            return out
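The assertion above refers to a set_source_image() method that the excerpt omits. A plausible sketch, assuming the same to_tensor helper used in predict(); this is not the project's exact code:

    def set_source_image(self, source_image):
        # Hypothetical: cache the source tensor and its keypoints so
        # predict() can reuse them for every driving frame.
        self.source = to_tensor(source_image).to(self.device)
        self.kp_source = self.kp_detector(self.source)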
Example #10
    def next(self, image):
        _, kp_image = self._tensor_image(image)
        result = []
        for tgt, kp in zip(self.targets, self.kp_targets):
            with torch.no_grad():
                norm = normalize_kp(kp, kp_image, self.kp_start, self.adapt_movement_scale, self.relative, self.relative)
                out = self.generator(tgt, kp_source=kp, kp_driving=norm)
            result.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return result
Example #11
def make_animation(source_image,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True,
                   cpu=False):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        # source: [1, 3, 256, 256]; driving: [1, 3, num_frames, 256, 256]

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
    return predictions
Example #12
def make_animation(source_image,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True,
                   cpu=False,
                   progress_var=None,
                   progress_label=None):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in range(driving.shape[2]):
            percent = frame_idx / driving.shape[2] * 100
            if progress_var:
                progress_var.set(int(percent))
            if progress_label:
                progress_label.config(text=f'{percent:.1f}%')

            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
        if progress_label:
            progress_label.config(text="Done!")
    return predictions
Example #13
def process(opt,img_orig, generator, kp_detector):
    img = resize(img_orig, (256, 256))[..., :3]
    kp_driving_initial = None
    with torch.no_grad():
        spm = torch.tensor(img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source = spm.cpu() if opt.cpu else spm.cuda()
        kp_source = kp_detector(source)

        video = cv2.VideoCapture(opt.video)
        fps = video.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'avc1')
        vout = cv2.VideoWriter(opt.out, fourcc, fps, (256, 256))
        while True:
            # cv2.VideoCapture.read() returns (ret, frame); frame is None
            # once the stream is exhausted.
            ret, frame_img = video.read()
            if frame_img is None:
                print("No frame returned; the video ended or the capture device failed")
                break

            #frame_img = cv2.rotate(frame_img,cv2.ROTATE_180)
            frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)

            y, x, _ = frame_img.shape
            min_dim = min(y, x)
            startx = x // 2 - (min_dim // 2)
            starty = y // 2 - (min_dim // 2)
            frame_img = frame_img[starty:starty + min_dim, startx:startx + min_dim, :]

            frame_img = resize(frame_img, (256, 256))[..., :3]
            frame = torch.tensor(frame_img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
            # Keep the driving frame on the same device as the source tensor.
            frame = frame.cpu() if opt.cpu else frame.cuda()

            kp_driving = kp_detector(frame)
            if kp_driving_initial is None:
                kp_driving_initial = kp_driving
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial, use_relative_movement=opt.relative,
                                   use_relative_jacobian=opt.relative, adapt_movement_scale=opt.adapt_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            p = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
            p = p*255
            p = p.astype(np.uint8)
            p = cv2.cvtColor(p, cv2.COLOR_RGB2BGR)
            vout.write(p)

        video.release()
        vout.release()
Example #14
def make_animation_video(source_video,
                         driving_video,
                         generator,
                         kp_detector,
                         relative=True,
                         adapt_movement_scale=True,
                         cpu=False):
    with torch.no_grad():
        predictions = []
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        source_v = torch.tensor(
            np.array(source_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        # Use the source video as the main frame counter; this requires
        # driving_video to have at least as many frames as source_video.
        for frame_idx in tqdm(range(source_v.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            source_frame = source_v[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
                source_frame = source_frame.cuda()

            kp_driving = kp_detector(driving_frame)
            kp_source = kp_detector(source_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source_frame,
                            kp_source=kp_source,
                            kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
    return predictions
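Because the loop indexes both tensors with the same frame counter, trimming the two clips to a common length before the call avoids an out-of-range error. A usage sketch; the variable names are assumptions:

n_frames = min(len(source_video), len(driving_video))
predictions = make_animation_video(source_video[:n_frames],
                                   driving_video[:n_frames],
                                   generator, kp_detector)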
Example #15
    def predict(self, driving_frame):
        with torch.no_grad():
            driving = to_tensor(driving_frame).to(self.device)

            if self.kp_driving_initial is None:
                self.kp_driving_initial = self.kp_detector(driving)
                self.start_frame = driving_frame.copy()
                self.start_frame_kp = self.get_frame_kp(driving_frame)

            kp_driving = self.kp_detector(driving)
            kp_norm = normalize_kp(
                kp_source=self.kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=self.kp_driving_initial,
                use_relative_movement=self.relative,
                use_relative_jacobian=self.relative,
                adapt_movement_scale=self.adapt_movement_scale)

            if self.enc_downscale > 1:
                h, w = int(self.source.shape[2] / self.enc_downscale), int(
                    self.source.shape[3] / self.enc_downscale)
                source_enc = torch.nn.functional.interpolate(self.source,
                                                             size=(h, w),
                                                             mode='bilinear')
            else:
                source_enc = None

            try:
                out = self.generator(self.source,
                                     kp_source=self.kp_source,
                                     kp_driving=kp_norm,
                                     source_image_enc=source_enc,
                                     optim_ret=True)
            except TypeError:
                Once('\n*** Please update FOMM:\ncd fomm\ngit pull\n')
                out = self.generator(self.source,
                                     kp_source=self.kp_source,
                                     kp_driving=kp_norm)

            out = np.transpose(out['prediction'].data.cpu().numpy(),
                               [0, 2, 3, 1])[0]
            out = (np.clip(out, 0, 1) * 255).astype(np.uint8)

            return out
Example #16
    def process(self, vframe):
        with torch.no_grad():
            y, x, _ = vframe.shape
            min_dim = min(y, x)
            startx = x // 2 - (min_dim // 2)
            starty = y // 2 - (min_dim // 2)
            vframe = vframe[starty:starty + min_dim,
                            startx:startx + min_dim, :]
            vframe = resize(vframe, (256, 256))[..., :3]
            if self.count < 60:
                self.count += 1
                p = np.concatenate([vframe[:, :, ::-1], self.img], axis=1)
                p = p * 255
                p = p.astype(np.uint8)
                p = cv2.cvtColor(p, cv2.COLOR_BGR2RGB)
                return p

            frame = torch.tensor(vframe[np.newaxis].astype(
                np.float32)).permute(0, 3, 1, 2)

            kp_driving = kp_detector(frame)
            if self.kp_driving_initial is None:
                self.kp_driving_initial = kp_driving

            kp_norm = normalize_kp(kp_source=self.kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=self.kp_driving_initial,
                                   use_relative_movement=True,
                                   use_relative_jacobian=True,
                                   adapt_movement_scale=True)
            out = generator(self.source,
                            kp_source=self.kp_source,
                            kp_driving=kp_norm)

            p = np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0]

            p = np.concatenate([vframe[:, :, ::-1], p], axis=1)
            p = p * 255
            p = p.astype(np.uint8)
            p = cv2.cvtColor(p, cv2.COLOR_BGR2RGB)
            self.count += 1
            return p
Example #17
    def do_feeding(self, task_meta: TaskMeta, generator):

        source_img = cv2.imread(task_meta.source_img_path)
        source_img = resize_image(source_img)
        source_faces = self.get_source_faces(source_img)

        if len(source_faces) == 0:
            return process_code.NO_AVAILABLE_FACE

        self.update_kp_sources(source_faces)

        resource, frame_data_list = self.resource_manager.get_cache_data(
            task_meta.material_file_path, task_meta.ppd_file_path)

        predictions = []
        for source_face in source_faces:
            best_frame_id = self.get_best_frame(source_face, frame_data_list)

            best_frame = resource.material[best_frame_id]
            best_frame = best_frame[:, :, ::-1].copy()
            frame = torch.tensor(best_frame[np.newaxis].astype(np.float32)) \
                .permute(0, 3, 1, 2).cuda() / 255.
            kp_driving_initial = self.kp_detector(frame)

            relative = True
            adapt_movement_scale = True
            for frame_data in frame_data_list:
                kp_norm = normalize_kp(
                    kp_source=source_face.kp_source,
                    kp_driving=frame_data.kp_by_detector,
                    kp_driving_initial=kp_driving_initial,
                    use_relative_movement=relative,
                    use_relative_jacobian=relative,
                    adapt_movement_scale=adapt_movement_scale)
                out = self.generator(source_face.face_img,
                                     kp_source=source_face.kp_source,
                                     kp_driving=kp_norm)
                predictions.append(
                    np.transpose(out['prediction'].data.cpu().numpy(),
                                 [0, 2, 3, 1])[0])
        return predictions
Example #18
def make_animation(source_image,
                   driving_video,
                   relative=True,
                   adapt_movement_scale=True,
                   cpu=False):
    generator, kp_detector = load_checkpoints.load_checkpoints(
        config_path='config/vox-256.yaml',
        checkpoint_path='data/vox-cpk.pth.tar',
        cpu=cpu)

    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in range(driving.shape[2]):
            progress.progress(frame_idx / float(driving.shape[2]))

            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
    return predictions
Example #19
def animate_image(source_image,
                  video_image,
                  orig_frame,
                  generator,
                  kp_detector,
                  relative=True,
                  adapt_movement_scale=True,
                  cpu=False):
    with torch.no_grad():
        initial_frame = torch.tensor(orig_frame[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        driving_frame = torch.tensor(video_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
            driving_frame = driving_frame.cuda()
            initial_frame = initial_frame.cuda()

        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(initial_frame)

        kp_driving = kp_detector(driving_frame)
        kp_norm = normalize_kp(kp_source=kp_source,
                               kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial,
                               use_relative_movement=relative,
                               use_relative_jacobian=relative,
                               adapt_movement_scale=adapt_movement_scale)
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
        out = np.transpose(out['prediction'].data.cpu().numpy(),
                           [0, 2, 3, 1])[0]
        return out
Example #20
def process(input):
    print("[INFO] loading source image and checkpoint...")
    source_path = input
    checkpoint_path = args['checkpoint']
    if args['input_video']:
        video_path = args['input_video']
    else:
        video_path = None
    source_image = imageio.imread(source_path)
    source_image = resize(source_image, (256, 256))[..., :3]

    generator, kp_detector = load_checkpoints(
        config_path='config/vox-256.yaml', checkpoint_path=checkpoint_path)

    # Load the cascade
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    if not os.path.exists('output'):
        os.mkdir('output')

    relative = True
    adapt_movement_scale = True
    if args['cpu']:
        cpu = True
    else:
        cpu = False

    if video_path:
        cap = cv2.VideoCapture(video_path)
        print("[INFO] Loading video from the given path")
    else:
        cap = cv2.VideoCapture(0)
        print("[INFO] Initializing front camera...")
        # get vcap property
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float `width`
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float `height`
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print('resolution : {} x {}'.format(width, height))
        print('frame rate : {} \nframe count : {}'.format(fps, frame_count))

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out1 = cv2.VideoWriter('output/test.avi', fourcc, 12, (256 * 3, 256), True)

    cv2_source = cv2.cvtColor(source_image.astype('float32'),
                              cv2.COLOR_BGR2RGB)
    cv2_source2 = (source_image * 255).astype(np.uint8)

    if args['vc']:
        camera = pyfakewebcam.FakeWebcam('/dev/video7', 640, 360)
        camera._settings.fmt.pix.width = 640
        camera._settings.fmt.pix.height = 360

    img = np.zeros((360, 640, 3), dtype=np.uint8)
    yoff = round((360 - 256) / 2)
    xoff = round((640 - 256) / 2)
    img_im = img.copy()
    img_cv2_source = img.copy()
    img_im[:, :, 2] = 255
    img_cv2_source[:, :, 2] = 255
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        kp_source = kp_detector(source)
        count = 0
        fps = []
        if args['csv']:
            line1 = []
            size = 10
            x_vec = np.linspace(0, 1, size + 1)[0:-1]
            y_vec = np.random.randn(len(x_vec))
        while True:
            start = time.time()
            ret, frame = cap.read()
            if ret:
                # Run face detection before the frame is mirrored.
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.1, 4)
                frame = cv2.flip(frame, 1)

                if not video_path:
                    x = 143
                    y = 87
                    w = 322
                    h = 322
                    frame = frame[y:y + h, x:x + w]
                frame1 = resize(frame, (256, 256))[..., :3]

                if count == 0:
                    source_image1 = frame1
                    source1 = torch.tensor(source_image1[np.newaxis].astype(
                        np.float32)).permute(0, 3, 1, 2)
                    kp_driving_initial = kp_detector(source1)

                frame_test = torch.tensor(frame1[np.newaxis].astype(
                    np.float32)).permute(0, 3, 1, 2)

                driving_frame = frame_test
                if not cpu:
                    driving_frame = driving_frame.cuda()
                kp_driving = kp_detector(driving_frame)
                kp_norm = normalize_kp(
                    kp_source=kp_source,
                    kp_driving=kp_driving,
                    kp_driving_initial=kp_driving_initial,
                    use_relative_movement=relative,
                    use_relative_jacobian=relative,
                    adapt_movement_scale=adapt_movement_scale)
                out = generator(source,
                                kp_source=kp_source,
                                kp_driving=kp_norm)
                predictions.append(
                    np.transpose(out['prediction'].data.cpu().numpy(),
                                 [0, 2, 3, 1])[0])
                im = np.transpose(out['prediction'].data.cpu().numpy(),
                                  [0, 2, 3, 1])[0]
                im = (np.array(im) * 255).astype(np.uint8)
                img_im[yoff:yoff + 256, xoff:xoff + 256] = im
                img_cv2_source[yoff:yoff + 256, xoff:xoff + 256] = cv2_source2
                if args['debug']:
                    fps.append(1.0 / (time.time() - start))
                    if args['cpu']:
                        print("[DEBUG] Avg. of FPS using CPU : ", mean(fps))
                    else:
                        print("[DEBUG] Avg. of FPS using GPU : ", mean(fps))

                if args['csv']:
                    y_vec[-1] = mean(fps)
                    line1 = live_plotter(x_vec, y_vec, line1)
                    y_vec = np.append(y_vec[1:], 0.0)

                if args['vc']:
                    if np.array(faces).any():
                        camera.schedule_frame(img_im)
                    else:
                        camera.schedule_frame(img_cv2_source)
                count += 1
            else:
                break

        cap.release()
        out1.release()
        cv2.destroyAllWindows()
Example #21
            if count == 0:
                source_image1 = frame1
                source1 = torch.tensor(source_image1[np.newaxis].astype(
                    np.float32)).permute(0, 3, 1, 2)
                kp_driving_initial = kp_detector(source1)

            frame_test = torch.tensor(frame1[np.newaxis].astype(
                np.float32)).permute(0, 3, 1, 2)

            driving_frame = frame_test
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
            im = np.transpose(out['prediction'].data.cpu().numpy(),
                              [0, 2, 3, 1])[0]
            im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            joinedFrame = np.concatenate((cv2_source, im, frame1), axis=1)

            cv2.imshow('Test', joinedFrame)
            out1.write(img_as_ubyte(joinedFrame))
            count += 1
            if cv2.waitKey(20) & 0xFF == ord('q'):
                break
Example #22
def make_animation(source_images,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True,
                   cpu=False):
    with torch.no_grad():
        predictions = []
        source = [
            torch.tensor(s[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
            for s in source_images
        ]
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        if not cpu:
            source = [s.cuda() for s in source]
        kp_source = [kp_detector(s) for s in source]
        kp_source_value = [
            kp_s['value'][0].detach().cpu().numpy() for kp_s in kp_source
        ]
        kp_driving_initial = kp_detector(driving[:, :, 0])

        distance = lambda y: lambda x: np.sum(np.sum((x - y)**2, axis=1)**0.5)

        kp_frame_value = kp_driving_initial['value'][0].detach().cpu().numpy()
        i_prev = np.argmin(list(map(distance(kp_frame_value),
                                    kp_source_value)))
        kp_source_prev, source_prev = kp_source[i_prev], source[i_prev]

        diff = 20
        alpha = 0
        n = len(source_images)

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_frame_value = kp_driving['value'][0].detach().cpu().numpy()

            i = np.argmin(
                list(
                    map(
                        distance(kp_frame_value),
                        kp_source_value[max(0, i_prev -
                                            diff):min(n, i_prev + diff)])))
            i += max(0, i_prev - diff)

            if i != i_prev:
                kp_source_prev['value'] = (kp_source_prev['value'] +
                                           kp_source[i]['value']) / 2
                kp_source_prev['jacobian'] = (kp_source_prev['jacobian'] +
                                              kp_source[i]['jacobian']) / 2
                source_prev = (source_prev + source[i]) / 2
                i_prev = i
            else:
                kp_source_prev['value'] = alpha * kp_source_prev['value'] + (
                    1 - alpha) * kp_source[i]['value']
                kp_source_prev['jacobian'] = alpha * kp_source_prev[
                    'jacobian'] + (1 - alpha) * kp_source[i]['jacobian']
                source_prev = alpha * source_prev + (1 - alpha) * source[i]

            kp_norm = normalize_kp(kp_source=kp_source_prev,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source_prev,
                            kp_source=kp_source_prev,
                            kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])
    return predictions
Example #23
def process_task(task_id, opt, img_orig, video_file, out_file, generator, kp_detector):
    LOG.info(f"Processing task {task_id}...")
    img = resize(img_orig, (256, 256))[..., :3]
    kp_driving_initial = None
    with torch.no_grad():
        spm = torch.tensor(img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source = spm.cpu() if opt.cpu else spm.cuda()
        kp_source = kp_detector(source)

        video = cv2.VideoCapture(video_file)
        fps = video.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*"avc1")
        vout = cv2.VideoWriter(out_file, fourcc, fps, (256, 256))
        frames_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_counter = 0
        last_percent = 0
        last_time = time.time()

        while True:
            # cv2.VideoCapture.read() returns (ret, frame); frame is None
            # once the stream is exhausted.
            ret, frame_img = video.read()
            if frame_img is None:
                print("No frame returned; the video ended or the capture device failed")
                break

            # frame_img = cv2.rotate(frame_img,cv2.ROTATE_180)
            frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)

            y, x, _ = frame_img.shape
            min_dim = min(y, x)
            startx = x // 2 - (min_dim // 2)
            starty = y // 2 - (min_dim // 2)
            frame_img = frame_img[
                starty : starty + min_dim, startx : startx + min_dim, :
            ]

            frame_img = resize(frame_img, (256, 256))[..., :3]
            frame = torch.tensor(frame_img[np.newaxis].astype(np.float32)).permute(
                0, 3, 1, 2
            )
            # Keep the driving frame on the same device as the source tensor.
            frame = frame.cpu() if opt.cpu else frame.cuda()

            kp_driving = kp_detector(frame)
            if kp_driving_initial is None:
                kp_driving_initial = kp_driving
            kp_norm = normalize_kp(
                kp_source=kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=kp_driving_initial,
                use_relative_movement=opt.relative,
                use_relative_jacobian=opt.relative,
                adapt_movement_scale=opt.adapt_scale,
            )
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            p = np.transpose(out["prediction"].data.cpu().numpy(), [0, 2, 3, 1])[0]
            p = p * 255
            p = p.astype(np.uint8)
            p = cv2.cvtColor(p, cv2.COLOR_RGB2BGR)
            vout.write(p)

            frame_counter += 1

            percent = int(frame_counter / frames_count * 100)
            now = time.time()
            if percent != last_percent and now - last_time > 1:
                last_percent = percent
                last_time = now
                send_status(opt, task_id, percent=min(100, percent))
                LOG.info(f"processed {frame_counter}/{frames_count} frames")

        if last_percent != 100:
            send_status(opt, task_id, percent=100)

        video.release()
        vout.release()
        LOG.info(f"Task {task_id} done, file {out_file} written!")
Example #24
def overfit(config,
            source_image,
            driving_video,
            generator,
            kp_detector,
            lowres_video,
            relative=True,
            adapt_movement_scale=True,
            cpu=False):
    overfit_epochs = 10

    train_params = config['train_params']
    optimizer_generator = torch.optim.Adam(generator.parameters(),
                                           lr=train_params['lr_generator'],
                                           betas=(0.5, 0.999))
    optimizer_kp_detector = torch.optim.Adam(kp_detector.parameters(),
                                             lr=train_params['lr_kp_detector'],
                                             betas=(0.5, 0.999))

    scheduler_generator = MultiStepLR(optimizer_generator,
                                      train_params['epoch_milestones'],
                                      gamma=0.1,
                                      last_epoch=-1)

    scheduler_kp_detector = MultiStepLR(optimizer_kp_detector,
                                        train_params['epoch_milestones'],
                                        gamma=0.1,
                                        last_epoch=-1)

    for epoch in trange(0, overfit_epochs):
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(
            np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(
            np.array(driving_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        lowres = torch.tensor(
            np.array(lowres_video)[np.newaxis].astype(np.float32)).permute(
                0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            lowres_frame = lowres[:, :, frame_idx]

            if not cpu:
                driving_frame = driving_frame.cuda()
                lowres_frame = lowres_frame.cuda()

            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            resized_prediction = transforms.Resize(
                (256, 256))(out['prediction'])[None]
            loss = F.mse_loss(torch.squeeze(resized_prediction, 0),
                              lowres_frame.detach())
            loss.backward()

            optimizer_generator.step()
            optimizer_generator.zero_grad()
            optimizer_kp_detector.step()
            optimizer_kp_detector.zero_grad()

            predictions.append(
                np.transpose(out['prediction'].data.cpu().numpy(),
                             [0, 2, 3, 1])[0])

        scheduler_generator.step()
        scheduler_kp_detector.step()

    return predictions
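Unlike the inference examples above, overfit deliberately omits torch.no_grad() so that loss.backward() can update both the generator and the keypoint detector. A hedged usage sketch; the config path and the video variables are assumptions:

import imageio
import numpy as np
import yaml
from skimage import img_as_ubyte

with open('config/vox-256.yaml') as f:
    config = yaml.safe_load(f)
predictions = overfit(config, source_image, driving_video, generator,
                      kp_detector, lowres_video)
# Clip before conversion: mid-training predictions can drift outside [0, 1].
imageio.mimsave('overfit-result.mp4',
                [img_as_ubyte(np.clip(frame, 0, 1)) for frame in predictions], fps=25)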