def find_best_frame(source, driving, cpu=False):
    import face_alignment

    def normalize_kp(kp):
        # Center the landmarks and rescale by the sqrt of their convex-hull
        # area so faces of different sizes become comparable.
        kp = kp - kp.mean(axis=0, keepdims=True)
        area = ConvexHull(kp[:, :2]).volume
        area = np.sqrt(area)
        kp[:, :2] = kp[:, :2] / area
        return kp

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=True,
                                      device='cpu' if cpu else 'cuda')
    kp_source = fa.get_landmarks(255 * source)[0]
    kp_source = normalize_kp(kp_source)
    norm = float('inf')
    frame_num = 0
    # Pick the driving frame whose normalized landmarks are closest to the source.
    for i, image in tqdm(enumerate(driving)):
        kp_driving = fa.get_landmarks(255 * image)[0]
        kp_driving = normalize_kp(kp_driving)
        new_norm = (np.abs(kp_source - kp_driving) ** 2).sum()
        if new_norm < norm:
            norm = new_norm
            frame_num = i
    return frame_num
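# Usage sketch for find_best_frame (assumptions: `source_image` and the frames
# of `driving_video` are float RGB arrays in [0, 1] at 256x256, and ConvexHull,
# np, and tqdm are imported at module level as the function expects). Animating
# forward and backward from the best-aligned frame tends to reduce identity
# drift compared to always starting at frame 0.
i = find_best_frame(source_image, driving_video, cpu=True)
driving_forward = driving_video[i:]
driving_backward = driving_video[:i + 1][::-1]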
def single_pass(source, target_image, generator, kp_detector, kp_source,
                kp_driving_initial, relative=True, adapt_movement_scale=True,
                cpu=False):
    # Animate a single target frame; kp_source and kp_driving_initial are
    # precomputed by the caller so the detector only runs on the new frame.
    with torch.no_grad():
        target_image = torch.tensor(target_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        driving_frame = target_image
        if not cpu:
            driving_frame = driving_frame.cuda()
        kp_driving = kp_detector(driving_frame)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial,
                               use_relative_movement=relative,
                               use_relative_jacobian=relative,
                               adapt_movement_scale=adapt_movement_scale)
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
        return out['prediction'].data
def make_animation(source_image, driving_video, generator, kp_detector,
                   relative=True, adapt_movement_scale=True):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3).cuda()
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions
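# Usage sketch for make_animation (hypothetical file names; assumes a CUDA
# device, since this variant calls .cuda() unconditionally, and that
# generator/kp_detector come from the repo's load_checkpoints helper).
import imageio
from skimage import img_as_ubyte
from skimage.transform import resize

source_image = resize(imageio.imread('source.png'), (256, 256))[..., :3]
reader = imageio.get_reader('driving.mp4')
fps = reader.get_meta_data()['fps']
driving_video = [resize(frame, (256, 256))[..., :3] for frame in reader]
predictions = make_animation(source_image, driving_video, generator, kp_detector)
imageio.mimsave('result.mp4', [img_as_ubyte(f) for f in predictions], fps=fps)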
def predict(driving_frame, source_image, relative, adapt_movement_scale, fa, device='cuda'):
    # generator, kp_detector and opt are expected as module-level globals;
    # the keypoint state persists across calls via the globals below.
    global start_frame
    global start_frame_kp
    global kp_driving_initial
    global kp_source
    with torch.no_grad():
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        driving = torch.tensor(driving_frame[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        if kp_driving_initial is None:
            kp_driving_initial = kp_detector(driving)
            start_frame = driving_frame.copy()
            start_frame_kp = get_frame_kp(fa, driving_frame)
        if kp_source is None:
            kp_source = kp_detector(source)
        kp_driving = kp_detector(driving)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial,
                               use_relative_movement=relative,
                               use_relative_jacobian=relative,
                               adapt_movement_scale=adapt_movement_scale)
        if opt.enc_downscale > 1:
            h, w = int(source.shape[2] / opt.enc_downscale), int(source.shape[3] / opt.enc_downscale)
            source_enc = torch.nn.functional.interpolate(source, size=(h, w), mode='bilinear')
        else:
            source_enc = None
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm,
                        source_image_enc=source_enc, optim_ret=True)
        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        out = (np.clip(out, 0, 1) * 255).astype(np.uint8)
        return out
def predict(driving_frame, source_image, relative, adapt_movement_scale, fa,
            generator, kp_detector, kp_driving_initial, device='cuda'):
    global start_frame
    global start_frame_kp
    with torch.no_grad():
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        driving = torch.tensor(driving_frame[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to(device)
        kp_source = kp_detector(source)
        if kp_driving_initial is None:
            # Note: rebinding the parameter only affects this call; the caller
            # must keep its own kp_driving_initial to avoid recomputing it.
            kp_driving_initial = kp_detector(driving)
            start_frame = driving_frame.copy()
            start_frame_kp = get_frame_kp(fa, driving_frame)
        kp_driving = kp_detector(driving)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial,
                               use_relative_movement=relative,
                               use_relative_jacobian=relative,
                               adapt_movement_scale=adapt_movement_scale)
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        out = (np.clip(out, 0, 1) * 255).astype(np.uint8)
        return out
def kp_animation(source_image, kp, generator, relative=True,
                 adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        # Drive the animation from precomputed keypoints instead of raw frames;
        # the first keypoint set doubles as source and initial driving pose.
        kp_source = kp_driving_initial = kp[0]
        for kp_driving in kp:
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions
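# Usage sketch for kp_animation: keypoints can be recorded once and replayed
# later without keeping the driving video around (hypothetical names; each
# element of `kp` is the dict returned by kp_detector, holding 'value' and
# 'jacobian' tensors).
kp = [kp_detector(frame) for frame in driving_frame_tensors]
frames = kp_animation(source_image, kp, generator, cpu=True)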
def generate_morphed_video(image, sub_video):
    with torch.no_grad():
        source = (
            torch.tensor(np.float32(image), device="cuda").permute(2, 0, 1).unsqueeze(0)
        )
        kp_source = kp_detector(source)
        for i, driving_frame in enumerate(sub_video):
            driving_frame = (
                torch.tensor(np.float32(driving_frame))
                .cuda()
                .permute(2, 0, 1)
                .unsqueeze(0)
            )
            if i == 0:
                kp_driving_initial = kp_detector(driving_frame)
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(
                kp_source=kp_source,
                kp_driving=kp_driving,
                kp_driving_initial=kp_driving_initial,
                use_relative_movement=True,
                use_relative_jacobian=True,
                adapt_movement_scale=True,
            )
            yield generator(source, kp_source=kp_source, kp_driving=kp_norm)[
                "prediction"
            ].squeeze().permute(1, 2, 0).cpu().numpy()
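# Usage sketch: generate_morphed_video yields frames lazily, so long clips can
# be written to disk without holding every prediction in memory (hypothetical
# file name; assumes generator and kp_detector are module-level globals, as the
# function above uses them).
import imageio
import numpy as np

writer = imageio.get_writer('morphed.mp4', fps=25)
for frame in generate_morphed_video(image, sub_video):
    writer.append_data((np.clip(frame, 0, 1) * 255).astype(np.uint8))
writer.close()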
def make_animation(source_image_true, source_image_fake, driving_video, generator,
                   kp_detector, relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        sparse_d = []
        occlusion = []
        source_true = torch.tensor(source_image_true[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source_fake = torch.tensor(source_image_fake[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source_true = source_true.cuda()
            source_fake = source_fake.cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source_true)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source_true, source_fake, kp_source=kp_source, kp_driving=kp_norm)
            print(out['sparse_deformed'].shape)
            # Collect the sparse deformed images and occlusion maps rather than
            # the final prediction.
            sparse_d.append(np.transpose(out['sparse_deformed'].data.cpu().numpy(), [0, 1, 3, 4, 2])[0][0])
            occlusion.append(np.transpose(out['occlusion_map'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return sparse_d, occlusion
def predict(self, driving_frame):
    assert self.kp_source is not None, "call set_source_image()"
    with torch.no_grad():
        driving = to_tensor(driving_frame).to(self.device)
        if self.kp_driving_initial is None:
            # The first frame seen becomes the reference pose.
            self.kp_driving_initial = self.kp_detector(driving)
            self.start_frame = driving_frame.copy()
            self.start_frame_kp = self.get_frame_kp(driving_frame)
        kp_driving = self.kp_detector(driving)
        kp_norm = normalize_kp(kp_source=self.kp_source,
                               kp_driving=kp_driving,
                               kp_driving_initial=self.kp_driving_initial,
                               use_relative_movement=self.relative,
                               use_relative_jacobian=self.relative,
                               adapt_movement_scale=self.adapt_movement_scale)
        out = self.generator(self.source, kp_source=self.kp_source, kp_driving=kp_norm)
        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        out = (np.clip(out, 0, 1) * 255).astype(np.uint8)
        return out
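# Usage sketch for the stateful predictor (illustrative names; assumes a class
# exposing set_source_image() and the predict() above). The first webcam frame
# becomes kp_driving_initial, so it helps to start from a neutral pose.
import cv2
from skimage.transform import resize

predictor.set_source_image(source_image)
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    rgb = resize(frame[..., ::-1], (256, 256))[..., :3]  # BGR -> RGB, [0, 1]
    avatar = predictor.predict(rgb)
    cv2.imshow('avatar', avatar[..., ::-1])  # back to BGR for OpenCV
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()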
def next(self, image):
    _, kp_image = self._tensor_image(image)
    result = []
    for tgt, kp in zip(self.targets, self.kp_targets):
        with torch.no_grad():
            # Positional arguments follow normalize_kp(kp_source, kp_driving,
            # kp_driving_initial, adapt_movement_scale, use_relative_movement,
            # use_relative_jacobian).
            norm = normalize_kp(kp, kp_image, self.kp_start,
                                self.adapt_movement_scale, self.relative, self.relative)
            out = self.generator(tgt, kp_source=kp, kp_driving=norm)
            result.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
    return result
def make_animation(source_image, driving_video, generator, kp_detector,
                   relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        # Shapes for reference: source is [1, 3, 256, 256]; driving is
        # [1, 3, num_frames, 256, 256] after the permute above.
        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions
def make_animation(source_image, driving_video, generator, kp_detector,
                   relative=True, adapt_movement_scale=True, cpu=False,
                   progress_var=None, progress_label=None):
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        for frame_idx in range(driving.shape[2]):
            # Report progress to the optional Tkinter widgets instead of tqdm.
            percent = frame_idx / driving.shape[2] * 100
            if progress_var:
                progress_var.set(int(percent))
            if progress_label:
                progress_label.config(text=f'{percent:.3}%')
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        if progress_label:
            progress_label.config(text="Done!")
        return predictions
def process(opt, img_orig, generator, kp_detector):
    img = resize(img_orig, (256, 256))[..., :3]
    kp_driving_initial = None
    with torch.no_grad():
        spm = torch.tensor(img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source = spm.cpu() if opt.cpu else spm.cuda()
        kp_source = kp_detector(source)
        video = cv2.VideoCapture(opt.video)
        fps = video.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'avc1')
        vout = cv2.VideoWriter(opt.out, fourcc, fps, (256, 256))
        while True:
            ret, frame_img = video.read()
            if frame_img is None:
                print("Oops frame is None. Possibly camera or display does not work")
                break
            frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
            # Center-crop to a square before resizing to 256x256.
            y, x, _ = frame_img.shape
            min_dim = min(y, x)
            startx = x // 2 - (min_dim // 2)
            starty = y // 2 - (min_dim // 2)
            frame_img = frame_img[starty:starty + min_dim, startx:startx + min_dim, :]
            frame_img = resize(frame_img, (256, 256))[..., :3]
            frame = torch.tensor(frame_img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
            # Keep the driving frame on the same device as the models.
            frame = frame.cpu() if opt.cpu else frame.cuda()
            kp_driving = kp_detector(frame)
            if kp_driving_initial is None:
                kp_driving_initial = kp_driving
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=opt.relative,
                                   use_relative_jacobian=opt.relative,
                                   adapt_movement_scale=opt.adapt_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            p = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
            p = (p * 255).astype(np.uint8)
            p = cv2.cvtColor(p, cv2.COLOR_RGB2BGR)
            vout.write(p)
        video.release()
        vout.release()
def make_animation_video(source_video, driving_video, generator, kp_detector,
                         relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        predictions = []
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        source_v = torch.tensor(np.array(source_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        # The source is itself a video, so its keypoints are re-detected every
        # frame; only as many frames as both videos share can be animated.
        for frame_idx in tqdm(range(min(source_v.shape[2], driving.shape[2]))):
            driving_frame = driving[:, :, frame_idx]
            source_frame = source_v[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
                source_frame = source_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_source = kp_detector(source_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source_frame, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions
def predict(self, driving_frame):
    with torch.no_grad():
        driving = to_tensor(driving_frame).to(self.device)
        if self.kp_driving_initial is None:
            self.kp_driving_initial = self.kp_detector(driving)
            self.start_frame = driving_frame.copy()
            self.start_frame_kp = self.get_frame_kp(driving_frame)
        kp_driving = self.kp_detector(driving)
        kp_norm = normalize_kp(kp_source=self.kp_source,
                               kp_driving=kp_driving,
                               kp_driving_initial=self.kp_driving_initial,
                               use_relative_movement=self.relative,
                               use_relative_jacobian=self.relative,
                               adapt_movement_scale=self.adapt_movement_scale)
        if self.enc_downscale > 1:
            # Feed the generator a downscaled copy of the source for its encoder.
            h, w = (int(self.source.shape[2] / self.enc_downscale),
                    int(self.source.shape[3] / self.enc_downscale))
            source_enc = torch.nn.functional.interpolate(self.source, size=(h, w), mode='bilinear')
        else:
            source_enc = None
        try:
            out = self.generator(self.source, kp_source=self.kp_source,
                                 kp_driving=kp_norm,
                                 source_image_enc=source_enc, optim_ret=True)
        except TypeError:
            # Older FOMM checkouts lack the extra generator arguments.
            Once('\n*** Please update FOMM:\ncd fomm\ngit pull\n')
            out = self.generator(self.source, kp_source=self.kp_source, kp_driving=kp_norm)
        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        out = (np.clip(out, 0, 1) * 255).astype(np.uint8)
        return out
def process(self, vframe):
    with torch.no_grad():
        # Center-crop to a square and resize to the model resolution.
        y, x, _ = vframe.shape
        min_dim = min(y, x)
        startx = x // 2 - (min_dim // 2)
        starty = y // 2 - (min_dim // 2)
        vframe = vframe[starty:starty + min_dim, startx:startx + min_dim, :]
        vframe = resize(vframe, (256, 256))[..., :3]
        if self.count < 60:
            # Warm-up period: show the raw frame next to the source image.
            self.count += 1
            p = np.concatenate([vframe[:, :, ::-1], self.img], axis=1)
            p = (p * 255).astype(np.uint8)
            p = cv2.cvtColor(p, cv2.COLOR_BGR2RGB)
            return p
        frame = torch.tensor(vframe[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        kp_driving = kp_detector(frame)
        if self.kp_driving_initial is None:
            self.kp_driving_initial = kp_driving
        kp_norm = normalize_kp(kp_source=self.kp_source,
                               kp_driving=kp_driving,
                               kp_driving_initial=self.kp_driving_initial,
                               use_relative_movement=True,
                               use_relative_jacobian=True,
                               adapt_movement_scale=True)
        out = generator(self.source, kp_source=self.kp_source, kp_driving=kp_norm)
        p = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        p = np.concatenate([vframe[:, :, ::-1], p], axis=1)
        p = (p * 255).astype(np.uint8)
        p = cv2.cvtColor(p, cv2.COLOR_BGR2RGB)
        self.count += 1
        return p
def do_feeding(self, task_meta: TaskMeta, generator):
    source_img = cv2.imread(task_meta.source_img_path)
    source_img = resize_image(source_img)
    source_faces = self.get_source_faces(source_img)
    if len(source_faces) == 0:
        return process_code.NO_AVAILABLE_FACE
    self.update_kp_sources(source_faces)
    resource, frame_data_list = self.resource_manager.get_cache_data(
        task_meta.material_file_path, task_meta.ppd_file_path)
    predictions = []
    for source_face in source_faces:
        # Start each face from the cached driving frame that best matches it.
        best_frame_id = self.get_best_frame(source_face, frame_data_list)
        best_frame = resource.material[best_frame_id]
        best_frame = best_frame[:, :, ::-1].copy()
        frame = torch.tensor(best_frame[np.newaxis].astype(np.float32)) \
            .permute(0, 3, 1, 2).cuda() / 255.
        kp_driving_initial = self.kp_detector(frame)
        relative = True
        adapt_movement_scale = True
        for frame_data in frame_data_list:
            kp_norm = normalize_kp(kp_source=source_face.kp_source,
                                   kp_driving=frame_data.kp_by_detector,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = self.generator(source_face.face_img,
                                 kp_source=source_face.kp_source,
                                 kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
    return predictions
def make_animation(source_image, driving_video, relative=True,
                   adapt_movement_scale=True, cpu=False):
    # Loads the checkpoint on every call; note the models are loaded with
    # cpu=True even though the cpu argument controls where the tensors go.
    generator, kp_detector = load_checkpoints.load_checkpoints(
        config_path='config/vox-256.yaml',
        checkpoint_path='data/vox-cpk.pth.tar',
        cpu=True)
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        for frame_idx in range(driving.shape[2]):
            # `progress` is expected to be a module-level progress bar widget.
            progress.progress(frame_idx / float(driving.shape[2]))
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions
def animate_image(source_image, video_image, orig_frame, generator, kp_detector,
                  relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        initial_frame = torch.tensor(orig_frame[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        driving_frame = torch.tensor(video_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
            driving_frame = driving_frame.cuda()
            initial_frame = initial_frame.cuda()
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(initial_frame)
        kp_driving = kp_detector(driving_frame)
        kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                               kp_driving_initial=kp_driving_initial,
                               use_relative_movement=relative,
                               use_relative_jacobian=relative,
                               adapt_movement_scale=adapt_movement_scale)
        out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
        out = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
        return out
def process(input):
    print("[INFO] loading source image and checkpoint...")
    source_path = input
    checkpoint_path = args['checkpoint']
    if args['input_video']:
        video_path = args['input_video']
    else:
        video_path = None
    source_image = imageio.imread(source_path)
    source_image = resize(source_image, (256, 256))[..., :3]
    generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml',
                                              checkpoint_path=checkpoint_path)
    # Load the Haar cascade used to decide whether a face is in view.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    if not os.path.exists('output'):
        os.mkdir('output')
    relative = True
    adapt_movement_scale = True
    cpu = bool(args['cpu'])
    if video_path:
        cap = cv2.VideoCapture(video_path)
        print("[INFO] Loading video from the given path")
    else:
        cap = cv2.VideoCapture(0)
        print("[INFO] Initializing front camera...")
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps_in = cap.get(cv2.CAP_PROP_FPS)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print('resolution : {} x {}'.format(width, height))
    print('frame rate : {} \nframe count : {}'.format(fps_in, frame_count))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out1 = cv2.VideoWriter('output/test.avi', fourcc, 12, (256 * 3, 256), True)
    cv2_source = cv2.cvtColor(source_image.astype('float32'), cv2.COLOR_BGR2RGB)
    cv2_source2 = (source_image * 255).astype(np.uint8)
    if args['vc']:
        # Prepare a 640x360 canvas with the 256x256 result centered on it.
        camera = pyfakewebcam.FakeWebcam('/dev/video7', 640, 360)
        camera._settings.fmt.pix.width = 640
        camera._settings.fmt.pix.height = 360
        img = np.zeros((360, 640, 3), dtype=np.uint8)
        yoff = round((360 - 256) / 2)
        xoff = round((640 - 256) / 2)
        img_im = img.copy()
        img_cv2_source = img.copy()
        img_im[:, :, 2] = 255
        img_cv2_source[:, :, 2] = 255
    with torch.no_grad():
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        kp_source = kp_detector(source)
        count = 0
        fps = []
        if args['csv']:
            line1 = []
            size = 10
            x_vec = np.linspace(0, 1, size + 1)[0:-1]
            y_vec = np.random.randn(len(x_vec))
        while True:
            start = time.time()
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)
            frame = cv2.flip(frame, 1)
            if not video_path:
                # Fixed crop of the webcam image.
                x, y, w, h = 143, 87, 322, 322
                frame = frame[y:y + h, x:x + w]
            frame1 = resize(frame, (256, 256))[..., :3]
            if count == 0:
                # The first captured frame defines the initial driving pose.
                source_image1 = frame1
                source1 = torch.tensor(source_image1[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
                if not cpu:
                    source1 = source1.cuda()
                kp_driving_initial = kp_detector(source1)
            driving_frame = torch.tensor(frame1[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            im = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
            predictions.append(im)
            im = (np.array(im) * 255).astype(np.uint8)
            if args['vc']:
                img_im[yoff:yoff + 256, xoff:xoff + 256] = im
                img_cv2_source[yoff:yoff + 256, xoff:xoff + 256] = cv2_source2
                # Stream the animated frame only while a face is detected;
                # otherwise fall back to the static source image.
                if np.array(faces).any():
                    camera.schedule_frame(img_im)
                else:
                    camera.schedule_frame(img_cv2_source)
            if args['debug']:
                fps.append(1.0 / (time.time() - start))
                if args['cpu']:
                    print("[DEBUG] Avg. of FPS using CPU : ", mean(fps))
                else:
                    print("[DEBUG] Avg. of FPS using GPU : ", mean(fps))
            if args['csv']:
                y_vec[-1] = mean(fps)
                line1 = live_plotter(x_vec, y_vec, line1)
                y_vec = np.append(y_vec[1:], 0.0)
            count += 1
        cap.release()
        out1.release()
        cv2.destroyAllWindows()
# Variant of the capture loop above that previews the result in an OpenCV
# window and writes it to the video file instead of a virtual camera.
if count == 0:
    source_image1 = frame1
    source1 = torch.tensor(source_image1[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
    kp_driving_initial = kp_detector(source1)
driving_frame = torch.tensor(frame1[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
if not cpu:
    driving_frame = driving_frame.cuda()
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                       kp_driving_initial=kp_driving_initial,
                       use_relative_movement=relative,
                       use_relative_jacobian=relative,
                       adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
im = np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0]
predictions.append(im)
im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
joinedFrame = np.concatenate((cv2_source, im, frame1), axis=1)
cv2.imshow('Test', joinedFrame)
out1.write(img_as_ubyte(joinedFrame))
count += 1
if cv2.waitKey(20) & 0xFF == ord('q'):
    break
def make_animation(source_images, driving_video, generator, kp_detector,
                   relative=True, adapt_movement_scale=True, cpu=False):
    with torch.no_grad():
        predictions = []
        source = [torch.tensor(s[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
                  for s in source_images]
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        if not cpu:
            source = [s.cuda() for s in source]
        kp_source = [kp_detector(s) for s in source]
        kp_source_value = [kp_s['value'][0].detach().cpu().numpy() for kp_s in kp_source]
        kp_driving_initial = kp_detector(driving[:, :, 0])
        # For each driving frame, pick the source image with the closest
        # keypoints, searching a window of `diff` around the previous pick.
        distance = lambda y: lambda x: np.sum(np.sum((x - y) ** 2, axis=1) ** 0.5)
        kp_frame_value = kp_driving_initial['value'][0].detach().cpu().numpy()
        i_prev = np.argmin(list(map(distance(kp_frame_value), kp_source_value)))
        kp_source_prev, source_prev = kp_source[i_prev], source[i_prev]
        diff = 20
        alpha = 0
        n = len(source_images)
        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_frame_value = kp_driving['value'][0].detach().cpu().numpy()
            i = np.argmin(list(map(distance(kp_frame_value),
                                   kp_source_value[max(0, i_prev - diff):min(n, i_prev + diff)])))
            i += max(0, i_prev - diff)
            if i != i_prev:
                # Blend towards the newly selected source to avoid popping.
                kp_source_prev['value'] = (kp_source_prev['value'] + kp_source[i]['value']) / 2
                kp_source_prev['jacobian'] = (kp_source_prev['jacobian'] + kp_source[i]['jacobian']) / 2
                source_prev = (source_prev + source[i]) / 2
                i_prev = i
            else:
                kp_source_prev['value'] = alpha * kp_source_prev['value'] + (1 - alpha) * kp_source[i]['value']
                kp_source_prev['jacobian'] = alpha * kp_source_prev['jacobian'] + (1 - alpha) * kp_source[i]['jacobian']
                source_prev = alpha * source_prev + (1 - alpha) * source[i]
            kp_norm = normalize_kp(kp_source=kp_source_prev, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source_prev, kp_source=kp_source_prev, kp_driving=kp_norm)
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        return predictions
def process_task(task_id, opt, img_orig, video_file, out_file, generator, kp_detector):
    LOG.info(f"Processing task {task_id}...")
    img = resize(img_orig, (256, 256))[..., :3]
    kp_driving_initial = None
    with torch.no_grad():
        spm = torch.tensor(img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        source = spm.cpu() if opt.cpu else spm.cuda()
        kp_source = kp_detector(source)
        video = cv2.VideoCapture(video_file)
        fps = video.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*"avc1")
        vout = cv2.VideoWriter(out_file, fourcc, fps, (256, 256))
        frames_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_counter = 0
        last_percent = 0
        last_time = time.time()
        while True:
            ret, frame_img = video.read()
            if frame_img is None:
                print("Oops frame is None. Possibly camera or display does not work")
                break
            frame_img = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
            # Center-crop to a square before resizing to 256x256.
            y, x, _ = frame_img.shape
            min_dim = min(y, x)
            startx = x // 2 - (min_dim // 2)
            starty = y // 2 - (min_dim // 2)
            frame_img = frame_img[starty:starty + min_dim, startx:startx + min_dim, :]
            frame_img = resize(frame_img, (256, 256))[..., :3]
            frame = torch.tensor(frame_img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
            # Keep the driving frame on the same device as the models.
            frame = frame.cpu() if opt.cpu else frame.cuda()
            kp_driving = kp_detector(frame)
            if kp_driving_initial is None:
                kp_driving_initial = kp_driving
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=opt.relative,
                                   use_relative_jacobian=opt.relative,
                                   adapt_movement_scale=opt.adapt_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            p = np.transpose(out["prediction"].data.cpu().numpy(), [0, 2, 3, 1])[0]
            p = (p * 255).astype(np.uint8)
            p = cv2.cvtColor(p, cv2.COLOR_RGB2BGR)
            vout.write(p)
            frame_counter += 1
            # Throttle status updates to at most one per second.
            percent = int(frame_counter / frames_count * 100)
            now = time.time()
            if percent != last_percent and now - last_time > 1:
                last_percent = percent
                last_time = now
                send_status(opt, task_id, percent=min(100, percent))
                LOG.info(f"processed {frame_counter}/{frames_count} frames")
        if last_percent != 100:
            send_status(opt, task_id, percent=100)
        video.release()
        vout.release()
        LOG.info(f"Task {task_id} done, file {out_file} written!")
def overfit(config, source_image, driving_video, generator, kp_detector,
            lowres_video, relative=True, adapt_movement_scale=True, cpu=False):
    overfit_epochs = 10
    train_params = config['train_params']
    optimizer_generator = torch.optim.Adam(generator.parameters(),
                                           lr=train_params['lr_generator'],
                                           betas=(0.5, 0.999))
    optimizer_kp_detector = torch.optim.Adam(kp_detector.parameters(),
                                             lr=train_params['lr_kp_detector'],
                                             betas=(0.5, 0.999))
    scheduler_generator = MultiStepLR(optimizer_generator,
                                      train_params['epoch_milestones'],
                                      gamma=0.1, last_epoch=-1)
    scheduler_kp_detector = MultiStepLR(optimizer_kp_detector,
                                        train_params['epoch_milestones'],
                                        gamma=0.1, last_epoch=-1)
    for epoch in trange(0, overfit_epochs):
        predictions = []
        source = torch.tensor(source_image[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2)
        if not cpu:
            source = source.cuda()
        driving = torch.tensor(np.array(driving_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        lowres = torch.tensor(np.array(lowres_video)[np.newaxis].astype(np.float32)).permute(0, 4, 1, 2, 3)
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])
        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            lowres_frame = lowres[:, :, frame_idx]
            if not cpu:
                driving_frame = driving_frame.cuda()
                lowres_frame = lowres_frame.cuda()
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source, kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
            # Supervise the prediction with the matching low-resolution frame.
            resized_prediction = transforms.Resize((256, 256))(out['prediction'])[None]
            loss = F.mse_loss(torch.squeeze(resized_prediction, 0), lowres_frame.detach())
            loss.backward()
            optimizer_generator.step()
            optimizer_generator.zero_grad()
            optimizer_kp_detector.step()
            optimizer_kp_detector.zero_grad()
            predictions.append(np.transpose(out['prediction'].data.cpu().numpy(), [0, 2, 3, 1])[0])
        scheduler_generator.step()
        scheduler_kp_detector.step()
    return predictions
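# Usage sketch for overfit (hypothetical paths; assumes the repo's train YAML
# provides train_params, and that MultiStepLR, trange, transforms, and F are
# imported at module level as the function above expects).
import yaml

with open('config/vox-256.yaml') as f:
    config = yaml.safe_load(f)
predictions = overfit(config, source_image, driving_video,
                      generator, kp_detector, lowres_video)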