def init_hook(ctx, **params):
    PARAMS.update(params)
    PARAMS['margin'] = float(PARAMS['margin'])
    PARAMS['image_size'] = int(PARAMS['image_size'])
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device.type)

    ref_img = cv2.imread(PARAMS['target'])
    # ref_img = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)

    # Find the face on the image.
    face_driver = ctx.drivers[0]
    boxes = common.get_boxes(face_driver, ref_img)
    if len(boxes) != 1:
        raise RuntimeError(
            'target image must include exactly 1 face. Provide path via -o target=<path>'
        )
    face = common.crop_by_box(ref_img, boxes[0], margin=PARAMS['margin'])
    face_box = common.get_crop_box(ref_img, boxes[0], margin=PARAMS['margin'])
    PARAMS['face_shape'] = face.shape
    PARAMS['face_box'] = face_box

    face = cv2.resize(face, (PARAMS['image_size'], PARAMS['image_size']), interpolation=cv2.INTER_AREA)
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    norm_image = torch.from_numpy(np.expand_dims(face, axis=0)).type(dtype=torch.float)  # K,256,256,3
    norm_image = (norm_image.permute([0, 3, 1, 2]) - 127.5) / 127.5
    PARAMS['face'] = norm_image
    PARAMS['full'] = cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB)

    LOG.info('Loading torch model...')
    torch_model = load_torch_model(PARAMS['torch_model'])
    LOG.info('Done loading torch model.')

    return fa, torch_model
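# `common.get_crop_box` and `common.crop_by_box` are defined elsewhere in the
# repo. Judging only from how they are used above (and from the explicit
# `cropped_ratio = 0.4` expansion in `process_video` below), they presumably
# expand a `[x1, y1, x2, y2]` box by a fractional `margin` and clip it to the
# image. A minimal sketch under that assumption, a hypothetical reconstruction
# rather than the actual `common` module:
import numpy as np


def get_crop_box(image, box, margin=0.4):
    """Expand a [x1, y1, x2, y2] box by `margin` (a fraction of its
    width/height) and clip the result to the image bounds."""
    w, h = box[2] - box[0], box[3] - box[1]
    return np.array([
        max(int(round(box[0] - margin * w)), 0),
        max(int(round(box[1] - margin * h)), 0),
        min(int(round(box[2] + margin * w)), image.shape[1]),
        min(int(round(box[3] + margin * h)), image.shape[0]),
    ])


def crop_by_box(image, box, margin=0.4):
    """Crop the image region covered by the expanded box."""
    x1, y1, x2, y2 = get_crop_box(image, box, margin)
    return image[y1:y2, x1:x2]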
def main():
    args = parse_args()
    logging.basicConfig(
        format='%(asctime)s %(levelname)-5s %(name)-10s [-] %(message)s',
        level='INFO'
    )
    logging.root.setLevel(logging.INFO)

    face_driver, landmarks_driver = load_models(args.face_model, args.landmarks_model, landmark_type=args.type)
    LOG.info('Models loaded.')

    basename = os.path.splitext(os.path.basename(args.input))[0]
    dirname = os.path.join(args.output, basename)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    vc = cv2.VideoCapture(args.input)
    frame_num = -1
    margin = args.margin
    boxes = {}
    all_landmarks = {}
    while True:
        ret, frame = vc.read()
        if not ret:
            break
        frame_num += 1

        face_boxes = common.get_boxes(face_driver, frame)
        if len(face_boxes) != 1:
            continue

        box = face_boxes[0]
        new_box = common.get_crop_box(frame, box, margin=margin)
        file_name = os.path.join(dirname, f'{frame_num:05d}.jpg')
        cropped_frame = frame[new_box[1]:new_box[3], new_box[0]:new_box[2]]
        cv2.imwrite(file_name, cropped_frame)

        # Re-compute box coords relative to the cropped frame.
        new_box = [
            max(box[0] - new_box[0], 0),
            max(box[1] - new_box[1], 0),
            min(new_box[2] - box[0], cropped_frame.shape[1]),
            min(new_box[3] - box[1], cropped_frame.shape[0]),
        ]
        landmarks = common.get_landmarks(landmarks_driver, cropped_frame, np.array(new_box)).astype(float)
        # draw_points(cropped_frame, landmarks)
        # cv2.imshow('img', cropped_frame)
        # k = cv2.waitKey(0)
        # if k == 27:
        #     break

        # Convert landmarks to relative coords.
        landmarks[:, 0] = landmarks[:, 0] / cropped_frame.shape[1]
        landmarks[:, 1] = landmarks[:, 1] / cropped_frame.shape[0]

        boxes[f'{frame_num:05d}.jpg'] = new_box
        all_landmarks[f'{frame_num:05d}.jpg'] = landmarks.tolist()

        if (frame_num + 1) % 100 == 0:
            LOG.info(f'Processed {frame_num + 1} frames.')

    with open(os.path.join(dirname, 'boxes.json'), 'w') as f:
        f.write(json.dumps(boxes, indent=2))
    with open(os.path.join(dirname, 'landmarks.json'), 'w') as f:
        f.write(json.dumps(all_landmarks, indent=2))
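# `parse_args` is not shown in this section. Judging by the attributes `main`
# reads from it, it presumably resembles the following argparse sketch; the
# flag names and defaults here are assumptions:
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description='Extract per-frame face crops and landmarks from a video.')
    parser.add_argument('--input', required=True, help='Path to the input video')
    parser.add_argument('--output', required=True, help='Directory for per-video output folders')
    parser.add_argument('--face-model', required=True, help='Path to the face detection model')
    parser.add_argument('--landmarks-model', required=True, help='Path to the landmarks model')
    parser.add_argument('--type', default='2d', help='Landmark type passed to load_models')
    parser.add_argument('--margin', type=float, default=0.4,
                        help='Crop margin as a fraction of the face box size')
    return parser.parse_args()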
def process(inputs, ctx, **kwargs):
    image, is_video = helpers.load_image(inputs, 'input')
    fa, torch_model = ctx.global_ctx
    face_driver = ctx.drivers[0]

    boxes = common.get_boxes(face_driver, image)
    if len(boxes) == 1:
        for box in boxes:
            crop_box = common.get_crop_box(image, box, margin=PARAMS['margin'])
            cropped = common.crop_by_box(image, box, margin=PARAMS['margin'])
            resized = cv2.resize(cropped, (PARAMS['image_size'], PARAMS['image_size']), interpolation=cv2.INTER_AREA)

            # Landmarks in crop-local coords, rescaled to the resized image.
            landmarks = fa.get_landmarks_from_image(image, [crop_box])[0]
            landmarks -= [crop_box[0], crop_box[1]]
            x_factor = (crop_box[2] - crop_box[0]) / PARAMS['image_size']
            y_factor = (crop_box[3] - crop_box[1]) / PARAMS['image_size']
            landmarks /= [x_factor, y_factor]

            landmark_img = video_extraction_conversion.draw_landmark(
                landmarks, size=(PARAMS['image_size'], PARAMS['image_size'], 3))

            norm_image = torch.from_numpy(np.expand_dims(resized, axis=0)).type(dtype=torch.float)  # K,256,256,3
            norm_mark = torch.from_numpy(np.expand_dims(landmark_img, axis=0)).type(dtype=torch.float)  # K,256,256,3
            norm_image = (norm_image.permute([0, 3, 1, 2]) - 127.5) / 127.5
            norm_mark = (norm_mark.permute([0, 3, 1, 2]) - 127.5) / 127.5  # K,3,256,256

            t = time.time()
            with torch.no_grad():
                outputs = torch_model(PARAMS['face'], norm_mark)
            LOG.info(f'model time: {time.time() - t}')

            t = time.time()
            output = get_picture(outputs)
            # cv2.imwrite(
            #     'VV.jpg', np.hstack([
            #         get_picture(outputs)[:, :, ::-1],
            #         get_picture(norm_mark)[:, :, ::-1],
            #         get_picture(norm_image)[:, :, ::-1],
            #         get_picture(PARAMS['face'])[:, :, ::-1]
            #     ])
            # )
            # import sys; sys.exit(1)
            output = cv2.resize(
                output, (PARAMS['face_shape'][1], PARAMS['face_shape'][0]),
                interpolation=cv2.INTER_AREA)
            # LOG.info(f'get and resize: {time.time() - t}')
            # t = time.time()

            # Paste the generated face back into the full reference image.
            face_box = PARAMS['face_box']
            image = PARAMS['full'].copy()
            image[face_box[1]:face_box[3], face_box[0]:face_box[2]] = output
            # mask = np.ones_like(output) * 255
            # center_box = ((face_box[2] + face_box[0]) // 2, (face_box[3] + face_box[1]) // 2)
            # image = cv2.seamlessClone(output, PARAMS['full'], mask, center_box, cv2.NORMAL_CLONE)
            # LOG.info(f'seamless clone: {time.time() - t}')
    else:
        image = PARAMS['full']

    if is_video:
        output = image
    else:
        _, buf = cv2.imencode('.jpg', image[:, :, ::-1])
        output = buf.tobytes()

    return {'output': output}
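# `get_picture` is referenced above but defined elsewhere. Given that the model
# inputs are normalized with `(x - 127.5) / 127.5` and the result is fed to
# `cv2.resize`, it plausibly inverts that normalization and returns an H,W,3
# uint8 image for the first batch element. A sketch under that assumption, not
# necessarily the repo's exact implementation:
import numpy as np


def get_picture(tensor):
    """1,3,H,W float tensor in [-1, 1] -> H,W,3 uint8 RGB image."""
    img = tensor[0].detach().cpu().permute(1, 2, 0).numpy()  # 3,H,W -> H,W,3
    img = img * 127.5 + 127.5  # undo the (x - 127.5) / 127.5 normalization
    return np.clip(img, 0, 255).astype(np.uint8)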
def process_text_dir(self, video_dir, video_url, output_dir, fa):
    if os.path.exists(output_dir):
        if self.validate_video_dir(video_url, output_dir):
            self.sem.release()
            return

    txt_paths = glob.glob(os.path.join(video_dir, '*.txt'))
    tmp = tempfile.mktemp(suffix='.mp4')
    try:
        out_path = tmp
        ydl_opts = {
            'format': 'best[height<=480]',
            'outtmpl': out_path,
            'noprogress': True,
        }
        if self.cookie:
            ydl_opts['cookiefile'] = self.cookie
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])

        vc = cv2.VideoCapture(out_path)
        fps = vc.get(cv2.CAP_PROP_FPS)
        landmarks = []
        prev_frame = None
        subvideo = 0
        for txt_path in txt_paths:
            frames = self.parse_txt(txt_path)
            final_output_dir = os.path.join(output_dir, str(subvideo))
            os.makedirs(final_output_dir, exist_ok=True)
            save_frame_num = 0
            first_box = None
            for data in frames:
                frame_num, _, _, _, _ = data
                # Annotation frame numbers assume 25 fps; map them to the real frame rate.
                real_frame_num = round(frame_num / 25 * fps)
                # Read the frame by its real number: skip ahead with grab() or seek directly.
                if prev_frame is not None and real_frame_num > prev_frame:
                    while real_frame_num - prev_frame > 1:
                        vc.grab()
                        prev_frame += 1
                else:
                    vc.set(cv2.CAP_PROP_POS_FRAMES, real_frame_num)
                ret, frame = vc.read()
                if not ret:
                    break

                boxes = common.get_boxes(self.face_driver, frame, threshold=.9)
                if len(boxes) != 1:
                    continue
                box = boxes[0]
                if first_box is None:
                    first_box = box.copy()

                # Skip faces smaller than the minimum area.
                if (box[2] - box[0]) * (box[3] - box[1]) < self.min_face_size * self.min_face_size:
                    continue

                if intersect_area(first_box, box) < 0.4:
                    # The face moved too far from its initial position:
                    # flush landmarks to final_output_dir and start a new subvideo.
                    if len(landmarks) > self.k:
                        LOG.info(f'Saved {len(landmarks)} frames/landmarks in {final_output_dir}')
                        np.save(os.path.join(final_output_dir, 'landmarks.npy'), np.array(landmarks))
                    else:
                        shutil.rmtree(final_output_dir)
                    landmarks = []
                    first_box = None
                    subvideo += 1
                    final_output_dir = os.path.join(output_dir, str(subvideo))
                    os.makedirs(final_output_dir, exist_ok=True)
                    save_frame_num = 0

                # Save the frame.
                file_name = f'{save_frame_num:05d}.jpg'
                cropped_ratio = 0.4
                w = box[2] - box[0]
                h = box[3] - box[1]
                new_box = np.array([
                    max(round(box[0] - cropped_ratio * w), 0),
                    max(round(box[1] - cropped_ratio * h), 0),
                    min(round(box[2] + w * cropped_ratio), frame.shape[1]),
                    min(round(box[3] + h * cropped_ratio), frame.shape[0]),
                ]).astype(int)
                cropped_frame = frame[new_box[1]:new_box[3], new_box[0]:new_box[2]]

                # Face box coords relative to the cropped frame.
                new_face_box = np.array([
                    box[0] - new_box[0],
                    box[1] - new_box[1],
                    new_box[2] - box[2] + w,
                    new_box[3] - box[3] + h,
                ]).astype(int)
                # Get landmarks from the RGB frame and accumulate them.
                lmark = fa.get_landmarks_from_image(cropped_frame[:, :, ::-1], [new_face_box])
                if lmark is None or len(lmark) == 0:
                    continue
                landmarks.append(lmark[0])

                cv2.imwrite(os.path.join(final_output_dir, file_name), cropped_frame)
                prev_frame = real_frame_num
                save_frame_num += 1

                # cv2.rectangle(
                #     cropped_frame,
                #     (new_face_box[0], new_face_box[1]),
                #     (new_face_box[2], new_face_box[3]),
                #     (0, 250, 0), thickness=1, lineType=cv2.LINE_AA
                # )
                # draw_points(cropped_frame, lmark[0])
                # cv2.imshow('Video', cropped_frame)
                # key = cv2.waitKey(0)
                # if key == 27:
                #     return

            subvideo += 1

            # Flush the remaining landmarks to final_output_dir.
            if len(landmarks) > self.k:
                np.save(os.path.join(final_output_dir, 'landmarks.npy'), np.array(landmarks))
                LOG.info(f'Saved {len(landmarks)} frames/landmarks in {final_output_dir}')
            else:
                shutil.rmtree(final_output_dir)
            landmarks = []
    except (youtube_dl.utils.ExtractorError, youtube_dl.utils.DownloadError) as e:
        if '429' in str(e):
            # Too many requests: stop the whole pipeline, not just this video.
            self.sem.release()
            self.stopped = True
            raise
        LOG.info(e)
    except Exception as e:
        LOG.exception(e)
    finally:
        if os.path.exists(tmp):
            os.remove(tmp)
        self.sem.release()
        LOG.info(f'End processing video {video_url}')
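# `parse_txt` is not shown. The loop above unpacks five values per row with the
# frame number first (at the annotation's nominal 25 fps), so a minimal sketch
# assuming whitespace-separated numeric rows could look like this; the file
# format itself is an assumption:
def parse_txt(self, txt_path):
    """Parse annotation rows of five numbers each; the first value is the
    frame number, the remaining four are unused by process_text_dir."""
    frames = []
    with open(txt_path) as f:
        for line in f:
            parts = line.split()
            if len(parts) != 5:
                continue  # skip headers and malformed rows
            frames.append([float(v) for v in parts])
    return frames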
def process_video(self, video_path, output_dir, fa):
    if os.path.exists(output_dir):
        if self.validate_video_dir(video_path, output_dir):
            self.sem.release()
            return

    vc = cv2.VideoCapture(video_path)
    landmarks = []
    subvideo = 0
    final_output_dir = os.path.join(output_dir, str(subvideo))
    os.makedirs(final_output_dir, exist_ok=True)
    save_frame_num = 0
    first_box = None
    while True:
        ret, frame = vc.read()
        if not ret:
            break

        boxes = common.get_boxes(self.face_driver, frame, threshold=.8)
        if len(boxes) != 1:
            continue
        box = boxes[0]
        if first_box is None:
            first_box = box.copy()

        # Skip faces smaller than the minimum area.
        if (box[2] - box[0]) * (box[3] - box[1]) < self.min_face_size * self.min_face_size:
            continue

        if intersect_area(first_box, box) < 0.3:
            # The face moved too far from its initial position:
            # flush landmarks to final_output_dir and start a new subvideo.
            if len(landmarks) > self.k:
                LOG.info(f'Saved {len(landmarks)} frames/landmarks in {final_output_dir}')
                np.save(os.path.join(final_output_dir, 'landmarks.npy'), np.array(landmarks))
            else:
                shutil.rmtree(final_output_dir)
            landmarks = []
            first_box = None
            subvideo += 1
            final_output_dir = os.path.join(output_dir, str(subvideo))
            os.makedirs(final_output_dir, exist_ok=True)
            save_frame_num = 0

        # Save the frame.
        file_name = f'{save_frame_num:05d}.jpg'
        cropped_ratio = 0.4
        w = box[2] - box[0]
        h = box[3] - box[1]
        new_box = np.array([
            max(round(box[0] - cropped_ratio * w), 0),
            max(round(box[1] - cropped_ratio * h), 0),
            min(round(box[2] + w * cropped_ratio), frame.shape[1]),
            min(round(box[3] + h * cropped_ratio), frame.shape[0]),
        ]).astype(int)
        cropped_frame = frame[new_box[1]:new_box[3], new_box[0]:new_box[2]]

        # Face box coords relative to the cropped frame.
        new_face_box = np.array([
            box[0] - new_box[0],
            box[1] - new_box[1],
            new_box[2] - box[2] + w,
            new_box[3] - box[3] + h,
        ]).astype(int)
        # Get landmarks from the RGB frame and accumulate them.
        lmark = fa.get_landmarks_from_image(cropped_frame[:, :, ::-1], [new_face_box])
        if lmark is None or len(lmark) == 0:
            continue
        landmarks.append(lmark[0])

        cv2.imwrite(os.path.join(final_output_dir, file_name), cropped_frame)
        save_frame_num += 1

        # cv2.rectangle(
        #     cropped_frame,
        #     (new_face_box[0], new_face_box[1]),
        #     (new_face_box[2], new_face_box[3]),
        #     (0, 250, 0), thickness=1, lineType=cv2.LINE_AA
        # )
        # draw_points(cropped_frame, lmark[0])
        # cv2.imshow('Video', cropped_frame)
        # key = cv2.waitKey(0)
        # if key == 27:
        #     return

    # Flush the remaining landmarks to final_output_dir.
    if len(landmarks) > self.k:
        np.save(os.path.join(final_output_dir, 'landmarks.npy'), np.array(landmarks))
        LOG.info(f'Saved {len(landmarks)} frames/landmarks in {final_output_dir}')
    else:
        shutil.rmtree(final_output_dir)

    self.sem.release()
    LOG.info(f'End processing video {video_path}')
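# `intersect_area` (compared against 0.4 and 0.3 above) is also external to
# this section. It evidently measures how much the current box still overlaps
# the first box of the subvideo; one plausible definition is the intersection
# area as a fraction of the first box's area (IoU would be a close variant):
def intersect_area(box_a, box_b):
    """Fraction of box_a covered by its intersection with box_b; 0.0 if disjoint."""
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(x2 - x1, 0) * max(y2 - y1, 0)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    return inter / area_a if area_a > 0 else 0.0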