def detect_faces_batch(images):
    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                            flip_input=False,
                                            device=device)
    batch_size = 32

    # Run detection in batches, halving the batch size on CUDA OOM errors.
    while 1:
        predictions = []
        try:
            for i in range(0, len(images), batch_size):
                predictions.extend(
                    detector.get_detections_for_batch(
                        np.array(images[i:i + batch_size]), True))
        except RuntimeError:
            if batch_size == 1:
                raise RuntimeError(
                    'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                )
            batch_size //= 2
            print('Recovering from OOM error; New batch size: {}'.format(
                batch_size))
            continue
        break

    del detector
    return predictions
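# A minimal usage sketch for detect_faces_batch, assuming `images` is a list
# of same-sized HxWx3 frames; the video path is hypothetical:
#
#     cap = cv2.VideoCapture('temp/example.mp4')
#     frames = []
#     while True:
#         ok, frame = cap.read()
#         if not ok:
#             break
#         frames.append(frame)
#     rects = detect_faces_batch(frames)  # one (x1, y1, x2, y2) or None per frame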
def face_detect(images):
    if args.cartoon:
        # Cartoon faces: RetinaFace from insightface (imported at module
        # level), running on CPU (ctx_id=-1).
        model = insightface.model_zoo.get_model('retinaface_r50_v1')
        model.prepare(ctx_id=-1, nms=0.4)
        predictions = []
        for image in images:
            bbox, landmark = model.detect(image, threshold=0.3, scale=1.0)
            # Keep x1, y1, x2, y2 and drop the detection-score column.
            bbox = np.array(bbox).astype(int)[:, :-1]
            predictions.append(tuple(bbox[0].tolist()))
    else:
        detector = face_detection.FaceAlignment(
            face_detection.LandmarksType._2D,
            flip_input=False,
            device=device)
        batch_size = args.face_det_batch_size

        while 1:
            predictions = []
            try:
                for i in tqdm(range(0, len(images), batch_size)):
                    predictions.extend(
                        detector.get_detections_for_batch(
                            np.array(images[i:i + batch_size])))
            except RuntimeError:
                if batch_size == 1:
                    raise RuntimeError(
                        'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                    )
                batch_size //= 2
                print('Recovering from OOM error; New batch size: {}'.format(
                    batch_size))
                continue
            break

    results = []
    pady1, pady2, padx1, padx2 = args.pads
    for rect, image in zip(predictions, images):
        if rect is None:
            cv2.imwrite(
                'temp/faulty_frame.jpg',
                image)  # check this frame where the face was not detected.
            raise ValueError(
                'Face not detected! Ensure the video contains a face in all the frames.'
            )

        y1 = max(0, rect[1] - pady1)
        y2 = min(image.shape[0], rect[3] + pady2)
        x1 = max(0, rect[0] - padx1)
        x2 = min(image.shape[1], rect[2] + padx2)

        results.append([x1, y1, x2, y2])

    boxes = np.array(results)
    if not args.nosmooth:
        boxes = get_smoothened_boxes(boxes, T=5)
    results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)]
               for image, (x1, y1, x2, y2) in zip(images, boxes)]

    if not args.cartoon:
        del detector
    return results
def face_detect(images):
    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                            flip_input=False,
                                            device=device)
    batch_size = args.face_det_batch_size

    while 1:
        predictions = []
        try:
            for i in tqdm(range(0, len(images), batch_size)):
                predictions.extend(
                    detector.get_detections_for_batch(
                        np.array(images[i:i + batch_size])))
        except RuntimeError:
            if batch_size == 1:
                raise RuntimeError(
                    'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                )
            batch_size //= 2
            print('Recovering from OOM error; New batch size: {}'.format(
                batch_size))
            continue
        break

    results = []
    pady1, pady2, padx1, padx2 = args.pads
    for rect, image in zip(predictions, images):
        if rect is None and args.ignorefaceless:
            # No face in this frame, but --ignorefaceless is set:
            # continue with an empty (zero-area) box.
            y1 = y2 = x1 = x2 = 0
        elif rect is None:
            cv2.imwrite(
                'temp/faulty_frame.jpg',
                image)  # check this frame where the face was not detected.
            raise ValueError(
                'Face not detected! Ensure the video contains a face in all the frames.'
            )
        else:
            y1 = max(0, rect[1] - pady1)
            y2 = min(image.shape[0], rect[3] + pady2)
            x1 = max(0, rect[0] - padx1)
            x2 = min(image.shape[1], rect[2] + padx2)

        results.append([x1, y1, x2, y2])

    boxes = np.array(results)
    if not args.nosmooth:
        boxes = get_smoothened_boxes(boxes, T=5)
    results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)]
               for image, (x1, y1, x2, y2) in zip(images, boxes)]

    del detector
    return results
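# Both face_detect variants above smooth the per-frame boxes with
# get_smoothened_boxes, which is not defined in this section. A minimal
# sketch matching the helper in the original Wav2Lip repository (a moving
# average over windows of T consecutive boxes), assuming numpy as np:
def get_smoothened_boxes(boxes, T):
    for i in range(len(boxes)):
        if i + T > len(boxes):
            window = boxes[len(boxes) - T:]
        else:
            window = boxes[i:i + T]
        boxes[i] = np.mean(window, axis=0)
    return boxes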
def face_detect_rect_first_one_only(images_ori):
    nosmooth = False
    pads = [0, 0, 0, 0]
    face_det_batch_size = 1
    # Detect on the first frame only; crop every frame with that box.
    images = images_ori[:1]

    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                            flip_input=False,
                                            device=device)
    batch_size = face_det_batch_size

    while 1:
        predictions = []
        try:
            for i in range(0, len(images), batch_size):
                predictions.extend(
                    detector.get_detections_for_batch(
                        np.array(images[i:i + batch_size])))
        except RuntimeError:
            if batch_size == 1:
                raise RuntimeError(
                    'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                )
            batch_size //= 2
            print('Recovering from OOM error; New batch size: {}'.format(
                batch_size))
            continue
        break

    results = []
    pady1, pady2, padx1, padx2 = pads
    for rect, image in zip(predictions, images):
        if rect is None:
            # check this frame where the face was not detected.
            # cv2.imwrite('temp/faulty_frame.jpg', image)
            print(
                'Face not detected! Ensure the video contains a face in all the frames.'
            )
            return []

        y1 = max(0, rect[1] - pady1)
        y2 = min(image.shape[0], rect[3] + pady2)
        x1 = max(0, rect[0] - padx1)
        x2 = min(image.shape[1], rect[2] + padx2)

        # Expand the box vertically as far as the frame allows, then widen it
        # so the crop comes out roughly square.
        # y_gap, x_gap = (y2 - y1)//2, (x2 - x1)//2
        y_gap = min((y2 - y1) // 2, y1, image.shape[0] - y2)
        x_gap = (((y2 - y1) + y_gap * 2) - (x2 - x1)) // 2
        coords = y1 - y_gap, y2 + y_gap, x1 - x_gap, x2 + x_gap
        # print(coords)
        # coords = [coords_[0], coords_[0]+1024, coords_[2], coords_[2]+1024]

    # results.append(image[y1-y_gap: y2+y_gap, x1-x_gap:x2+x_gap])
    results = [
        image[coords[0]:coords[1], coords[2]:coords[3]]
        for image in images_ori
    ]

    del detector
    return results
def detect_faces(image):
    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                            flip_input=False,
                                            device=device)
    predictions = []
    while 1:
        try:
            predictions.extend(
                detector.get_detections_for_batch(np.array(image), False))
            break
        except:
            # NOTE: the bare except swallows every error and retries, so a
            # persistent failure (e.g. a malformed input) loops forever.
            pass

    del detector
    return predictions
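# A minimal usage sketch for detect_faces, assuming it is given a batch of
# same-sized frames; the file name is hypothetical:
#
#     frame = cv2.imread('temp/example_frame.jpg')
#     rects = detect_faces([frame])  # one (x1, y1, x2, y2) or None per frame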
import argparse
import cv2
from glob import glob

import audio
from hparams import hparams as hp

import face_detection

parser = argparse.ArgumentParser()

parser.add_argument('--ngpu',
                    help='Number of GPUs across which to run in parallel',
                    default=1,
                    type=int)
parser.add_argument('--batch_size',
                    help='Single GPU Face detection batch size',
                    default=32,
                    type=int)
parser.add_argument("--data_root",
                    help="Root folder of the LRS2 dataset",
                    required=True)
parser.add_argument("--preprocessed_root",
                    help="Root folder of the preprocessed dataset",
                    required=True)

args = parser.parse_args()

# One detector per GPU so videos can be processed in parallel.
fa = [
    face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                 flip_input=False,
                                 device='cuda:{}'.format(id))
    for id in range(args.ngpu)
]

template = 'ffmpeg -loglevel panic -y -i {} -strict -2 {}'
# template2 = 'ffmpeg -hide_banner -loglevel panic -threads 1 -y -i {} -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 {}'


def process_video_file(vfile, args, gpu_id):
    video_stream = cv2.VideoCapture(vfile)

    frames = []
    while 1:
        still_reading, frame = video_stream.read()
        if not still_reading:
            video_stream.release()
            break
        frames.append(frame)
import torch, face_detection
import scipy, cv2, os, sys, argparse, audio
import numpy as np
from tqdm import tqdm

face = '../videos/main/fps_corrected_video/video-3-0-4a.mp4'
video_stream = cv2.VideoCapture(face)
fps = video_stream.get(cv2.CAP_PROP_FPS)
crop = [0, -1, 0, -1]

print('Reading video frames...')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE: the detector is pinned to the CPU even when CUDA is available.
detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                        flip_input=False,
                                        device='cpu')

full_frames = []
while 1:
    still_reading, frame = video_stream.read()
    if not still_reading:
        video_stream.release()
        break

    y1, y2, x1, x2 = crop
    if x2 == -1: x2 = frame.shape[1]
    if y2 == -1: y2 = frame.shape[0]

    frame = frame[y1:y2, x1:x2]
    full_frames.append(frame)

predictions = []
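# The snippet above stops after initialising `predictions`. A hedged sketch
# of the likely next step, batching full_frames through the detector; the
# batch size of 16 is an assumption, not from the source:
#
#     batch_size = 16
#     for i in tqdm(range(0, len(full_frames), batch_size)):
#         predictions.extend(
#             detector.get_detections_for_batch(
#                 np.array(full_frames[i:i + batch_size])))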
def face_detect_rect(images, images_ori):
    nosmooth = False
    pads = [0, 0, 0, 0]
    face_det_batch_size = 32

    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                            flip_input=False,
                                            device=device)
    batch_size = face_det_batch_size

    while 1:
        predictions = []
        try:
            for i in range(0, len(images), batch_size):
                predictions.extend(
                    detector.get_detections_for_batch(
                        np.array(images[i:i + batch_size])))
        except RuntimeError:
            if batch_size == 1:
                raise RuntimeError(
                    'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                )
            batch_size //= 2
            print('Recovering from OOM error; New batch size: {}'.format(
                batch_size))
            continue
        break

    all_coords = []
    results = []
    pady1, pady2, padx1, padx2 = pads
    for rect, image in zip(predictions, images):
        if rect is None:
            # check this frame where the face was not detected.
            cv2.imwrite('temp/faulty_frame.jpg', image)
            print(
                'Face not detected! Ensure the video contains a face in all the frames.'
            )
            return []

        y1 = max(0, rect[1] - pady1)
        y2 = min(image.shape[0], rect[3] + pady2)
        x1 = max(0, rect[0] - padx1)
        x2 = min(image.shape[1], rect[2] + padx2)
        y_gap, x_gap = (y2 - y1) // 6, (x2 - x1) // 6

        all_coords.append(np.array([y1, y2, x1, x2]))
        results.append(image[y1 - y_gap:y2 + y_gap, x1 - x_gap:x2 + x_gap])

    var = np.var(all_coords, axis=0)
    del detector

    if sum(var) > 100:
        # The box varies across frames: keep the per-frame crops.
        results = [cv2.resize(img, (128, 128)) for img in results]
    else:
        # The box is nearly static: crop every frame with the first box.
        first_rect = predictions[0]
        y1 = max(0, first_rect[1] - pady1)
        y2 = min(image.shape[0], first_rect[3] + pady2)
        x1 = max(0, first_rect[0] - padx1)
        x2 = min(image.shape[1], first_rect[2] + padx2)
        y_gap, x_gap = (y2 - y1) // 6, (x2 - x1) // 6
        results = [
            cv2.resize(img[y1 - y_gap:y2 + y_gap, x1 - x_gap:x2 + x_gap],
                       (128, 128)) for img in images
        ]

    return results
def face_detect(images, video):
    if not args.cache:
        detector = face_detection.FaceAlignment(
            face_detection.LandmarksType._2D,
            flip_input=False,
            device=device)
        batch_size = args.face_det_batch_size

        while 1:
            predictions = []
            try:
                for i in tqdm(range(0, len(images), batch_size)):
                    predictions.extend(
                        detector.get_detections_for_batch(
                            np.array(images[i:i + batch_size]), video))
            except RuntimeError:
                if batch_size == 1:
                    raise RuntimeError(
                        'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                    )
                batch_size //= 2
                print('Recovering from OOM error; New batch size: {}'.format(
                    batch_size))
                continue
            del detector
            break
    else:
        # Reuse detections cached to CSV (requires pandas imported as pd);
        # eval reconstructs the rect tuples stored as strings.
        df = pd.read_csv(args.cache)
        preds = df.values.tolist()
        if len(preds) == 1:
            predictions = [[eval(pred) for pred in preds[0]]]
        elif len(preds) > 1:
            predictions = [[eval(pred)] for batch in preds for pred in batch]

    if args.static or args.static_video:
        # Track a single person's face from the first frame only.
        face = predictions[0][args.person]
        predictions = [face]
    else:
        # Keep the first detected face in every frame.
        tmp = []
        for pred in predictions:
            face = pred[0]
            tmp.append(face)
        predictions = tmp

    results = []
    pady1, pady2, padx1, padx2 = args.pads
    for rect, image in zip(predictions, images):
        if rect is None:
            cv2.imwrite(
                'temp/faulty_frame.jpg',
                image)  # check this frame where the face was not detected.
            raise ValueError(
                'Face not detected! Ensure the video contains a face in all the frames.'
            )

        y1 = max(0, rect[1] - pady1)
        y2 = min(image.shape[0], rect[3] + pady2)
        x1 = max(0, rect[0] - padx1)
        x2 = min(image.shape[1], rect[2] + padx2)

        results.append([x1, y1, x2, y2])

    boxes = np.array(results)
    if not args.nosmooth:
        boxes = get_smoothened_boxes(boxes, T=5)
    results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)]
               for image, (x1, y1, x2, y2) in zip(images, boxes)]

    return results
import dlib
import sys
import os

import face_detection

# print("Dlib using cuda?")
# print(dlib.DLIB_USE_CUDA)

face_detector_path = sys.argv[1]
frames_path = sys.argv[2]
save_path = sys.argv[3]

# cnn_face_detector = dlib.cnn_face_detection_model_v1(face_detector_path)
fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D)

print('Reading video frames...')

numImg = len([
    name for name in os.listdir(frames_path)
    if os.path.isfile(os.path.join(frames_path, name))
])

from_first = True
with open(save_path, 'w+') as out:
    for f in range(1, numImg + 1):
        number = '{0:04d}'.format(f)
        filename = os.path.join(frames_path, "frames" + number + ".jpg")
        img = dlib.load_rgb_image(filename)
        dets = fa.get_detections_for_image(img)
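# The loop above is cut off after the detection call. A hedged sketch of how
# the detections might be written to `out`, one line per frame; the exact
# output format is an assumption, not from the source:
#
#         for det in dets:
#             out.write('{} {} {} {} {}\n'.format(
#                 filename, det[0], det[1], det[2], det[3]))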