def load_data(path, device):
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    face_detector = MTCNN(margin=0, keep_all=False, select_largest=False,
                          thresholds=[0.6, 0.7, 0.7], min_face_size=60,
                          factor=0.8, device=device).eval()
    video_fd = cv2.VideoCapture(path)
    if not video_fd.isOpened():
        print('Failed to open video:', path)
        return
    frame_index = 0
    faces = []
    success, frame = video_fd.read()
    while success:
        cropped_face = detect_face(frame, face_detector)
        # Guard before resizing: detect_face returns None when no face is found.
        if cropped_face is not None:
            cropped_face = cv2.resize(cropped_face, (64, 64))
            faces.append(transform(cropped_face))
        frame_index += 1
        success, frame = video_fd.read()
    video_fd.release()
    print('video frame length:', frame_index)
    faces = torch.stack(faces, dim=0)
    faces = torch.unsqueeze(faces, 0)
    # One label per retained face; the original used frame_index, which can
    # disagree with len(faces) when some frames contain no detectable face.
    y = torch.ones(faces.shape[1]).type(torch.IntTensor)
    return faces, y
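# Usage sketch (an assumption, not part of the original source): load one clip and
# inspect the shapes; 'video.mp4' is a placeholder path.
faces, y = load_data('video.mp4', device='cuda:0' if torch.cuda.is_available() else 'cpu')
print(faces.shape)  # (1, num_faces, 3, 64, 64)
print(y.shape)      # (num_faces,)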
def return_dataset(video_list, src_dir, output_dir, split='train', fd='dlib'):
    if fd == 'dlib':
        face_detector = dlib.cnn_face_detection_model_v1('./mmod_human_face_detector.dat')
    else:
        face_detector = MTCNN(margin=0, keep_all=False, select_largest=False,
                              thresholds=[0.6, 0.7, 0.7], min_face_size=60,
                              factor=0.8, device='cuda').eval()
    dataset = []
    for videoName, className in tqdm(video_list):
        class_dir = os.path.join(output_dir, split, str(className))
        os.makedirs(class_dir, exist_ok=True)
        video_path = os.path.join(src_dir, videoName)
        video_fd = cv2.VideoCapture(video_path)
        if not video_fd.isOpened():
            print('Skipped: {}'.format(video_path))
            continue
        frame_index = 0
        success, frame = video_fd.read()
        while success:
            video_id = videoName.split('/')[-4] + '_' + videoName.split('/')[-1]
            img_path = os.path.join(class_dir, '%s_%d.png' % (video_id, frame_index))
            cropped_face = detect_face(frame, face_detector, fd)
            if cropped_face is not None:
                cv2.imwrite(img_path, cropped_face)
                # Save the frame information for this video.
                info = [str(className), video_id, img_path]
                dataset.append(info)
            frame_index += 1
            success, frame = video_fd.read()
        print(frame_index)
        video_fd.release()
    return dataset
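# Usage sketch (assumption): the video paths must be at least four components deep,
# because the video id is built from path components [-4] and [-1]; all paths here are
# placeholders.
pairs = [('data/actor01/scene/clip.mp4', 0)]
dataset = return_dataset(pairs, src_dir='/videos', output_dir='/frames',
                         split='train', fd='mtcnn')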
class FaceExtractor:
    def __init__(self, use_gpu=False):
        device = 'cuda:0' if use_gpu else 'cpu'
        self.detector = MTCNN(keep_all=True, device=device)

    def extract_faces(self, frame, display_results=False):
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        boxes, scores = self.detector.detect(img)
        if boxes is None:
            return []
        if display_results:
            frame_draw = img.copy()
            draw = ImageDraw.Draw(frame_draw)
        faces = []
        for box, score in zip(boxes, scores):
            if box is None or score < FACE_SCORE_THRESHOLD:
                continue
            if display_results:
                draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                draw.text((box[0], box[1]), str(score))
            faces.append(box)
        if display_results:
            cv2.imshow("frame", cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
            cv2.waitKey(1)
        return faces
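# Usage sketch (assumption): run the extractor on a single BGR frame; 'frame.jpg' is a
# placeholder path, and FACE_SCORE_THRESHOLD is assumed defined at module level.
extractor = FaceExtractor(use_gpu=False)   # set True when a CUDA device is available
for box in extractor.extract_faces(cv2.imread('frame.jpg')):
    print('face at', [int(v) for v in box])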
def __init__(self):
    self.__aligner: MTCNN = MTCNN(keep_all=True, thresholds=[0.6, 0.7, 0.9])
    self.__facenet_preprocessor: Compose = transforms.Compose([Whitening()])
    self.__facenet: InceptionResnetV1 = InceptionResnetV1(pretrained='vggface2').eval()
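# Standalone sketch (assumption; the attributes above are name-mangled, so this rebuilds
# the same pipeline outside the class). It assumes PIL.Image, torch, MTCNN and
# InceptionResnetV1 are imported in the surrounding module; 'person.jpg' is a placeholder.
aligner = MTCNN(keep_all=True, thresholds=[0.6, 0.7, 0.9])
facenet = InceptionResnetV1(pretrained='vggface2').eval()
img = Image.open('person.jpg').convert('RGB')
faces = aligner(img)                  # (num_faces, 3, 160, 160) tensor, or None
if faces is not None:
    with torch.no_grad():
        embeddings = facenet(faces)   # (num_faces, 512)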
class FastFaceExtractor:
    def __init__(self, video_read_fn):
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device="cuda")

    def getFrame(self, data):
        idx, frame, my_idx = data
        h, w = frame.shape[:2]
        img = Image.fromarray(frame.astype(np.uint8))
        img = img.resize(size=[s // 2 for s in img.size])
        batch_boxes, probs = self.detector.detect(img, landmarks=False)
        faces = []
        scores = []
        if batch_boxes is None:
            return dict()
        for bbox, score in zip(batch_boxes, probs):
            if bbox is not None:
                xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                # Use distinct names for the box size so the frame's w/h are
                # not clobbered before they go into frame_dict.
                face_w = xmax - xmin
                face_h = ymax - ymin
                p_h = face_h // 3
                p_w = face_w // 3
                crop = frame[max(ymin - p_h, 0):ymax + p_h,
                             max(xmin - p_w, 0):xmax + p_w]
                faces.append(crop)
                scores.append(score)
        frame_dict = {
            "video_idx": 0,
            "frame_idx": my_idx,
            "frame_w": w,
            "frame_h": h,
            "faces": faces,
            "scores": scores,
        }
        return frame_dict

    def process_video(self, video, max_workers=16):
        result = self.video_read_fn(video)
        if result is None:
            return []
        my_frames, my_idxs = result
        results = []
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = [pool.submit(self.getFrame, (i, frame, my_idxs[i]))
                       for i, frame in enumerate(my_frames)]
            for future in futures:
                results.append(future.result())
        return results
class FaceDetector:
    def __init__(self):
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.mtcnn = MTCNN(image_size=input_image_size, min_face_size=30,
                           prewhiten=True, select_largest=True, device=self.device)

    def pre_process(self, image):
        """
        Resizes and preprocesses an image for feature extraction.
        :param image: cv2 image
        :return: preprocessed img_tensor for feature extraction
        """
        try:
            image = cv2.resize(image, (input_image_size, input_image_size),
                               interpolation=cv2.INTER_AREA)
        except cv2.error:
            return None
        img_tensor = functional.to_tensor(np.float32(image)).to(self.device)
        return prewhiten(img_tensor)

    def detect(self, image):
        """
        Runs face detection and returns the detected boxes/scores.
        :rtype: numpy.ndarray, or None if no face is detected
        :param image: image (PIL or cv2) for the detection
        :return: boxes array with the face locations and scores array with
                 the probability that a face is present
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        boxes, scores = self.mtcnn.detect(image)
        if boxes is not None:
            boxes = np.rint(boxes).astype(int)
        return boxes, scores

    def extract_face(self, image, save_path=None):
        """
        Runs face detection, crops the image of the largest face, and
        preprocesses it for feature extraction.
        :rtype: torch.tensor
        :param image: image {PIL.Image or cv2 numpy.ndarray} for the detection
        :param save_path: a path to save the detected face (optional)
        :return: preprocessed face image
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        return self.mtcnn(image, save_path=save_path, return_prob=True)
class FaceDetector():
    def __init__(self, batch_size, thresholds, device=None):
        self.batch_size = batch_size
        self.detector = MTCNN(margin=0, thresholds=thresholds, device=device)

    def detect_faces(self, frames, scale_coef):
        boxes = []
        for i in range(math.ceil(len(frames) / self.batch_size)):
            batch = frames[i * self.batch_size:(i + 1) * self.batch_size]
            batch_boxes, *_ = self.detector.detect(batch)
            boxes += [(b / scale_coef).astype(int).tolist() if b is not None else None
                      for b in batch_boxes]
        return boxes
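# Usage sketch (assumption): detection runs on downscaled frames to save memory, and the
# returned boxes are mapped back to original-resolution pixels by dividing by scale_coef.
# `half_size_frames` is a placeholder list of same-sized PIL images downscaled to 50%.
detector = FaceDetector(batch_size=32, thresholds=[0.85, 0.95, 0.95], device='cuda:0')
boxes = detector.detect_faces(half_size_frames, scale_coef=0.5)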
def load_image_classify_emotion(path):
    """
    Loads an image from a path and classifies the face's emotion.

    Parameters
    ----------
    path
        the path of the image

    Returns
    -------
    the loaded image and the prediction: 0 if the model determines the image
    is negative and 1 if the model determines the image is positive
    """
    pic = plt.imread(path)
    mtcnn = MTCNN()
    faces = mtcnn.forward(pic.copy())
    model = EmotionCNN()
    model.load_state_dict(torch.load("emotion_model_new.pt"))
    model.eval()
    pre_process = transforms.Compose([
        transforms.Resize(48),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])
    classes = ["negative", "positive"]
    fig = plt.figure(figsize=(25, 4))
    ax = fig.add_subplot()
    image = np.transpose(faces.numpy(), (1, 2, 0))
    image = Image.fromarray((image * 255).astype(np.uint8))
    image = pre_process(image)
    plt.imshow(np.transpose(faces.numpy(), (1, 2, 0)))
    output = model(image.reshape(1, 1, 48, 48))
    prediction = torch.argmax(output, dim=1).item()
    ax.set_title(f"Predicted: {classes[prediction]}")
    return pic, prediction
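# Usage sketch (assumption): classify one image; 'face.jpg' is a placeholder path.
pic, label = load_image_classify_emotion('face.jpg')
print('positive' if label == 1 else 'negative')
plt.show()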
class FacenetDetector(VideoFaceDetector):
    def __init__(self, device="cuda:0") -> None:
        super().__init__()
        self.detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)

    @property
    def _batch_size(self):
        return 16

    def _detect_faces(self, frames) -> List:
        batch_boxes, *_ = self.detector.detect(frames, landmarks=False)
        return [b.tolist() if b is not None else None for b in batch_boxes]
class FacenetDetector(VideoFaceDetector):
    # To run the MTCNN detector on the GPU, instantiate the model with device='cuda:0'.
    def __init__(self, device="cuda:0") -> None:
        # Call the constructor of the VideoFaceDetector base class.
        super().__init__()
        # Create a face detection pipeline using MTCNN (Multi-Task Cascaded
        # Convolutional Networks, a neural network that detects faces and
        # facial landmarks in images): no margin is added to the bounding
        # box, the thresholds are tuned to the dataset, and `device` selects
        # where the network runs.
        self.detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)

    def _detect_faces(self, frames) -> List:
        batch_boxes, *_ = self.detector.detect(frames, landmarks=False)
        return [b.tolist() if b is not None else None for b in batch_boxes]

    @property
    def _batch_size(self):
        return 32
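# Usage sketch (assumption): detect faces over a batch of frames; for batched detection
# MTCNN expects all frames to share the same resolution. `frame_paths` is a placeholder
# list of image paths.
detector = FacenetDetector(device='cuda:0')
frames = [Image.open(p).convert('RGB') for p in frame_paths]
boxes_per_frame = detector._detect_faces(frames)   # one list of boxes (or None) per frame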
def __init__(self, transform=None, labels=None, landmarks=None,
             bounding_boxes=None, config=None):
    """
    init
    :param transform: transformations to apply to each image
    :param labels: ground-truth dataframe (y-label)
    :param landmarks: landmarks dataframe
    :param bounding_boxes: bounding box dataframe
    :param config: training configuration file
    """
    self.labels = labels
    # Use landmarks.
    if config.dataset.bounding_box_mode == 0:
        self.landmarks = landmarks
    # Use bounding boxes.
    elif config.dataset.bounding_box_mode == 1:
        self.bounding_boxes = bounding_boxes
    # Use a face detector.
    elif config.dataset.bounding_box_mode == 2:
        self.mtcnn = MTCNN(
            select_largest=False,
            device=config.basic.cuda_device_name.split(',')[0])
    else:
        raise Exception(
            "Choose a valid bounding_box_mode (0=landmarks hand-labeled, "
            "1=bbx hand-labeled, 2=bbx detected)"
        )
    self.transform = transform
    assert self.transform is not None, \
        "A basic transformation is needed, i.e. Resize() and ToTensor()"
    self.config = config
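# Config sketch (assumption: the project's config object allows attribute access; this
# rebuilds the fields read above with OmegaConf purely for illustration).
from omegaconf import OmegaConf
config = OmegaConf.create({
    'dataset': {'bounding_box_mode': 2},             # 0=landmarks, 1=hand-labeled boxes, 2=MTCNN
    'basic': {'cuda_device_name': 'cuda:0,cuda:1'},  # only the first device drives the detector
})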
def main():
    args = parse_args()
    trans = transforms.Compose([
        preprocessing.ExifOrientationNormalize(),
        transforms.Resize(1024),
    ])
    images = datasets.ImageFolder(root=args.input_folder)
    images.idx_to_class = {v: k for k, v in images.class_to_idx.items()}
    create_dirs(args.output_folder, images.classes)
    mtcnn = MTCNN(prewhiten=False)
    for idx, (path, y) in enumerate(images.imgs):
        print("Aligning {} {}/{} ".format(path, idx + 1, len(images)), end='')
        aligned_path = os.path.join(args.output_folder, images.idx_to_class[y],
                                    os.path.basename(path))
        if not os.path.exists(aligned_path):
            img = mtcnn(img=trans(Image.open(path).convert('RGB')),
                        save_path=aligned_path)
            print("No face found" if img is None else '')
        else:
            print('Already aligned')
def create_face_embeddings(image_dir):
    _logger.info(f"Starting creation of face embeddings in directory {image_dir}")
    # If required, create a face detection pipeline using MTCNN:
    mtcnn = MTCNN()
    # Create an inception resnet (in eval mode):
    resnet = InceptionResnetV1(pretrained='vggface2').eval()
    img_paths_and_embeddings = [['Image Path', 'Embedding']]
    no_faces_found = [['Image Path']]
    pbar = tqdm(total=len(os.listdir(image_dir)))
    batch_size = 128
    for file_name_batch in walk_dir_in_batches(image_dir, batch_size=batch_size):
        for file_name in file_name_batch:
            # Calculate the embedding.
            if file_name.endswith(tuple(image_ext)):
                img_path = os.path.join(image_dir, file_name)
                _logger.debug(img_path)
                img_embedding = get_embedding(img_path, mtcnn, resnet)
                if img_embedding is None:
                    # Keep the row shape consistent with the header above.
                    no_faces_found.append([img_path])
                else:
                    img_paths_and_embeddings.append(
                        [img_path, img_embedding.detach().cpu().numpy()])
            pbar.update(1)
    pbar.close()
    _logger.info(f"Finished creation of face embeddings in directory {image_dir}")
    return img_paths_and_embeddings, no_faces_found
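# Usage sketch (assumption): persist both result tables to CSV; 'photos' is a
# placeholder directory and the layout mirrors the header rows built above.
import csv
rows, misses = create_face_embeddings('photos')
with open('embeddings.csv', 'w', newline='') as f:
    csv.writer(f).writerows(rows)
with open('no_faces.csv', 'w', newline='') as f:
    csv.writer(f).writerows(misses)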
import cv2
import time
from facenet_pytorch.models.mtcnn import MTCNN

device = "cpu"
detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device=device)
# detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)
video_capture = cv2.VideoCapture(0)
mode = 'landmark'
WIDTH = 224
while True:
    # Capture frame-by-frame.
    start = time.time()
    ret, frame = video_capture.read()
    if ret:
        rate = WIDTH / frame.shape[0]
        frame = cv2.flip(frame, 1)
        resized = cv2.resize(frame, None, fx=rate, fy=rate)
        # print('resize time:', time.time() - start)
        if mode == 'face':
            face_boxes, pred = detector.detect(resized, landmarks=False)
            # print('detect face:', time.time() - start)
            if face_boxes is not None:
                for face_box in face_boxes:
                    x0, y0, x1, y1 = [int(t / rate) for t in face_box]
                    cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
        elif mode == 'landmark':
            face_boxes, pred, landmarks = detector.detect(resized, landmarks=True)
            if face_boxes is not None:
                for face_box in face_boxes:
                    x0, y0, x1, y1 = [int(t / rate) for t in face_box]
                    cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 255, 0), 2)
                # Also draw the five detected landmark points per face.
                for pts in landmarks:
                    for x, y in pts:
                        cv2.circle(frame, (int(x / rate), int(y / rate)), 2, (0, 0, 255), -1)
        cv2.imshow('frame', frame)
    # Press 'q' to quit.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
video_capture.release()
cv2.destroyAllWindows()
def __init__(self, image_size: int):
    self._cropper = MTCNN(image_size=image_size,
                          margin=int(0.3 * image_size),
                          device=torch.device("cuda"))
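# Standalone sketch (assumption): the same cropper built directly. A margin of 30% of
# the output size keeps some background context around the face, e.g. a 48 px border
# for 160 px crops; 'person.jpg' is a placeholder path.
cropper = MTCNN(image_size=160, margin=48,
                device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
face = cropper(Image.open('person.jpg').convert('RGB'))  # (3, 160, 160) tensor or None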
class FaceExtractor:
    def __init__(self, video_read_fn):
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device="cuda")

    def process_videos(self, input_dir, filenames, video_idxs):
        videos_read = []
        frames_read = []
        frames = []
        results = []
        for video_idx in video_idxs:
            # Read the full-size frames from this video.
            filename = filenames[video_idx]
            video_path = os.path.join(input_dir, filename)
            result = self.video_read_fn(video_path)
            # Error? Then skip this video.
            if result is None:
                continue
            videos_read.append(video_idx)
            # Keep track of the original frames (need them later).
            my_frames, my_idxs = result
            frames.append(my_frames)
            frames_read.append(my_idxs)
            for i, frame in enumerate(my_frames):
                h, w = frame.shape[:2]
                img = Image.fromarray(frame.astype(np.uint8))
                img = img.resize(size=[s // 2 for s in img.size])
                batch_boxes, probs = self.detector.detect(img, landmarks=False)
                faces = []
                scores = []
                if batch_boxes is None:
                    continue
                for bbox, score in zip(batch_boxes, probs):
                    if bbox is not None:
                        xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                        # Use distinct names for the box size so the frame's
                        # w/h are not clobbered before they go into frame_dict.
                        face_w = xmax - xmin
                        face_h = ymax - ymin
                        p_h = face_h // 3
                        p_w = face_w // 3
                        crop = frame[max(ymin - p_h, 0):ymax + p_h,
                                     max(xmin - p_w, 0):xmax + p_w]
                        faces.append(crop)
                        scores.append(score)
                frame_dict = {
                    "video_idx": video_idx,
                    "frame_idx": my_idxs[i],
                    "frame_w": w,
                    "frame_h": h,
                    "faces": faces,
                    "scores": scores,
                }
                results.append(frame_dict)
        return results

    def process_video(self, video_path):
        """Convenience method for doing face extraction on a single video."""
        input_dir = os.path.dirname(video_path)
        filenames = [os.path.basename(video_path)]
        return self.process_videos(input_dir, filenames, [0])
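# Usage sketch (assumption): `read_frames` is a placeholder reader that returns a tuple
# of (frames, frame_indices); each result dict describes one processed frame.
extractor = FaceExtractor(video_read_fn=read_frames)
for r in extractor.process_video('clip.mp4'):   # placeholder path
    print(r['frame_idx'], len(r['faces']), r['scores'])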
#%%
trans = transforms.Compose([transforms.Resize(512)])
trans_cropped = transforms.Compose([np.float32, transforms.ToTensor(), prewhiten])
#%%
dataset = datasets.ImageFolder('dataset/lfw', transform=trans)
dataset.idx_to_class = {k: v for v, k in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=lambda x: x[0])
#%%
mtcnn = MTCNN(device=torch.device('cpu'))
#%%
total_item = len(dataset)
names = []
aligned = []
for img, idx in tqdm(loader):
    name = dataset.idx_to_class[idx]
    # start = time()
    img_align = mtcnn(img)  # save_path="data/aligned/{}/{}.png".format(name, str(idx))
    # print('MTCNN time: {:6f} seconds'.format(time() - start))
    if img_align is not None:
        names.append(name)
        aligned.append(img_align)
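#%%
# Sketch (assumption): a natural next cell embeds the aligned faces and prints a
# pairwise distance matrix; InceptionResnetV1 and pandas (pd) are assumed to be
# imported in the elided part of this script.
resnet = InceptionResnetV1(pretrained='vggface2').eval()
with torch.no_grad():
    embeddings = resnet(torch.stack(aligned))
dists = [[(e1 - e2).norm().item() for e2 in embeddings] for e1 in embeddings]
print(pd.DataFrame(dists, columns=names, index=names))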
def calcEmbedsRec(urlNew):
    # Initialize the list of identified names.
    recognized_names = []
    print('Received url: ', urlNew)
    device = torch.device('cuda:0')
    print('Running on device: {}'.format(device))
    mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7], factor=0.709,
                  prewhiten=True, device=device)

    # Takes two vectors 'a' and 'b' and returns the cosine similarity
    # according to the definition of the dot product.
    def cos_sim(a, b):
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)

    # cos_sim returns real numbers in [-1, 1], where negative values have a
    # different interpretation, so this maps the result onto [0, 1].
    def cos(a, b):
        minx = -1
        maxx = 1
        return (cos_sim(a, b) - minx) / (maxx - minx)

    # Define the Inception Resnet V1 module.
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # Define a dataset and data loader.
    dataset = datasets.ImageFolder('student_data/Test')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=lambda x: x[0])

    # Perform MTCNN facial detection: detect the face present in each image
    # and print the probability of the detection.
    aligned = []
    names = []
    for x, y in loader:
        x_aligned, prob = mtcnn(x, return_prob=True)
        if x_aligned is not None:
            print('Face detected with probability: {:8f}'.format(prob))
            aligned.append(x_aligned)
            names.append(dataset.idx_to_class[y])

    # Calculate the 512-dimensional face embeddings.
    aligned = torch.stack(aligned).to(device)
    embeddings = resnet(aligned).to(device)

    # Print the distance matrix for the classes: the embeddings are compared
    # by cosine distance. Rebinding cos_sim to nn.CosineSimilarity lets cos()
    # work on CUDA tensors, which the NumPy helper above cannot handle.
    cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
    for i in range(0, len(names)):
        emb = embeddings[i].unsqueeze(0)
        # The cosine similarity between the embeddings is given by 'dist'.
        dist = cos(embeddings[0], emb)
    dists = [[cos(e1, e2).item() for e2 in embeddings] for e1 in embeddings]
    # Helpful for analysing the results and for choosing the threshold below.
    print(pd.DataFrame(dists, columns=names, index=names))

    i = 1
    # A Haar cascade classifier is used to detect faces through the webcam:
    # it is preferred over MTCNN because it is faster, and basic real-time
    # applications need to be fast.
    classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    # This is the function that does the face recognition.
    def verify(embedding, start_rec_time):
        for i, k in enumerate(embeddings):
            for j, l in enumerate(embedding):
                # Compute the cosine distance.
                dist = cos(k, l)
                # The threshold of 0.8 was chosen after inspecting the
                # distance matrix printed above.
                if dist > 0.8:
                    # The name of the identified person is printed on screen
                    # and below the detected face (under the rectangular box).
                    text = names[i]
                    cv2.putText(img1, text,
                                (boxes[j][0].astype(int), boxes[j][3].astype(int) + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)
                    print(text)
                    recognized_names.append(text)
        textOnImg = "Time Elapsed: " + str(int(time.time() - start_rec_time)) + " s"
        cv2.putText(img1, textOnImg, (20, 20),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

    # Re-create the MTCNN detector, this time keeping all detected faces.
    mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7], factor=0.709,
                  prewhiten=True, device=device, keep_all=True)

    # The camera is opened; webcam video streaming starts.
    print("Camera on")
    cv2.namedWindow("Detected faces")
    options = {
        "CAP_PROP_FRAME_WIDTH": 640,
        "CAP_PROP_FRAME_HEIGHT": 480,
        "CAP_PROP_FPS ": 30,
    }
    output_params = {"-fourcc": "MJPG", "-fps": 30}
    writer = WriteGear(output_filename='Output.mp4', compression_mode=False,
                       logging=True, **output_params)
    url = urlNew

    # Run face recognition for one minute.
    start_face_rec = time.time()
    end_face_rec = time.time() + 60
    while time.time() < end_face_rec:
        img_resp = requests.get(url)
        img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)
        img = cv2.imdecode(img_arr, -1)
        # Flip to act as a mirror.
        im = cv2.flip(img, 1)
        # imutils.resize maintains the aspect ratio, so only the intended
        # width needs to be given.
        frame = imutils.resize(im, width=400)
        # Detect faces and draw their windows.
        winlist = pcn.detect(frame)
        img1 = pcn.draw(frame, winlist)
        face = list(map(lambda win: crop_face(img1, win, 160), winlist))
        face = [f[0] for f in face]
        cnt = 1
        for f in face:
            print('Printing Face no: ', cnt)
            cv2.imshow('Detected faces', f)
            cnt += 1
            path = "./student_data/Pics/"
            img_name = "image_{}.jpg".format(i)
            # Save the captured image.
            cv2.imwrite(os.path.join(path, img_name), f)
            imgName = "./student_data/Pics/image_{}.jpg".format(i)
            # Get the cropped and prewhitened image tensor.
            img = Image.open(imgName)
            i = i + 1
            img_cropped = mtcnn(img)
            boxes, prob = mtcnn.detect(img)
            img_draw = img.copy()
            draw = ImageDraw.Draw(img_draw)
            # Draw rectangular boxes on the faces present in the image and
            # save the detected, cropped faces.
            if boxes is not None:
                # Use a separate index so the image counter `i` is not clobbered.
                for box_idx, box in enumerate(boxes):
                    extract_face(
                        img, box,
                        save_path='./student_data/Pics/Cropped_Face_{}.jpg'.format(box_idx))
                img_draw.save('./student_data/Pics/Faces_Detected.jpg')
                ima = cv2.imread('./student_data/Pics/Faces_Detected.jpg')
            # Calculate the embeddings of each cropped face, then call
            # verify() to identify the person from the embeddings.
            if img_cropped is not None:
                img_embedding = resnet(img_cropped.cuda()).to(device)
                verify(img_embedding, start_face_rec)
        # The 'Detecting...' window shows rectangular boxes drawn on detected
        # faces; identified faces get their name written below the box.
        cv2.imshow('Detecting...', img1)
        writer.write(img1)
        if not face:
            textForImg = "Time Elapsed: " + str(int(time.time() - start_face_rec)) + " s"
            cv2.putText(img1, textForImg, (40, 40),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)
            cv2.imshow('Detecting...', img1)
        key = cv2.waitKey(1)
        # 13 is the 'Enter' key: if it is pressed, close all windows.
        if key == 13:
            break

    print("calculating a list of all recognized faces...")
    rec_names_dict = {i: recognized_names.count(i) for i in recognized_names}
    filtered_names = []
    for key in rec_names_dict:
        # Keep only names that were recognized in more than 30 frames.
        if rec_names_dict[key] > 30:
            filtered_names.append(key)
    print("Total Recognized names: ", rec_names_dict)
    print("Filtered names: ", filtered_names)
    cv2.destroyAllWindows()
    writer.close()
    return filtered_names
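# Usage sketch (assumption): point the recognizer at an IP-webcam snapshot endpoint and
# collect the names seen during the one-minute window; the URL is a placeholder taken
# from the commented-out example above.
present = calcEmbedsRec('http://192.168.43.223:8080/shot.jpg')
print('Marked present:', present)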
os.environ["OMP_NUM_THREADS"] = "1" from tqdm import tqdm import cv2 cv2.ocl.setUseOpenCL(False) cv2.setNumThreads(0) from deepfake_classifier.classifier.preprocessing.utils import get_original_video_paths from PIL import Image from facenet_pytorch.models.mtcnn import MTCNN import numpy as np detector = MTCNN(margin=0, thresholds=[0.65, 0.75, 0.75], device="cpu") def save_landmarks(ori_id, root_dir): ori_id = ori_id[:-4] ori_dir = os.path.join(root_dir, "crops", ori_id) landmark_dir = os.path.join(root_dir, "landmarks", ori_id) os.makedirs(landmark_dir, exist_ok=True) for frame in range(320): if frame % 10 != 0: continue for actor in range(2): image_id = "{}_{}.png".format(frame, actor) landmarks_id = "{}_{}".format(frame, actor) ori_path = os.path.join(ori_dir, image_id) landmark_path = os.path.join(landmark_dir, landmarks_id)
def _create_test_images(config, df_test_labels, df_test_landmarks,
                        df_test_bounding_boxes, transformer):
    """
    Generates test images based on dataframes.
    :param config: configuration file
    :param df_test_labels: labels dataframe
    :param df_test_landmarks: landmark dataframe
    :param df_test_bounding_boxes: bounding boxes dataframe
    :param transformer: transformer
    """
    create_directory(config.dataset_result_folder, recreate=True)
    print("created {}".format(config.dataset_result_folder))
    pbar = tqdm(range(len(df_test_labels.index)))
    mtcnn = MTCNN(select_largest=False, post_process=False, device='cuda:0')
    for index, (i, row) in enumerate(df_test_labels.iterrows()):
        image = bob.io.base.load('{}/{}'.format(
            config.dataset.dataset_image_folder, row.name))
        landmarks, bounding_boxes = None, None
        if config.dataset.bounding_box_mode == 0:
            landmarks = df_test_landmarks.iloc[index].tolist()
            landmarks = landmarks[:4] + landmarks[6:]
        elif config.dataset.bounding_box_mode == 1:
            bounding_boxes = df_test_bounding_boxes.iloc[index].tolist()
            bounding_boxes = bounding_boxes[1:]
        elif config.dataset.bounding_box_mode == 2:
            bounding_boxes, probs, lm = mtcnn.detect(
                Image.fromarray(np.transpose(image, (1, 2, 0)), 'RGB'),
                landmarks=True)
            scale = config.dataset.bounding_box_scale
            # Convert (x1, y1, x2, y2) to (x, y, w, h); if MTCNN cannot find a
            # bounding box, load the bounding box from the disk instead.
            try:
                bounding_boxes = bounding_boxes[0]
                bounding_boxes[2] = bounding_boxes[2] - bounding_boxes[0]
                bounding_boxes[3] = bounding_boxes[3] - bounding_boxes[1]
            except (TypeError, IndexError):
                bounding_boxes = df_test_bounding_boxes.iloc[index].tolist()
                bounding_boxes = bounding_boxes[1:]
            # Grow the box around its center by the configured scale factor.
            bounding_boxes[0] = bounding_boxes[0] - ((scale - 1) / 2 * bounding_boxes[2])
            bounding_boxes[1] = bounding_boxes[1] - ((scale - 1) / 2 * bounding_boxes[3])
            bounding_boxes[2] = scale * bounding_boxes[2]
            bounding_boxes[3] = scale * bounding_boxes[3]
        sample = {
            'image': image,
            'landmarks': landmarks,
            'bounding_boxes': bounding_boxes,
            'index': index,
        }
        X = transformer(sample)
        img = tensor_to_image(X)
        img.save('{}/{}'.format(config.dataset_result_folder, row.name[:-3] + 'png'))
        pbar.update(1)
    pbar.close()