def get_faces(detector, images, box, args):
    ret_faces = []
    all_boxes = []
    avg_box = None
    all_imgs = []
    if box is None:
        # Get bounding boxes
        print('Getting bounding boxes')
        for lb in tqdm(np.arange(0, len(images), args.mtcnn_batch_size)):
            imgs_pil = [Image.fromarray(image)
                        for image in images[lb:lb + args.mtcnn_batch_size]]
            boxes, _, _ = detector.detect(imgs_pil, landmarks=True)
            all_boxes.extend(boxes)
            all_imgs.extend(imgs_pil)
        # Check if boxes are fine, do temporal smoothing, return average box.
        img_size = (all_imgs[0].size[0] + all_imgs[0].size[1]) / 2
        stat, avg_box = check_boxes(all_boxes, img_size, args)
    else:
        all_imgs = [Image.fromarray(image) for image in images]
        stat, avg_box = True, box
    # Crop face regions.
    if stat:
        print('Extracting faces')
        for img in tqdm(all_imgs, total=len(all_imgs)):
            face = extract_face(img, avg_box, args.cropped_image_size, args.margin)
            ret_faces.append(face)
    return stat, ret_faces, avg_box
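# A minimal usage sketch for get_faces, assuming the module already imports
# numpy, tqdm, PIL.Image, and facenet_pytorch's extract_face as the function
# body requires. The args namespace and the fixed box below are illustrative.
import numpy as np
from argparse import Namespace
from facenet_pytorch import MTCNN

args = Namespace(mtcnn_batch_size=8, cropped_image_size=256, margin=70)
detector = MTCNN(select_largest=False, device='cpu')
frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(16)]  # stand-in frames
# Passing a precomputed box skips detection and temporal smoothing entirely.
stat, faces, avg_box = get_faces(detector, frames, box=[200, 120, 440, 360], args=args)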
def get_emb(emb_state, image, box):
    # Reuse a cached embedding when one is available.
    if emb_state is not None:
        return emb_state
    cropped_face = extract_face(image, box)
    cropped_face = prewhiten(cropped_face)
    emb = resnet(cropped_face.unsqueeze(0))[0].detach()  # .numpy().reshape(1, 512)
    return emb
def crop_and_resize_face(source_image, instances, face_detector, use_rendered,
                         rendered_image, target_shape=(160, 160)):
    # Select pixels labeled 23 or 24 (the face classes).
    face_mask = torch.logical_or(torch.eq(instances, 23), torch.eq(instances, 24))
    instances_masked = torch.mul(instances, face_mask)
    face_indices = torch.nonzero(instances_masked, as_tuple=True)
    resize_diff = int((target_shape[0] - target_shape[1]) / 2)
    if torch.numel(face_indices[0]) == 0 or torch.numel(face_indices[1]) == 0:
        return 0
    xmin, xmax = torch.min(face_indices[0]).item(), torch.max(face_indices[0]).item()
    ymin, ymax = torch.min(face_indices[1]).item(), torch.max(face_indices[1]).item()
    cropped_face = source_image[:, xmin:xmax, ymin:ymax]
    cropped_face = cropped_face.permute((1, 2, 0)).add(1).div(2).mul(255).cpu().numpy()
    try:
        box = face_detector.detect(cropped_face)[0][0]
        if use_rendered:
            cropped_rendered_face = rendered_image[:, xmin:xmax, ymin:ymax]
            cropped_rendered_face = cropped_rendered_face.permute(
                (1, 2, 0)).add(1).div(2).mul(255).detach().cpu().numpy()
            cropped_face = extract_face(cropped_rendered_face, box,
                                        image_size=target_shape[0])
        else:
            cropped_face = extract_face(cropped_face, box,
                                        image_size=target_shape[0])
        # Trim the height difference so the output matches target_shape.
        cropped_face = cropped_face[:, resize_diff:target_shape[0] - resize_diff, :]
        return cropped_face.cuda()
    except Exception:
        # Detection failed; signal "no face" the same way as an empty mask.
        return 0
def embeddings(self, path):
    video = mmcv.VideoReader(path)
    frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
              for frame in video[1:]]
    a = dict()
    for i in range(self.total_people):
        a[i] = []
    for frame in frames:
        bounding_box, prob = self.mtcnn.detect(frame)
        if bounding_box is None:
            continue
        for box in bounding_box:
            x1, y1, x2, y2 = box
            if x1 > x2:
                x1, x2 = x2, x1
            if y1 > y2:
                y1, y2 = y2, y1
            cropped_tensors = extract_face(frame, (x1, y1, x2, y2)).to(
                self.device).view(-1, 3, 160, 160)
            emb = self.resnet(cropped_tensors)
            emb = emb.detach()
            if self.device.type == "cuda":
                emb = emb.cpu()
            emb = emb.numpy()
            # Assign the face to the closest enrolled identity by squared L2 distance.
            # https://github.com/cmusatyalab/openface/blob/master/demos/compare.py
            # https://cmusatyalab.github.io/openface/demo-2-comparison/
            idx = -1
            min_dist = 10 ** 9
            for i, e in enumerate(self.embeddings_initial):
                d = (emb - e).reshape(512)
                dist = np.dot(d, d)
                if dist < min_dist:
                    idx = i
                    min_dist = dist
            a[idx].append(emb)
            # testing for face tracking
            crop = frame.crop((x1, y1, x2, y2))
            crop = cv2.cvtColor(np.array(crop), cv2.COLOR_RGB2BGR)
            cv2.imshow(str(idx), crop)
            cv2.waitKey(1)
    print(len(a[0]))
    return a
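# A small illustration of the nearest-embedding rule used above: the squared
# L2 distance np.dot(d, d) picks the enrolled identity closest to a new face.
# The stand-in vectors below are random and purely illustrative.
import numpy as np

enrolled = [np.random.rand(512).astype(np.float32) for _ in range(3)]  # stand-in identities
query = enrolled[1] + 0.01 * np.random.rand(512).astype(np.float32)
dists = [float(np.dot((query - e).reshape(512), (query - e).reshape(512)))
         for e in enrolled]
print(int(np.argmin(dists)))  # expected: 1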
def draw_one(self, frame, box, prob, landmark, count):
    """Draw landmarks and boxes for a single detected face."""
    im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # cv2.imwrite expects BGR, so no RGB conversion is needed here.
    cv2.imwrite(configs['frames_folder'] + "original" + str(count) + ".png", frame)
    cropped_img = extract_face(im_rgb, box, image_size=224,
                               save_path=configs['frames_folder'] + str(count) + ".png")
    # Draw rectangle on frame
    cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), thickness=2)
    transform = get_test_augmentations()
    # (C, H, W) -> (H, W, C)
    transformed_img = transform(image=np.array(cropped_img).transpose((1, 2, 0)))['image']
    # add batch dim
    transformed_img = transformed_img.unsqueeze(0)
    # transformed_img = transformed_img.to(device)
    cue = self.model.infer(transformed_img)
    # save cues
    save_image(cue, configs['frames_folder'] + "cues/" + str(count) + ".png")
    score = cue.mean().cpu().item()
    # Show detection probability and spoof score
    cv2.putText(frame, "FDet: " + "{:.3f}".format(prob), (box[0], int(box[3])),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
    cv2.putText(frame, "Spoof Score: " + "{:.6f}".format(score),
                (box[0], int(box[3] + 30.0)),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
    # Draw landmarks
    # for ld in landmark:
    #     cv2.circle(frame, tuple(ld), 5, (0, 0, 255), -1)
    return frame
def detectEmbed(width, height, bytedata):
    img = createImage(width, height, bytedata)
    boxes, probs = mtcnn.detect(img)
    if boxes is None:
        return None, None, None
    embeddings = []
    for box in boxes:
        img_cropped = extract_face(img, box)
        img_embedding = resnet(img_cropped.unsqueeze(0))
        embeddings.append(img_embedding.cpu().detach().numpy())
    embeddings = np.array(embeddings)
    return boxes, probs, embeddings
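# A minimal sketch of the module-level setup detectEmbed relies on, assuming
# facenet-pytorch's models. createImage is this codebase's own helper; a
# hypothetical stand-in built on PIL is shown for illustration.
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1

mtcnn = MTCNN(keep_all=True)
resnet = InceptionResnetV1(pretrained='vggface2').eval()

def createImage(width, height, bytedata):
    # Hypothetical stand-in: interpret raw RGB bytes as a PIL image.
    return Image.frombytes('RGB', (width, height), bytedata)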
def face_match(self, image, classify_model, person_names):
    box_dr = []
    text_dr = []
    mark_dr = []
    try:
        bboxes, prob, landmarks = self.mtcnn_pt.detect(image, landmarks=True)
    except Exception:
        with self.lock_boxes:
            self.box_draw[0] = box_dr
            self.text_draw[0] = text_dr
        return box_dr, text_dr, mark_dr
    if bboxes is None:
        with self.lock_boxes:
            self.box_draw[0] = box_dr
            self.text_draw[0] = text_dr
        return box_dr, text_dr, mark_dr
    for idx, box in enumerate(bboxes):
        if prob[idx] > 0.90:  # face detected with probability > 90%
            box_dr.append(box)
            mark_dr.append(landmarks[idx])
            face = extract_face(image, box,
                                image_size=self.mtcnn_pt.image_size,
                                margin=self.mtcnn_pt.margin)
            face = fixed_image_standardization(face)
            # Pass the cropped face through the resnet to get its embedding.
            emb = self.resnet(face.unsqueeze(0))
            emb_array = emb.detach().numpy()
            predictions = classify_model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[
                np.arange(len(best_class_indices)), best_class_indices]
            if best_class_probabilities[0] > self.accuracy_th:
                text = '{0}: {1:.0%}'.format(person_names[best_class_indices[0]],
                                             best_class_probabilities[0])
            else:
                text = 'Unknown'
            text_dr.append(text)
    with self.lock_boxes:
        self.box_draw[0] = box_dr
        self.text_draw[0] = text_dr
        self.mark_draw[0] = mark_dr
        self.new_boxes = True
    return box_dr, text_dr, mark_dr
def detect_face(img_dir='vgg2', margin=40):
    mtcnn = MTCNN(select_largest=True, device=device)
    img_dir = os.path.join(cur_path, img_dir)
    dir_lists = [os.path.join(img_dir, x) for x in os.listdir(img_dir)]
    dir_lists.sort()
    save_path = os.path.join(cur_path, img_dir + 'save_path')
    for dir_list in dir_lists[::-1]:
        dir_save = os.path.join(save_path, dir_list.split('/')[-1])
        if not os.path.exists(dir_save):
            os.makedirs(dir_save)
        else:
            continue
        img_paths = [os.path.join(dir_list, x) for x in os.listdir(dir_list)
                     if x.endswith('.jpg')]
        img_paths.sort()
        print(dir_list)
        for img_path in img_paths:
            img = Image.open(img_path)
            start = time.time()
            boxes, _ = mtcnn.detect(img)
            w, h = img.size
            if boxes is None:
                continue
            for box in boxes:
                # Expand the box by half the margin on each side, clamped to the image.
                offset = margin / 2
                box[0], box[1], box[2], box[3] = (max(box[0] - offset, 0),
                                                  max(box[1] - offset, 0),
                                                  min(box[2] + offset, w),
                                                  min(box[3] + offset, h))
                # Output face shape is (160, 160, 3).
                extract_face(img, box,
                             save_path=os.path.join(dir_save,
                                                    os.path.split(img_path)[-1]))
            end = time.time()
            print('img {} has taken {:.2f}s'.format(img_path, end - start))
def predict():
    if request.method == 'POST':
        payload = request.get_json()
        images_b64 = payload['images']
        ids = payload['ids']
        db_id = payload['db_id']
        images = []
        for im_b64 in images_b64:
            im_binary = base64.b64decode(im_b64)
            images.append(im_binary)
        del images_b64
        gc.collect()
        probs, bbox = validate_images(images)
        filtered_images = []
        filtered_idxs = []
        output = []
        transform_tensor_to_image = transforms.ToPILImage()
        for i in range(len(probs)):
            if probs[i] is None:
                # No face detected: emit an empty result for this id.
                entry = {'id': ids[i], 'prob': None,
                         'class_id': None, 'class_name': None}
                output.append(entry)
            else:
                print(bbox[i][0])
                face = extract_face(Image.open(io.BytesIO(images[i])), bbox[i][0])
                img = transform_tensor_to_image(face.cpu())
                filtered_images.append(img)
                filtered_idxs.append(ids[i])
        del images
        gc.collect()
        class_id, class_name, probs = get_prediction(db_id, filtered_images)
        del filtered_images
        gc.collect()
        for i in range(len(class_id)):
            entry = {'id': filtered_idxs[i], 'prob': probs[i],
                     'class_id': class_id[i], 'class_name': class_name[i]}
            output.append(entry)
        return jsonify(output)
def _draw(self, frame, boxes, probs, landmarks, count):
    """Draw landmarks and boxes for each detected face."""
    im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # cv2.imwrite expects BGR, so no RGB conversion is needed here.
    cv2.imwrite(configs['frames_folder'] + "original" + str(count) + ".png", frame)
    for box, prob, ld in zip(boxes, probs, landmarks):
        cropped_img = extract_face(im_rgb, box, image_size=224,
                                   save_path=configs['frames_folder'] + str(count) + ".png")
        # Draw rectangle on frame
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), thickness=2)
        transform = get_test_augmentations()
        # (C, H, W) -> (H, W, C)
        transformed_img = transform(image=np.array(cropped_img).transpose((1, 2, 0)))['image']
        # add batch dim
        transformed_img = transformed_img.unsqueeze(0)
        output = self.model.classify(transformed_img)
        prediction = torch.argmax(output, dim=1).cpu().numpy()
        # Show detection probability and predicted label
        cv2.putText(frame, "FDet: " + str(prob), (box[2], int(box[3] - 30.0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.putText(frame, str(labels_map.get(prediction[0])), (box[2], box[3]),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        # Draw landmarks
        # for p in ld:
        #     cv2.circle(frame, tuple(p), 5, (0, 0, 255), -1)
    return frame
def annotate_apply_tracking(self, frame, font_size=FONT_SIZE, box_size=1):
    detections = self.prev_dets
    # Run the tracker every 4th frame; reuse previous detections otherwise.
    if self.stride % 4 == 0:
        detections = self.apply_tracking(frame.resize(DETECTION_SIZE))
        # detections = self.apply_tracking(frame)
        self.prev_dets = detections
    self.stride = self.stride + 1
    annotated = frame.copy()
    draw = ImageDraw.Draw(annotated)
    for j in range(len(detections)):
        detection_box = np.multiply(detections[j][:4], RESIZE_FACTOR)
        # detection_box = detections[j][:4]
        track_id = detections[j][4]
        num_frames = self.frames_per_track.get(track_id, 0) + 1
        self.frames_per_track[track_id] = num_frames
        label = self.face_labels.get(track_id)
        # Refresh the label when the track is new, or periodically.
        if label is None or num_frames % 7 == 0:
            embedding_list = self.track_embeddings.get(track_id, [])
            if len(embedding_list) < 5:
                face = extract_face(frame, detection_box)
                embedding = self.rec_model(face[None, :, :, :].to(self.device)).detach()
                if embedding is not None:
                    embedding_list.append(embedding)
                    self.track_embeddings[track_id] = embedding_list
            avg_embedding = torch.mean(torch.stack(embedding_list), dim=0)
            name = self.identify_user(avg_embedding)
            self.face_labels.update({track_id: name})
        draw.rectangle([detection_box[0], detection_box[1],
                        detection_box[2], detection_box[3]],
                       outline=(0, 255, 0), width=box_size)
        draw.text((detection_box[0], detection_box[3]),
                  "{}_{}".format(self.face_labels.get(track_id, "bad_embedding"), track_id),
                  font=ImageFont.truetype(TRUE_TYPE, font_size),
                  fill=(0, 255, 0))
    return annotated
def detectAndConvert(self, frame):
    """
    Handle the actual face detection. Detected faces are converted to tensors.
    The number of detected faces can be found with len(self.detected_person).
    """
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    boxes, probas = self.detector.detect(image)
    if boxes is not None:
        for box in boxes:
            face = extract_face(frame, box)
            prediction = self.recognizer.predict(fixed_image_standardization(face))
            print(prediction)
            if self.__debug:
                # Draw a rectangle around the face
                frame = cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]),
                                      (255, 0, 0))
                cv2.putText(frame, f'{prediction[0]}',
                            (int(box[0]), int(box[1] - 10)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (200, 0, 0))
def preprocessing(self, path):
    video = mmcv.VideoReader(path)
    # Assumption: the first frame shows all of the speakers' faces.
    frame0 = Image.fromarray(cv2.cvtColor(video[0], cv2.COLOR_BGR2RGB))
    bounding_box, prob = self.mtcnn.detect(frame0)
    print(bounding_box)
    print(prob)
    self.total_people = 0
    for box in bounding_box:
        x1, y1, x2, y2 = box
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1
        print(x1, y1)
        print(x2, y2)
        cropped = frame0.crop((x1, y1, x2, y2))
        cropped.save("preprocessing/faces/Cropped" + str(self.total_people) + ".png")
        cropped_tensors = extract_face(frame0, (x1, y1, x2, y2)).to(
            self.device).view(-1, 3, 160, 160)
        cropped_tensors = self.resnet(cropped_tensors)
        cropped_tensors = cropped_tensors.detach()
        if self.device.type == "cuda":
            cropped_tensors = cropped_tensors.cpu()
        cropped_tensors = cropped_tensors.numpy()
        self.embeddings_initial.append(cropped_tensors)
        self.total_people += 1
    # Separate the video stream, dropping the audio.
    command = f'ffmpeg -i {path} -r 25 -c copy -an preprocessing/video/a.mp4'
    subprocess.call(command, shell=True)
def detectAndCrop(self, frame):
    if isinstance(frame, np.ndarray):
        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    mtcnn_module = MTCNN(keep_all=True)
    boxes, probs = mtcnn_module.detect(frame)
    faces = []
    if boxes is None:
        return faces
    count = 0
    for box, prob in zip(boxes, probs):
        if prob > self.threshold:
            face = extract_face(frame, box)
            print("Face #{} detected with probability : {}".format(count + 1, prob))
            faces.append({"bbox": box, "prob": prob})
            count += 1
            if self.saveIn is not None:
                img = self.toPIL(face).convert('RGB')
                img.save(os.path.join(self.saveIn, "face_{}.jpg".format(count)))
    return faces
def __call__(self, frame) -> List[Face]:
    mtcnn = MTCNN(
        keep_all=True,
        min_face_size=100,
        image_size=160,
        margin=14,
        selection_method="center_weighted_size",
        post_process=True,
        device=self.device,
    )
    boxes, probs = mtcnn.detect(frame)
    faces = []
    if boxes is None:
        return faces
    for i, box in enumerate(boxes):
        if probs[i] < 0.93:
            continue
        box = box.astype(int)
        faces.append(
            Face(box=box,
                 labels={},
                 image_tensor=fixed_image_standardization(extract_face(frame, box))))
    return faces
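# A minimal sketch of the pieces this detector callable assumes: a Face
# container whose fields mirror how the callable populates it, plus the
# facenet-pytorch helpers it references. The surrounding class is assumed to
# provide self.device; the Face definition here is illustrative.
from dataclasses import dataclass
from typing import Dict, List
import numpy as np
import torch
from facenet_pytorch import MTCNN, extract_face, fixed_image_standardization

@dataclass
class Face:
    box: np.ndarray
    labels: Dict[str, str]
    image_tensor: torch.Tensor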
# Check whether a GPU is available
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

# Set the MTCNN module's parameters
mtcnn = MTCNN(keep_all=False, device=device, post_process=False)

# Get the list of files and directories
fname, dname = listar_imagens(basedir)

# Detect faces and save them to the facecrops folder
inicio = time.time()
print('Processing started')
facecrop = [it.replace(basedir, basedir + '_faces') for it in fname]
for f, filename in enumerate(fname):
    try:
        img = Image.open(filename)
        box, prob = mtcnn.detect(img)
    except Exception:
        print('Failed to process file ' + filename)
        continue
    if prob[0] and prob[0] >= 0.95:
        savepath = ('/projects/jeff/TUMGAIDimage_facecrops3' +
                    os.path.dirname(filename)[-9:] + '-' + os.path.basename(filename))
        extract_face(img, box[0], save_path=savepath)
print('Processing finished')
print(time.strftime('%H:%M:%S', time.localtime()))
tempo_total = time.time() - inicio
print("Total time: %02dm:%02ds" % divmod(tempo_total, 60))
def detect(self, img_ls, crop_size=None, mode='Extract_largest',
           save_faces=False, save_annotate=False, save_path='face_result'):
    """Face detection.

    Args:
        img_ls (list): list of PIL images.
        crop_size (tuple, optional): crop images with (left, top, right, bottom).
            Defaults to None.
        mode (str, optional): one of 'Detect_bool', 'Detect', 'Extract_largest',
            and 'Extract_all'. If you only want to know whether there are any
            faces, use 'Detect_bool'. If you want the boxes and probs of faces,
            use 'Detect'. If you want all information about the faces, use one
            of the 'Extract' modes. Defaults to 'Extract_largest'.
        save_faces (bool, optional): for 'Extract' modes. Defaults to False.
        save_annotate (bool, optional): for 'Extract' modes; save images with
            annotations. Defaults to False.
        save_path (str, optional): output folder. Defaults to 'face_result'.

    Returns:
        tuple: depends on the mode.
    """
    if crop_size:
        for i, img in enumerate(img_ls):
            img_ls[i] = img.crop(crop_size)
    try:
        boxes, probs = self.mtcnn.detect(img_ls)
    except Exception as e:
        print(f'{e} \n...add crop_size=(left, top, right, bottom) to make images the same size')
        return None
    if mode == 'Detect_bool':
        return isinstance(boxes, np.ndarray)
    elif mode == 'Detect':
        return boxes, probs
    elif 'Extract' in mode:
        faces = []
        annotates = []
        boxes = boxes.tolist()
        probs = probs.tolist()
        for id_, img in enumerate(img_ls):
            face_batch = []
            img_annotate = img.copy()
            draw = ImageDraw.Draw(img_annotate)
            box_all = boxes[id_]
            if mode == 'Extract_largest':
                # Clamp each box to the image, then keep only the largest face.
                for i, box in enumerate(box_all):
                    left = max(0, box[0])
                    top = max(0, box[1])
                    right = min(np.array(img_ls[id_]).shape[1], box[2])
                    down = min(np.array(img_ls[id_]).shape[0], box[3])
                    box_all[i] = [left, top, right, down]
                area = list(map(self._cal_area, box_all))
                max_id = area.index(max(area))
                box = box_all[max_id]
                # Enlarge the box to cover the whole head.
                box_head = [box[0] - box[0] / 8, box[1] - box[1] / 5,
                            box[2] + box[2] / 8, box[3] + box[3] / 10]
                boxes[id_] = [box_head]
                probs[id_] = [probs[id_][max_id]]
                draw.rectangle(box_head, width=5)
                if save_faces:
                    if not os.path.exists(save_path):
                        os.mkdir(save_path)
                    if not os.path.exists(os.path.join(save_path, 'faces')):
                        os.mkdir(os.path.join(save_path, 'faces'))
                    face_batch.append(
                        extract_face(img, box_head,
                                     save_path=os.path.join(
                                         save_path, f'detected_face_{id_}-{0}.png')))
                else:
                    face_batch.append(extract_face(img, box_head))
            elif mode == 'Extract_all':
                for i, box in enumerate(box_all):
                    box_head = [box[0] - box[0] / 3, box[1] - box[1] / 3,
                                box[2] + box[2] / 83, box[3] + box[3] / 10]
                    box_all[i] = box_head
                    draw.rectangle(box_head, width=5)
                    if save_faces:
                        if not os.path.exists(save_path):
                            os.mkdir(save_path)
                        if not os.path.exists(os.path.join(save_path, 'faces')):
                            os.mkdir(os.path.join(save_path, 'faces'))
                        face_batch.append(
                            extract_face(img, box_head,
                                         save_path=os.path.join(
                                             save_path, f'detected_face_{id_}-{i}.png')))
                    else:
                        face_batch.append(extract_face(img, box_head))
            else:
                print(f"Error: there's no mode called {mode}")
            faces.append(face_batch)
            annotates.append(np.asarray(img_annotate))
            if save_annotate:
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                if not os.path.exists(os.path.join(save_path, 'annotations')):
                    os.mkdir(os.path.join(save_path, 'annotations'))
                img_annotate.save(os.path.join(save_path, f'annotated_faces_{id_}.png'))
        return np.asarray(boxes), probs, annotates, faces
    else:
        print(f"Error: there's no mode called {mode}")
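# A minimal sketch of wiring the multi-mode detect() above into a wrapper
# class. FaceTool, its _cal_area helper, and the file name are illustrative;
# the module is assumed to already import os, ImageDraw, and extract_face as
# detect() requires.
import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN

class FaceTool:
    def __init__(self):
        self.mtcnn = MTCNN(keep_all=True)

    @staticmethod
    def _cal_area(box):
        # Area of a (left, top, right, bottom) box.
        return (box[2] - box[0]) * (box[3] - box[1])

    detect = detect  # reuse the function defined above as a method

imgs = [Image.open('group.jpg')]  # hypothetical input
tool = FaceTool()
if tool.detect(imgs, mode='Detect_bool'):
    boxes, probs, annotated, faces = tool.detect(imgs, mode='Extract_largest')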
def detect_live(self):
    mtcnn = MTCNN()
    faces = {}
    frameCount = 0
    vid = cv2.VideoCapture(0)
    if self.record_for is not None:
        start_time = time.time()
    while vid.isOpened():
        if self.record_for is not None:
            curr_time = time.time() - start_time
            if curr_time > self.record_for:
                break
        _, frame = vid.read()
        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        frameCount += 1
        boxes, probs = mtcnn.detect(frame)
        frame_draw = frame.copy()
        draw = ImageDraw.Draw(frame_draw)
        if boxes is not None:
            faces["frame_{}".format(frameCount)] = []
            for box, p in zip(boxes, probs):
                if p > 0.70:
                    draw.rectangle(box.tolist(), outline=(255, 0, 0), width=1)
                    if self.extract:
                        face = extract_face(frame, box.tolist())
                        faces["frame_{}".format(frameCount)].append(face)
                        if self.save:
                            if self.saveIn is None:
                                raise ValueError
                            img = self.tsfms(face)
                            img.save(os.path.join(self.saveIn,
                                                  "frame_{}.jpg".format(len(faces))))
        cv2.imshow("Tracking window",
                   cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
        if self.save_video:
            self.frames_tracked.append(frame_draw)
        if cv2.waitKey(1) == ord("a"):
            break
    vid.release()
    if self.save_video:
        print(len(self.frames_tracked))
        self.saveVideo(self.saveIn, self.frames_tracked, "trackedVid")
    if self.save:
        return len(faces.keys()), faces
    return None, None
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

mtcnn = MTCNN(keep_all=True, device=device)

# files = glob.glob("/home/jeff/datasets/TUM Gait/data_person1+2/image/p001/b01/*")
files = glob.glob("reid/b01/*")
for myFile in files:
    fileName = os.path.splitext(os.path.basename(myFile))[0]
    img = Image.open(myFile)
    boxes, probs, points = mtcnn.detect(img, landmarks=True)
    if boxes is not None:
        # Draw boxes and save faces
        img_draw = img.copy()
        draw = ImageDraw.Draw(img_draw)
        for i, (box, point) in enumerate(zip(boxes, points)):
            draw.rectangle(box.tolist(), width=5)
            for p in point:
                # draw.rectangle((p - 10).tolist() + (p + 10).tolist(), width=10)
                draw.point(p)
            extract_face(img, box,
                         save_path='reid/output/detected_face_{}_{}.png'.format(fileName, i))
        img_draw.save('reid/output/annotated_faces_{}.png'.format(fileName))
def main():
    # Read options
    opt = TestOptions().parse(save=False)
    # If a demo directory to save generated frames is given, create it.
    if opt.demo_dir is not None and not os.path.exists(opt.demo_dir):
        os.makedirs(opt.demo_dir)
    # Hardcoded constant values
    opt.nThreads = 0
    opt.batchSize = 1
    opt.serial_batches = True
    # GPU id to be used for mxnet/reconstructor
    opt.gpu_id = opt.gpu_ids[-1]
    # Device to be used for MTCNN face detector
    detector_device = 'cpu'
    # Face bounding box margin
    margin = 120
    # How many frames from the target's training video
    # to consider when gathering head pose and eye size statistics
    n_frames_target_used = 1000
    # How many of the first source frames to consider for eye size adaptation
    # between source and target.
    n_frames_init = 25
    # For cuda initialization errors.
    torch.multiprocessing.set_start_method('spawn', force=True)
    # Initialize video renderer.
    modelG = create_model(opt)
    # Initialize NMFC renderer.
    renderer = NMFCRenderer(opt)
    # Initialize face detector.
    detector = MTCNN(image_size=opt.loadSize, margin=margin,
                     post_process=False, device=detector_device)
    # Initialize landmark extractor.
    dlib_detector = dlib.get_frontal_face_detector()
    dlib_predictor = dlib.shape_predictor(
        'preprocessing/files/shape_predictor_68_face_landmarks.dat')
    # Read the identity parameters of the target person.
    id_params, _ = read_params('id',
                               os.path.join(opt.dataroot, 'train', 'id_coeffs'),
                               opt.target_name)
    # Read camera parameters from the target.
    t_cam_params, _ = read_params('cam',
                                  os.path.join(opt.dataroot, 'train', 'misc'),
                                  opt.target_name)
    t_cam_params = t_cam_params[:n_frames_target_used]
    # Read eye landmarks from the target's video.
    eye_landmarks_target = read_eye_landmarks(
        os.path.join(opt.dataroot, 'train', 'landmarks70'), opt.target_name)
    eye_landmarks_target[0] = eye_landmarks_target[0][:n_frames_target_used]
    eye_landmarks_target[1] = eye_landmarks_target[1][:n_frames_target_used]
    # Set up camera capturing.
    window_name = 'Head2Head Demo'
    video_capture = cv2.VideoCapture(0)
    video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 2)  # set double buffer for capture
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    print("Video capture at {} fps.".format(fps))
    processes = []
    # Face tracker / detector
    box_redetect_nframes = opt.box_redetect_nframes
    box = None  # Face bounding box, calculated on the first frame
    # Face reconstructor / NMFC renderer
    nmfc = None  # Current NMFC image
    s_cam_params = []  # Camera parameters of the source video.
    adapted_cam_params = []  # Source camera parameters, adapted to the target.
    # Facial (eyes) landmarks detector
    prev_eye_centres = None  # Eye centres in the previous frame
    eye_landmarks = None  # Final eye landmarks, sent to the video renderer.
    eye_landmarks_source = [[], []]  # Eye landmarks from the first n_frames_init source frames.
    eye_landmarks_source_queue = Queue()  # Queue of eye landmarks extracted from the source video.
    landmarks_success_queue = Queue()  # Queue flagging whether eye landmark detection succeeded.
    frames_queue = Queue()  # Queue of video frames, read by the landmark detector process.
    # Process running 68 + 2 landmark detection in parallel with
    # face reconstruction / NMFC rendering.
    process_eye_landmarks = Process(
        target=compute_eye_landmarks,
        args=(dlib_detector, dlib_predictor, eye_landmarks_source_queue,
              landmarks_success_queue, frames_queue))
    process_eye_landmarks.start()
    processes.append(process_eye_landmarks)
    print('Launched landmark extractor!')
    # Video renderer (GAN).
    input_queue = torchQueue()  # Queue of the GAN's input
    output_queue = torchQueue()  # Queue of the GAN's output
    # Process running the video renderer without waiting for NMFC + eye landmark creation.
    process_video_renderer = torchProcess(target=compute_fake_video,
                                          args=(input_queue, output_queue, modelG, opt))
    process_video_renderer.start()
    processes.append(process_video_renderer)
    print('Launched video renderer!')
    camera = None
    if opt.realtime:
        try:
            import pyfakewebcam
            stream_id = opt.realtime_cam_id
            webcam_width = webcam_height = opt.loadSize
            camera = pyfakewebcam.FakeWebcam(f'/dev/video{stream_id}',
                                             webcam_width, webcam_height)
            camera.print_capabilities()
            print(f'Fake webcam created on /dev/video{stream_id}.')
        except Exception as ex:
            print('Fake webcam initialization failed:')
            print(str(ex))
    iter = 0
    # Start the main process (face reconstruction / NMFC rendering).
    while True:
        t0 = time.perf_counter()
        try:
            # Read generated frames from the video renderer's output queue (non-blocking).
            fake_frame, real_frame = output_queue.get_nowait()
            result = np.concatenate([real_frame, fake_frame[..., ::-1]], axis=1)
            # If an output directory is specified, save frames there.
            if opt.demo_dir is not None:
                result_path = os.path.join(opt.demo_dir, "{:06d}".format(iter) + '.png')
                cv2.imwrite(result_path, result)
            elif camera is not None:
                camera.schedule_frame(fake_frame)
            else:
                cv2.imshow(window_name, result)
                cv2.waitKey(1)
        except queue.Empty:
            # If the queue is empty, continue.
            pass
        # Read the next frame.
        _, frame = video_capture.read()
        # Crop the larger dimension of the frame to make it square.
        frame = make_frame_square(frame)
        if box_redetect_nframes > 0 and iter % box_redetect_nframes == 0:
            box = None
        # If no bounding box has been detected yet, run MTCNN (once, on the first frame).
        if box is None:
            box = detect_box(detector, frame)
            # If no face was detected, exit.
            if box is None:
                break
        # Crop the frame where the face was seen in the first frame.
        frame = extract_face(frame, box, opt.loadSize, margin)
        frame = tensor2npimage(frame)
        frame = np.transpose(frame, (1, 2, 0))
        # Send the ROI frame to the landmark detector, while the main
        # process performs face reconstruction.
        frames_queue.put(frame)
        # Get expression and pose, adapt pose and identity to the target, and render the NMFC.
        success, s_cam_params, adapted_cam_params, new_nmfc = \
            compute_reconstruction(renderer, id_params, t_cam_params, s_cam_params,
                                   adapted_cam_params, frame)
        # Update the current NMFC if reconstruction was successful.
        if success:
            nmfc = new_nmfc
        # If not, use the previous NMFC. If none exists, exit.
        if not success and nmfc is None:
            break
        # Find the eye centres using the NMFC image.
        eye_centres, prev_eye_centres = search_eye_centres([nmfc[:, :, ::-1]],
                                                           prev_eye_centres)
        # Read the queue to get eye landmarks, if detection was successful.
        if landmarks_success_queue.get():
            eye_landmarks = eye_landmarks_source_queue.get()
        # If not, use the previous eye landmarks. If none exist, exit.
        if eye_landmarks is None:
            break
        # During the first frames, determine the source-target eye size (height) ratio.
        if iter < n_frames_init:
            eye_landmarks_source[0].append(eye_landmarks[0])
            eye_landmarks_source[1].append(eye_landmarks[1])
            eye_ratios = compute_eye_landmarks_ratio(eye_landmarks_source,
                                                     eye_landmarks_target)
        # Adapt the eye landmarks to the target face, by placing them at the eye
        # centres and re-scaling their size to match the NMFC size and the target's
        # mean eye height (top-down distance).
        eye_lands = adapt_eye_landmarks([[eye_landmarks[0]], [eye_landmarks[1]]],
                                        eye_centres, eye_ratios,
                                        s_cam_params[-1:], adapted_cam_params[-1:])
        # Send the conditional input to the video renderer.
        input_queue.put((nmfc, eye_lands[0], frame))
        iter += 1
        # Show the frame rate.
        t1 = time.perf_counter()
        dt = t1 - t0
        print('fps: %0.2f' % (1 / dt))
    # Terminate the processes and join.
    for process in processes:
        process.terminate()
        process.join()
    renderer.clear()
    print('Main process exiting')
def input_face_embeddings(frames: Union[List[str], np.ndarray],
                          is_path: bool,
                          mtcnn: MTCNN,
                          resnet: InceptionResnetV1,
                          face_embed_cuda: bool,
                          use_half: bool,
                          coord: List,
                          name: str = None,
                          save_frames: bool = False) -> torch.Tensor:
    """
    Get the face embeddings.

    NOTE: If a face is not detected by the detector, the embedder's input
    for that frame is zeroed instead of an error being raised.
    NOTE: This is a memory-hungry function.

    Args:
        frames: frames from the video
        is_path: whether to read from the filesystem or memory
        mtcnn: face detector
        resnet: face embedder
        face_embed_cuda: use CUDA for the model
        use_half: use half precision
        coord: normalized (x, y) point that the target face must contain
        name: video name, used when saving frames
        save_frames: save the crop for frames where no face was found

    Returns:
        emb: embeddings for all input frames
    """
    if face_embed_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    result_cropped_tensors = []
    no_face_indices = []
    for i, f in enumerate(frames):
        if is_path:
            frame = Image.open(f)
        else:
            frame = Image.fromarray(f.astype("uint8"))
        with torch.no_grad():
            cropped_tensors = None
            width, height = frame.size
            bounding_box, prob = mtcnn.detect(frame)
            if bounding_box is not None:
                for box in bounding_box:
                    x1, y1, x2, y2 = box
                    if x1 > x2:
                        x1, x2 = x2, x1
                    if y1 > y2:
                        y1, y2 = y2, y1
                    # Keep the face that contains the query coordinate.
                    x, y = coord[0], coord[1]
                    x *= width
                    y *= height
                    if x1 <= x <= x2 and y1 <= y <= y2:
                        cropped_tensors = extract_face(frame, box)
                        # print("found", box, x, y, end='\r')
                        break
        if cropped_tensors is None:
            # Face not detected, for some reason.
            cropped_tensors = torch.zeros((3, 160, 160))
            no_face_indices.append(i)
            if save_frames:
                name = name.replace(".mp4", "")
                saveimg = cropped_tensors.detach().cpu().numpy().astype("uint8")
                saveimg = np.squeeze(saveimg.transpose(1, 2, 0))
                Image.fromarray(saveimg).save(f"{name}_{i}.png")
        result_cropped_tensors.append(cropped_tensors.to(device))
    if len(no_face_indices) > 20:
        # A few videos start with silence; allow about 0.5 seconds of missing
        # faces, otherwise drop the video.
        return None
    del frames
    # Stack all frames.
    result_cropped_tensors = torch.stack(result_cropped_tensors)
    # Embed all frames.
    result_cropped_tensors = result_cropped_tensors.to(device)
    if use_half:
        result_cropped_tensors = result_cropped_tensors.half()
    with torch.no_grad():
        emb = resnet(result_cropped_tensors)
    if use_half:
        emb = emb.float()
    return emb.to(torch.device("cpu"))
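# A minimal sketch of calling input_face_embeddings on in-memory frames,
# assuming facenet-pytorch models. The blank stand-in frames and the coord
# point (the normalized location of the target face) are illustrative.
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1

mtcnn = MTCNN(keep_all=True)
resnet = InceptionResnetV1(pretrained='vggface2').eval()
frames = np.zeros((75, 480, 640, 3), dtype=np.uint8)  # stand-in video frames
emb = input_face_embeddings(frames, is_path=False, mtcnn=mtcnn, resnet=resnet,
                            face_embed_cuda=False, use_half=False,
                            coord=[0.5, 0.5])
# (num_frames, 512) when enough faces are found; None when too many are missing.
print(None if emb is None else emb.shape)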
def calcEmbedsRec(urlNew):
    # Initialize identified names.
    recognized_names = []
    print('Received url: ', urlNew)
    device = torch.device('cuda:0')
    print('Running on device: {}'.format(device))
    mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7], factor=0.709,
                  prewhiten=True, device=device)

    # Takes two vectors 'a' and 'b'.
    # Returns the cosine similarity according to the definition of the dot product.
    def cos_sim(a, b):
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)

    # cos_sim returns real numbers, where negative numbers have different
    # interpretations. So we use this function to return only positive values.
    def cos(a, b):
        minx = -1
        maxx = 1
        return (cos_sim(a, b) - minx) / (maxx - minx)

    # Define the Inception Resnet V1 module (GoogLeNet).
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # Define a dataset and data loader.
    dataset = datasets.ImageFolder('student_data/Test')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=lambda x: x[0])

    # Perform MTCNN facial detection.
    # Detects the face present in each image and prints the detection probability.
    aligned = []
    names = []
    for x, y in loader:
        x_aligned, prob = mtcnn(x, return_prob=True)
        if x_aligned is not None:
            print('Face detected with probability: {:8f}'.format(prob))
            aligned.append(x_aligned)
            names.append(dataset.idx_to_class[y])

    # Calculate the 512-dimensional face embeddings.
    aligned = torch.stack(aligned).to(device)
    embeddings = resnet(aligned).to(device)

    # Print the distance matrix for the classes.
    # The embeddings live in a common space, where cosine distance is measured.
    # Rebind cos_sim to torch's CosineSimilarity so cos() works on GPU tensors.
    cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
    for i in range(0, len(names)):
        emb = embeddings[i].unsqueeze(0)
        # The cosine similarity between the embeddings is given by 'dist'.
        dist = cos(embeddings[0], emb)
    dists = [[cos(e1, e2).item() for e2 in embeddings] for e1 in embeddings]
    # The print statement below is helpful for analysing the results
    # and for determining the value of the threshold.
    print(pd.DataFrame(dists, columns=names, index=names))

    i = 1
    # A Haar cascade classifier can also detect faces through the webcam.
    # It is preferred over MTCNN for real-time use because it is faster.
    classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

    # This is the function that does the face recognition.
    def verify(embedding, start_rec_time):
        for i, k in enumerate(embeddings):
            for j, l in enumerate(embedding):
                # Compute the cosine distance.
                dist = cos(k, l)
                # The threshold of 0.8 was determined from the distance table above.
                if dist > 0.8:
                    # The name of the identified person is printed on the screen,
                    # as well as below the detected face (below the rectangular box).
                    text = names[i]
                    cv2.putText(img1, text,
                                (boxes[j][0].astype(int), boxes[j][3].astype(int) + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)
                    print(text)
                    recognized_names.append(text)
        textOnImg = "Time Elapsed: " + str(int(time.time() - start_rec_time)) + " s"
        cv2.putText(img1, textOnImg, (20, 20),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

    # Define the Inception Resnet V1 module again, this time keeping all faces.
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
    mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7], factor=0.709,
                  prewhiten=True, device=device, keep_all=True)

    # The camera is opened and webcam video streaming starts.
    print("Camera on")
    cv2.namedWindow("Detected faces")
    options = {"CAP_PROP_FRAME_WIDTH": 640, "CAP_PROP_FRAME_HEIGHT": 480,
               "CAP_PROP_FPS ": 30}
    output_params = {"-fourcc": "MJPG", "-fps": 30}
    writer = WriteGear(output_filename='Output.mp4', compression_mode=False,
                       logging=True, **output_params)
    url = urlNew

    # Run face recognition for 1 minute.
    start_face_rec = time.time()
    end_face_rec = time.time() + 60
    while time.time() < end_face_rec:
        img_resp = requests.get(url)
        img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)
        img = cv2.imdecode(img_arr, -1)
        # Flip to act as a mirror.
        im = cv2.flip(img, 1)
        # The resize function of imutils maintains the aspect ratio.
        # It provides the keyword arguments width and height, so the image
        # can be resized to the intended width/height.
        frame = imutils.resize(im, width=400)
        # Detect faces using PCN.
        winlist = pcn.detect(frame)
        img1 = pcn.draw(frame, winlist)
        face = list(map(lambda win: crop_face(img1, win, 160), winlist))
        face = [f[0] for f in face]
        cnt = 1
        for f in face:
            print('Printing Face no: ', cnt)
            cv2.imshow('Detected faces', f)
            cnt += 1
            path = "./student_data/Pics/"
            img_name = "image_{}.jpg".format(i)
            # The captured image is saved.
            cv2.imwrite(os.path.join(path, img_name), f)
            imgName = "./student_data/Pics/image_{}.jpg".format(i)
            # Get the cropped and prewhitened image tensor.
            img = Image.open(imgName)
            i = i + 1
            img_cropped = mtcnn(img)
            boxes, prob = mtcnn.detect(img)
            img_draw = img.copy()
            draw = ImageDraw.Draw(img_draw)
            # Rectangular boxes are drawn on faces present in the image.
            # The detected and cropped faces are then saved.
            if boxes is not None:
                for i, box in enumerate(boxes):
                    extract_face(img, box,
                                 save_path='./student_data/Pics/Cropped_Face_{}.jpg'.format(i))
                img_draw.save('./student_data/Pics/Faces_Detected.jpg')
                ima = cv2.imread('./student_data/Pics/Faces_Detected.jpg')
            # Calculate the embeddings of each cropped face.
            if img_cropped is not None:
                img_embedding = resnet(img_cropped.cuda()).to(device)
                # Call verify to identify the person from the embeddings.
                cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
                verify(img_embedding, start_face_rec)
        # The 'Detecting...' window opens.
        # Rectangular boxes are drawn on detected faces.
        # The identified faces have their respective names below the box.
        cv2.imshow('Detecting...', img1)
        writer.write(img1)
        if not face:
            textForImg = "Time Elapsed: " + str(int(time.time() - start_face_rec)) + " s"
            cv2.putText(img1, textForImg, (40, 40),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)
            cv2.imshow('Detecting...', img1)
        key = cv2.waitKey(1)
        # 13 is the 'Enter' key.
        # If 'Enter' is pressed, all windows are forced to close.
        if key == 13:
            break

    print("calculating a list of all recognized faces...")
    rec_names_dict = {i: recognized_names.count(i) for i in recognized_names}
    filtered_names = []
    for key in rec_names_dict:
        # Keep only names recognized in more than 30 frames.
        if rec_names_dict[key] > 30:
            filtered_names.append(key)
    print("Total Recognized names: ", rec_names_dict)
    print("Filtered names: ", filtered_names)
    cv2.destroyAllWindows()
    writer.close()
    return filtered_names
def upload_file():
    if request.method == 'POST':
        # Check that the post request has the file part.
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # If the user does not select a file, the browser may
        # submit an empty part without a filename.
        if request.files['file'].filename == '':
            return render_template('notselected.html', message='File is not selected')
        if ('jpg' in str(file.filename).lower() or 'jpeg' in str(file.filename).lower()
                or 'png' in str(file.filename).lower()):
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                filedir = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(filedir)
                filek = '/static/uploads/' + filename
                img = Image.open(filedir)
                boxes, probs = mtcnn.detect(img)
                # Draw boxes and save faces.
                img_draw = img.copy()
                draw = ImageDraw.Draw(img_draw)
                if boxes is not None:
                    for j, box in enumerate(boxes):
                        extract_face(img, box,
                                     save_path='detected/detected_face_{}.png'.format(j))
                        with open(str(APP_ROOT) + '/detected/detected_face_{}.png'.format(j), 'rb') as f:
                            image_bytes = f.read()
                            pred_idx = get_prediction(image_bytes=image_bytes)
                        if int(pred_idx) == 0:
                            draw.rectangle(box.tolist(), width=10,
                                           outline=ImageColor.getrgb('green'))
                        else:
                            draw.rectangle(box.tolist(), width=10,
                                           outline=ImageColor.getrgb('red'))
                img_draw.save(filedir)
        if 'mp4' in str(file.filename).lower() or 'mov' in str(file.filename).lower():
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                filedir = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(filedir)
                filek = '/static/uploads/' + filename
                video = mmcv.VideoReader(filedir)
                frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                          for frame in video]
                last_predict = []
                box_list = []
                frames_tracked = []
                # Loop through the frames.
                for i, frame in enumerate(frames):
                    width, height = frame.size
                    # Only run detection every `frequency` frames.
                    if i % frequency == 0:
                        last_predict.clear()
                        # Detect faces.
                        boxes, prob = mtcnn.detect(frame)
                        box_list.append(boxes)
                        # Draw boxes and save faces.
                        frame_draw = frame.copy()
                        draw = ImageDraw.Draw(frame_draw)
                        # Check whether there is a detection.
                        if boxes is not None:
                            # Loop through all detections.
                            for j, box in enumerate(boxes):
                                extract_face(frame, box,
                                             save_path='detected/detected_face_{}.png'.format(j))
                                with open(str(APP_ROOT) + '/detected/detected_face_{}.png'.format(j), 'rb') as f:
                                    image_bytes = f.read()
                                    pred_idx = get_prediction(image_bytes=image_bytes)
                                last_predict.append(pred_idx)
                                if int(pred_idx) == 0:
                                    draw.rectangle(box.tolist(), width=10,
                                                   outline=ImageColor.getrgb('green'))
                                else:
                                    draw.rectangle(box.tolist(), width=10,
                                                   outline=ImageColor.getrgb('red'))
                            frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                        else:
                            # If nothing was detected, keep the plain frame.
                            frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                    # On frames where detection is skipped, reuse the previous detections.
                    else:
                        frame_draw = frame.copy()
                        draw = ImageDraw.Draw(frame_draw)
                        # If there were detections in the previous detection frame.
                        if box_list[-1] is not None:
                            boxes = box_list[-1]
                            for j, box in enumerate(boxes):
                                if int(last_predict[j]) == 0:
                                    draw.rectangle(box.tolist(), width=10,
                                                   outline=ImageColor.getrgb('green'))
                                else:
                                    draw.rectangle(box.tolist(), width=10,
                                                   outline=ImageColor.getrgb('red'))
                            frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                        else:
                            # If there were no detections, add just the frame.
                            frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                dim = frames_tracked[0].size
                fourcc = cv2.VideoWriter_fourcc(*'FMP4')
                video_tracked = cv2.VideoWriter(filename, fourcc, 25.0, dim)
                for frame in frames_tracked:
                    video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
                video_tracked.release()
                os.replace((str(APP_ROOT) + '/' + filename), (str(APP_ROOT) + filek))
        return render_template('out.html', filek=filek)
    return render_template('index.html')
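# A hypothetical client for the upload_file view above, assuming it is bound
# to the application root with app.route('/', methods=['GET', 'POST']) and the
# server is running locally; the file name is illustrative.
import requests

with open('photo.jpg', 'rb') as fh:
    resp = requests.post('http://localhost:5000/', files={'file': fh})
print(resp.status_code)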
def _get_emb(image, box):
    """Return facial embeddings from the given image inside the box."""
    cropped_face = extract_face(image, box)
    cropped_face = prewhiten(cropped_face)
    return resnet(cropped_face.unsqueeze(0))[0].detach()
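# A minimal usage sketch for _get_emb. It assumes module-level `resnet` and
# `prewhiten`; `prewhiten` shipped with older facenet-pytorch releases, so a
# hypothetical stand-in using the classic FaceNet prewhitening is shown here.
import torch
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face

resnet = InceptionResnetV1(pretrained='vggface2').eval()

def prewhiten(x: torch.Tensor) -> torch.Tensor:
    # Stand-in: normalize the crop to zero mean and adjusted unit deviation.
    mean, std = x.mean(), x.std()
    std_adj = std.clamp(min=1.0 / (float(x.numel()) ** 0.5))
    return (x - mean) / std_adj

detector = MTCNN(keep_all=True)
img = Image.open('person.jpg')  # hypothetical input image
boxes, _ = detector.detect(img)
if boxes is not None:
    emb = _get_emb(img, boxes[0])  # 512-d embedding tensor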