def features(self, images):
    images_np_array = [image_as_array(image) for image in images]
    # All images must be the same size; otherwise don't use the batch API.
    image_height, image_width = images_np_array[0].shape[:2]
    batch_face_locations = face_recognition.batch_face_locations(
        images_np_array, batch_size=len(images), number_of_times_to_upsample=1)
    detections = []
    for face_locations, image in zip(batch_face_locations, images_np_array):
        if not face_locations:
            detections.append([])
            continue
        crops = self.create_crops(width=image_width,
                                  height=image_height,
                                  face_locations=face_locations)
        face_features = self.face_features(image=image,
                                           face_locations=face_locations)
        detections.append([
            FaceDetection(crop=crop, encoding=features)
            for crop, features in zip(crops, face_features)
        ])
    return detections
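# A minimal usage sketch for the batch `features` method above, assuming it
# lives on a detector class (here hypothetically `FaceDetector`) and that the
# inputs are same-size PIL images, as the batch path requires.
from PIL import Image

detector = FaceDetector()  # hypothetical constructor
images = [Image.open(p).convert("RGB").resize((640, 360))
          for p in ("a.jpg", "b.jpg")]
for i, faces in enumerate(detector.features(images)):
    print(f"image {i}: {len(faces)} face(s)")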
def print_matches_vid(act_frames, img_in_frames, output, frame_count,
                      known_face_encodings, known_face_name,
                      in_tolerance=0.6, upsample=0):
    batch_of_face_locations = face_recognition.batch_face_locations(
        img_in_frames, batch_size=2, number_of_times_to_upsample=upsample)
    for frame_number_in_batch, face_locations in enumerate(
            batch_of_face_locations):
        number_of_faces_in_frame = len(face_locations)
        frame_number = frame_count - 2 + frame_number_in_batch
        print("Found {} face(s) in frame #{}.".format(number_of_faces_in_frame,
                                                      frame_number))
        face_encodings = face_recognition.face_encodings(
            img_in_frames[frame_number_in_batch],
            known_face_locations=face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding,
                                                     tolerance=in_tolerance)
            name = "Unknown"
            face_distances = face_recognition.face_distance(
                known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                best_encoding = known_face_encodings[best_match_index]
                name = known_face_name[str(best_encoding)]
            face_names.append(name)
        # Draw a labelled box around each recognised face.
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            cv2.rectangle(act_frames[frame_number_in_batch], (left, top),
                          (right, bottom), (0, 0, 255), 2)
            cv2.rectangle(act_frames[frame_number_in_batch],
                          (left, bottom - 25), (right, bottom), (0, 0, 255),
                          cv2.FILLED)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(act_frames[frame_number_in_batch], name,
                        (left + 6, bottom - 6), font, 0.5, (255, 255, 255), 1)
        print("Writing frame {}".format(frame_number))
        output.write(act_frames[frame_number_in_batch])
def __get_location_frames(self):
    '''Return the total number of faces detected, plus (locations, original
    frame, processed frame) triples for the frames that produced hits.'''
    batch = fr.batch_face_locations(self.__process_frames, 1,
                                    self.__ps.batch_size)
    # Indices of frames where at least one face was found.
    hits = np.nonzero(batch)[0]
    locations = np.asarray(batch)[hits]
    return (sum(len(x) for x in locations),
            zip(locations,
                np.asarray(self.__original_frames)[hits],
                np.asarray(self.__process_frames)[hits]))
def process_video(vid):
    print(f"Processing: {vid}")
    basename, _ = os.path.splitext(os.path.basename(vid))
    os.makedirs(f"{basename}_faces", exist_ok=True)
    if os.path.exists(f"{basename}.maga"):
        return
    video_capture = cv2.VideoCapture(vid)
    faces = 0
    frames = []
    while video_capture.isOpened():
        # Grab a single frame of video
        ret, frame_cv = video_capture.read()
        # Bail out when the video file ends
        if not ret:
            break
        frame = cv2.cvtColor(frame_cv, cv2.COLOR_BGR2RGB)
        frames.append(frame)
        print(".", end="", flush=True)
        # Tune BATCH_FRAMES to GPU memory size.
        if len(frames) == BATCH_FRAMES:
            print("#", end="", flush=True)
            batch_of_face_locations = face_recognition.batch_face_locations(
                frames, number_of_times_to_upsample=0)
            for idx, face_locations in enumerate(batch_of_face_locations):
                if len(face_locations) == 0:
                    continue
                face_encodings = face_recognition.face_encodings(
                    face_image=frames[idx],
                    known_face_locations=face_locations)
                for face_encoding, face_location in zip(face_encodings,
                                                        face_locations):
                    print("$", end="", flush=True)
                    top, right, bottom, left = face_location
                    # Crop the face out of the frame it was detected in.
                    face_image = frames[idx][top:bottom, left:right]
                    pil_image = Image.fromarray(face_image)
                    pil_image.save(
                        f"{basename}_faces/{basename}-{faces:08d}.jpg")
                    with open(f"{basename}_faces/{basename}-{faces:08d}.np",
                              "wb") as b:
                        np.save(b, face_encoding, allow_pickle=False,
                                fix_imports=False)
                    faces += 1
            print("", end="\n", flush=True)
            frames = []
    with open(f"{basename}.maga", "w") as F:
        F.write("MAGA!")
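# A minimal sketch of reading the encodings `process_video` saves, assuming
# the "<basename>_faces" directory layout above (the glob pattern is ours).
import glob
import numpy as np

def load_saved_encodings(faces_dir):
    encodings = []
    for path in sorted(glob.glob(f"{faces_dir}/*.np")):
        with open(path, "rb") as f:
            encodings.append(np.load(f))
    return encodings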
def face_recognition(self, image_list, using_cuda=True, batch_size=16,
                     model="cnn"):
    '''
    :param image_list: list of images to process.
    :param using_cuda: whether to use GPU acceleration; with a GPU the
        batched CNN model is recommended.
    :param batch_size: size of one batch.
    :param model: "cnn" or "hog"; the HOG model runs faster on CPU.
    :return: per-image list of speaking-label dicts.
    '''
    location_list = []
    print("Start Face Detection")
    if using_cuda and model == "cnn":
        for i in tqdm(range(0, len(image_list), batch_size)):
            batch_location = face_recognition.batch_face_locations(
                image_list[i:i + batch_size],
                number_of_times_to_upsample=0,
                batch_size=batch_size)
            location_list = location_list + batch_location
    else:
        for i in tqdm(range(len(image_list))):
            image = image_list[i]
            location = face_recognition.face_locations(image, model=model)
            location_list.append(location)
    assert len(image_list) == len(location_list)
    speaking_label_list = []
    for i in range(len(location_list)):
        image, locations = image_list[i], location_list[i]
        speaking_label = []
        for loc in locations:
            face_image, dlib_loc = self.face_extract(image, loc)
            check_result = self.face_check(image, dlib_loc)
            if not check_result["flag"]:
                self.face_save(check_result["embedding"], face_image,
                               check_result["index"])
            abs_label, rel_label = self.speaking_label(check_result["shape"])
            speaking_label.append({
                "index": check_result["index"],
                "abs_label": abs_label,
                "rel_label": rel_label
            })
        speaking_label_list.append(speaking_label)
    return speaking_label_list
def detectSpeakerFace(frame_paths, speaker_enc):
    n = len(frame_paths)
    frame_batch = []
    speaker_bb = {}
    for frame_counter in range(n):
        frame = fr.load_image_file(frame_paths[frame_counter])
        frame_batch.append(frame)
        # Process only when the batch is full or we are at the last frame.
        if frame_counter != n - 1 and len(frame_batch) != BATCH_SIZE:
            continue
        loc_batch = fr.batch_face_locations(
            frame_batch, number_of_times_to_upsample=UPSAMPLE)
        for frame_number_in_batch, curr_locations in enumerate(loc_batch):
            curr_frame_number = (frame_counter + 1 - len(frame_batch)
                                 + frame_number_in_batch)
            curr_frame_path = frame_paths[curr_frame_number]
            curr_frame = frame_batch[frame_number_in_batch]
            m = ('%-20s %-6d %-3d' % (curr_frame_path, curr_frame_number,
                                      len(curr_locations)))
            print(FORMAT % ('detect_frame', m))
            if len(curr_locations) == 0:
                continue
            curr_encodings = fr.face_encodings(
                curr_frame, known_face_locations=curr_locations)
            res = fr.compare_faces(curr_encodings, speaker_enc)
            # TODO: find the res[k] == True such that distance to speaker is minimized
            for k in range(len(curr_encodings)):
                curr_frame_name = '/'.join(curr_frame_path.split('/')[-2:])
                if res[k]:
                    top, right, bottom, left = curr_locations[k]
                    speaker_bb[curr_frame_name] = {
                        'top': top,
                        'right': right,
                        'bottom': bottom,
                        'left': left,
                    }
        frame_batch = []
    return speaker_bb
def batch_face_locations(images: List[numpy.ndarray], batch_size=128):
    """Finds all face locations in a list of images through batch processing.

    Args:
        images (List[numpy.ndarray]): List of images.
        batch_size (int): Number of images to process per GPU batch.

    Returns:
        List[List[tuple]]: Per-image lists of face boxes
            (top, right, bottom, left).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    face_locations = fr.batch_face_locations(images,
                                             number_of_times_to_upsample=0,
                                             batch_size=batch_size)
    assert len(face_locations) == len(images)
    return face_locations
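# Usage sketch for the wrapper above (the file paths are placeholders).
import face_recognition as fr

imgs = [fr.load_image_file(p) for p in ("frame_000.jpg", "frame_001.jpg")]
for image_locs in batch_face_locations(imgs, batch_size=2):
    for top, right, bottom, left in image_locs:
        print(top, right, bottom, left)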
def getFrameInfo(frame_paths, speaker_enc):
    n = len(frame_paths)
    frame_batch = []
    frame_info = {}
    for frame_counter in range(n):
        frame = fr.load_image_file(frame_paths[frame_counter])
        frame_batch.append(frame)
        # Process only when the batch is full or we are at the last frame.
        if frame_counter != n - 1 and len(frame_batch) != BATCH_SIZE:
            continue
        loc_batch = fr.batch_face_locations(
            frame_batch, number_of_times_to_upsample=UPSAMPLE)
        for frame_number_in_batch, curr_locations in enumerate(loc_batch):
            curr_frame_number = (frame_counter + 1 - len(frame_batch)
                                 + frame_number_in_batch)
            curr_frame_path = frame_paths[curr_frame_number]
            curr_frame = frame_batch[frame_number_in_batch]
            print("%-20s %-6d %-3d" % (curr_frame_path, curr_frame_number,
                                       len(curr_locations)))
            if len(curr_locations) == 0:
                continue
            curr_encodings = fr.face_encodings(
                curr_frame, known_face_locations=curr_locations)
            res = fr.compare_faces(curr_encodings, speaker_enc)
            for k in range(len(curr_encodings)):
                curr_frame_name = '/'.join(curr_frame_path.split('/')[-2:])
                frame_info[curr_frame_name] = {}
                if res[k]:
                    top, right, bottom, left = curr_locations[k]
                    frame_info[curr_frame_name] = {
                        'top': top,
                        'right': right,
                        'bottom': bottom,
                        'left': left,
                    }
        frame_batch = []
    return frame_info
def get_face_locations(frames, GPU=False, batch_size=64):
    face_coordinates = []
    if GPU:
        for i in range(0, len(frames), batch_size):
            batch_of_frames = frames[i:i + batch_size]
            batch_face_locations = face_recognition.batch_face_locations(
                batch_of_frames, number_of_times_to_upsample=0)
            face_coordinates += batch_face_locations
        # Keep the last detected face per frame, or None when nothing found.
        face_coordinates = [f[-1] if f is not None and len(f) else None
                            for f in face_coordinates]
    else:
        for frame in tqdm(frames):
            coordinates_found = face_recognition.face_locations(frame)
            if coordinates_found is not None and len(coordinates_found):
                face_coordinates.append(coordinates_found[-1])
            else:
                face_coordinates.append(None)
    return face_coordinates
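# Minimal usage sketch for get_face_locations; the video path and the
# frame-reading loop are assumptions, not part of the original snippet.
import cv2
import face_recognition

cap = cv2.VideoCapture("input.mp4")
frames = []
while True:
    ok, bgr = cap.read()
    if not ok:
        break
    frames.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
cap.release()
locations = get_face_locations(frames, GPU=False)
print(sum(loc is not None for loc in locations), "frames with a face")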
def crop_face_batch(imgs, batch_size):
    data_len = imgs.shape[0]
    hasFace = np.ones((data_len,), dtype=bool)
    faces = np.ndarray((data_len, 3, 224, 224), dtype=np.float32)
    imgs = numpy_convert_to_list(imgs)
    batch_locs = fr.batch_face_locations(imgs, batch_size=batch_size)
    for idx, locations in enumerate(batch_locs):
        if len(locations) == 0:
            hasFace[idx] = False
            continue
        # Keep the largest detected face: (top, right, bottom, left).
        top, right, bottom, left = locations[max_area_indx(locations)]
        cropped = cv.resize(imgs[idx][top:bottom, left:right],
                            (224, 224)).astype(np.float32)
        # HWC -> CHW for the downstream model.
        faces[idx] = np.transpose(cropped, (2, 0, 1))
    return faces, hasFace
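# Usage sketch for crop_face_batch; the (N, H, W, 3) uint8 input layout is
# an assumption inferred from how the function indexes its input, and the
# zero-filled placeholder frames will simply yield hasFace == False.
import numpy as np

imgs = np.zeros((8, 360, 640, 3), dtype=np.uint8)  # placeholder frames
faces, has_face = crop_face_batch(imgs, batch_size=8)
print(faces.shape, has_face.sum())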
def processBatch(self, raw_frames, rgb_frames, frame_count, outdir):
    target_found = False
    batch_of_face_locations = face_recognition.batch_face_locations(
        rgb_frames, number_of_times_to_upsample=0)
    for frame_number_in_batch, face_locations in enumerate(
            batch_of_face_locations):
        frame_number = frame_count - len(rgb_frames) + frame_number_in_batch
        raw_frame = raw_frames[frame_number_in_batch]
        rgb_frame = rgb_frames[frame_number_in_batch]
        outfile = os.path.join(outdir, "frame_{0}.jpg".format(frame_number))
        self.processImage(raw_frame, rgb_frame, outfile)
    return target_found
def batch_job(frames, images, positions, face_counts, batch_size):
    batch_of_face_locations = face_recognition.batch_face_locations(
        frames, number_of_times_to_upsample=0, batch_size=batch_size)
    for frame, faces in zip(frames, batch_of_face_locations):
        position = []
        for (top, right, bottom, left) in faces:
            img_face = frame[top:bottom, left:right]
            img_yuv = cv2.cvtColor(img_face, cv2.COLOR_BGR2YUV)
            # Equalize the histogram of the Y (luma) channel.
            img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
            # Convert the YUV image back to BGR format.
            img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
            img_output2 = cv2.resize(img_output, (299, 299),
                                     interpolation=cv2.INTER_AREA)
            images.append(img_output2)
            position.append((top, right, bottom, left))
        face_counts.append(len(faces))
        positions.append(position)
def load_face_data():
    # Known-face library: load one sample picture per person and learn how
    # to recognize it.
    yzc_image = face_recognition.load_image_file("/know_face_img/YZC.jpg")
    yzc_face_encoding = face_recognition.face_encodings(yzc_image)[0]
    jack_image = face_recognition.load_image_file("/know_face_img/jack.jpg")
    jack_face_encoding = face_recognition.face_encodings(jack_image)[0]
    wuyj_image = face_recognition.load_image_file(
        "/know_face_img/WuYongjun.jpg")
    wuyj_face_encoding = face_recognition.face_encodings(wuyj_image)[0]
    # Create arrays of known face encodings and their names.
    known_face_encodings = [
        yzc_face_encoding,
        jack_face_encoding,
        wuyj_face_encoding
    ]
    known_face_names = ["Youzhengcai", "Jack Ma", "WuYongjun"]
    return known_face_encodings, known_face_names
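# A short, hedged follow-on: comparing an unknown image against the library
# loaded above (the unknown image path is an assumption for illustration,
# and 0.6 is the library's default tolerance).
unknown = face_recognition.load_image_file("/know_face_img/unknown.jpg")
unknown_encodings = face_recognition.face_encodings(unknown)
known_face_encodings, known_face_names = load_face_data()
for enc in unknown_encodings:
    matches = face_recognition.compare_faces(known_face_encodings, enc,
                                             tolerance=0.6)
    names = [n for n, m in zip(known_face_names, matches) if m]
    print(names or ["Unknown"])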
def run():
    assert os.path.exists("data")
    batch_size = 64
    with open("data/celeba_cropped/list_eval_partition_filtered.txt",
              "r") as f:
        lines = f.readlines()
    fnames = list([l.split()[0] for l in lines])
    print("Generating embeddings...")
    embeddings = []
    for i in range(0, len(fnames), batch_size):
        print(i)
        imax = min(i + batch_size, len(fnames))
        fname_batch = fnames[i:imax]
        images = [
            face_recognition.load_image_file(
                "data/celeba/img_align_celeba/%s" % fname)
            for fname in fname_batch
        ]
        locations = face_recognition.batch_face_locations(
            images, batch_size=batch_size)
        batch_embeddings = []
        for j in range(len(fname_batch)):
            img = images[j]
            loc = locations[j]
            embedding_list = face_recognition.face_encodings(img, loc)
            if len(embedding_list) == 0:
                # No face found: fill with NaNs.
                embedding = np.full((128,), np.nan)
            else:
                embedding = embedding_list[0]
            batch_embeddings.append(embedding)
        embeddings.append(np.stack(batch_embeddings, axis=0))
    embeddings = np.concatenate(embeddings, axis=0)
    np.save("data/celeba_cropped/embeddings.npy", embeddings)
    with open("data/celeba_cropped/embedding_file_order.txt", "w") as f:
        f.write("\n".join(fnames))
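# Hedged companion sketch: reading the embeddings `run` wrote and dropping
# the NaN rows that mark images with no detected face.
import numpy as np

embeddings = np.load("data/celeba_cropped/embeddings.npy")
valid = ~np.isnan(embeddings).any(axis=1)
print(f"{valid.sum()} of {len(embeddings)} images have a face embedding")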
def faces_location_vedio(self, vedio_path):
    """One image per frame; still needs tuning."""
    import cv2
    video_capture = cv2.VideoCapture(vedio_path)
    frames = []
    frame_count = 0
    while video_capture.isOpened():
        # Grab a single frame of video
        ret, frame = video_capture.read()
        # Bail out when the video file ends
        if not ret:
            break
        # Convert the image from BGR color (which OpenCV uses) to RGB color
        # (which face_recognition uses)
        frame = frame[:, :, ::-1]
        # Save each frame of the video to a list
        frame_count += 1
        frames.append(frame)
        # Every 128 frames (the default batch size), batch process the list
        # of frames to find faces
        if len(frames) == 128:
            batch_of_face_locations = face_recognition.batch_face_locations(
                frames, number_of_times_to_upsample=0)
            # Now let's list all the faces we found in all 128 frames
            for frame_number_in_batch, face_locations in enumerate(
                    batch_of_face_locations):
                number_of_faces_in_frame = len(face_locations)
                frame_number = frame_count - 128 + frame_number_in_batch
                logger.info("I found {} face(s) in frame #{}.".format(
                    number_of_faces_in_frame, frame_number))
                for face_location in face_locations:
                    # Log the location of each face in this frame
                    top, right, bottom, left = face_location
                    logger.info(
                        " - A face is located at pixel location Top: {}, "
                        "Left: {}, Bottom: {}, Right: {}".format(
                            top, left, bottom, right))
            # Clear the frames array to start the next batch
            frames = []
def extractFaces(frame_paths):
    n = len(frame_paths)
    face_encodings = []
    enc_to_loc = []
    frame_batch = []
    for frame_counter in range(n):
        frame = fr.load_image_file(frame_paths[frame_counter])
        frame_batch.append(frame)
        # Process only when the batch is full or we are at the last frame.
        if frame_counter != n - 1 and len(frame_batch) != BATCH_SIZE:
            continue
        loc_batch = fr.batch_face_locations(
            frame_batch, number_of_times_to_upsample=UPSAMPLE)
        for frame_number_in_batch, curr_locations in enumerate(loc_batch):
            curr_frame_number = (frame_counter + 1 - len(frame_batch)
                                 + frame_number_in_batch)
            curr_frame_path = frame_paths[curr_frame_number]
            curr_frame = frame_batch[frame_number_in_batch]
            m = ('%-20s %-6d %-3d' % (curr_frame_path, curr_frame_number,
                                      len(curr_locations)))
            print(FORMAT % ('proc_frame', m))
            if len(curr_locations) == 0:
                continue
            curr_encodings = fr.face_encodings(
                curr_frame, known_face_locations=curr_locations)
            for k in range(len(curr_encodings)):
                enc_to_loc.append({'frame': curr_frame_number,
                                   'loc': curr_locations[k]})
                face_encodings.append(curr_encodings[k])
        frame_batch = []
    return (face_encodings, enc_to_loc)
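# Hedged follow-on sketch: group the encodings extractFaces returns by
# comparing them against the first face seen. The frame glob is an
# illustrative assumption, and 0.6 is the library's default match threshold.
from glob import glob
import face_recognition as fr

frame_paths = sorted(glob("frames/*.jpg"))  # illustrative input
encodings, enc_to_loc = extractFaces(frame_paths)
if encodings:
    dists = fr.face_distance(encodings, encodings[0])
    same = [enc_to_loc[i]['frame'] for i, d in enumerate(dists) if d < 0.6]
    print("frames matching the first face:", same)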
def calc_bbox(image_list, batch_size=5):
    """Batch inference of face location; batch_size should be a factor of
    the total frame count."""
    top_sum = right_sum = bottom_sum = left_sum = 0
    for i in tqdm(range(len(image_list) // batch_size)):
        image_batch = []
        for j in range(i * batch_size, (i + 1) * batch_size):
            image = face_recognition.load_image_file(image_list[j])
            image_batch.append(image)
        face_locations = face_recognition.batch_face_locations(
            image_batch, number_of_times_to_upsample=0,
            batch_size=batch_size)
        for face_location in face_locations:
            # Assumes exactly one face detected per frame; a frame with no
            # detection raises IndexError here.
            top, right, bottom, left = face_location[0]
            top_sum += top
            right_sum += right
            bottom_sum += bottom
            left_sum += left
    return (top_sum // len(image_list), right_sum // len(image_list),
            bottom_sum // len(image_list), left_sum // len(image_list))
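# Hedged usage sketch: apply the averaged box from calc_bbox to crop every
# frame. The file list and output naming are illustrative assumptions; note
# PIL's crop takes (left, upper, right, lower).
from glob import glob
from PIL import Image

frame_files = sorted(glob("frames/*.jpg"))
top, right, bottom, left = calc_bbox(frame_files, batch_size=5)
for path in frame_files:
    Image.open(path).crop((left, top, right, bottom)).save(path + ".crop.jpg")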
from glob import glob
import os

import face_recognition
from PIL import Image

batch_size = 32  # assumed value; tune to GPU memory

img_files = glob("/home/ubuntu/big_data/*.jpg")
dir_name = "/home/ubuntu/face_cropped/"
for start_index in range(0, len(img_files), batch_size):
    # Get a batch of image filenames from the directory.
    img_batch = img_files[start_index:start_index + batch_size]
    # Convert those image files to numpy arrays, keeping only full-size frames.
    images = {img: face_recognition.load_image_file(img) for img in img_batch}
    images = {
        fname: images[fname]
        for fname in images if images[fname].shape == (1920, 1080, 3)
    }
    fnames = list(images.keys())
    images = list(images.values())
    # Use the face-detection API to get face locations.
    batch_of_face_locations = face_recognition.batch_face_locations(
        images, number_of_times_to_upsample=0, batch_size=len(images))
    for i in range(len(batch_of_face_locations)):
        if not batch_of_face_locations[i]:
            continue  # no face found in this image
        face_location = batch_of_face_locations[i][0]
        # Unpack bounding box coordinates of the face location.
        top, right, bottom, left = face_location
        # Crop the image to only the face.
        face_image = images[i][top:bottom, left:right]
        # Generate new filename with the original basename in the new directory.
        new_file_name = dir_name + os.path.basename(fnames[i])
        # Save the image file to disk.
        img = Image.fromarray(face_image, 'RGB')
        img.save(new_file_name)
    print("a completed batch at position: " + str(start_index))
while video_capture.isOpened():
    # Grab a single frame of video
    ret, frame = video_capture.read()
    # Bail out when the video file ends
    if not ret:
        break
    # Save each frame of the video to a list
    frame_count += 1
    frames.append(frame)
    # Every 128 frames (the default batch size), batch process the list of
    # frames to find faces
    if len(frames) == 128:
        batch_of_face_locations = face_recognition.batch_face_locations(
            frames, number_of_times_to_upsample=0)
        # Now let's list all the faces we found in all 128 frames
        for frame_number_in_batch, face_locations in enumerate(
                batch_of_face_locations):
            number_of_faces_in_frame = len(face_locations)
            frame_number = frame_count - 128 + frame_number_in_batch
            print("I found {} face(s) in frame #{}.".format(
                number_of_faces_in_frame, frame_number))
            for face_location in face_locations:
                # Print the location of each face in this frame
                top, right, bottom, left = face_location
                print(" - A face is located at pixel location Top: {}, "
                      "Left: {}, Bottom: {}, Right: {}".format(
                          top, left, bottom, right))
        # Clear the frames array to start the next batch
        frames = []
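# Minimal setup sketch for the loop above (the file name is an assumption).
# One caveat worth noting: OpenCV reads frames in BGR order while
# face_recognition expects RGB, so converting each frame before appending
# (as the earlier snippet does with frame[:, :, ::-1]) is usually advisable.
import cv2
import face_recognition

video_capture = cv2.VideoCapture("input.mp4")
frames = []
frame_count = 0
# ... then run the batching loop above, ideally appending
# cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) instead of the raw frame.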
def face_dectect_knn(filename_video_input):
    label = 0
    knn_clf = pickle.load(open(filename_knn_model, 'rb'))
    knn_clf.n_jobs = 16
    folder_temp = filename_video_input.split(".")[0]
    folder_temp = os.path.join(folder_output, folder_temp)
    if not os.path.exists(folder_temp):
        os.mkdir(folder_temp)
    file_video = os.path.join(folder_videos, filename_video_input)
    video = cv2.VideoCapture(file_video)
    frames_total = video.get(cv2.CAP_PROP_FRAME_COUNT)
    progress = tqdm(range(int(frames_total)),
                    desc="Recognizing video frames", unit="frame")
    frame_count = 1
    success = True
    begin = time.time()
    total = {}
    frames = []
    while success:
        success, frame = video.read()
        if not success:
            break
        if frame_count % frames_every_capture == 0:
            capture = int(frame_count / frames_every_capture)
            progress.update(frames_every_capture)
            # The frame can be scaled down, but the threshold must then be
            # adjusted as well.
            frame = cv2.resize(frame, (0, 0), fx=resize_scale,
                               fy=resize_scale,
                               interpolation=cv2.INTER_CUBIC)
            frames.append(frame)
            if len(frames) >= frames_to_recofnize_once:
                batch_of_face_locations = face_recognition.batch_face_locations(
                    frames, number_of_times_to_upsample=0)
                frame_index_zip = []
                face_encodings_zip = []
                face_locations_zip = []
                for frame_index, face_locations in enumerate(
                        batch_of_face_locations):
                    if len(face_locations) == 0:
                        continue
                    face_encodings = face_recognition.face_encodings(
                        frames[frame_index], face_locations)
                    for i in range(len(face_encodings)):
                        frame_index_zip.append(frame_index)
                    face_encodings_zip += face_encodings
                    face_locations_zip += face_locations
                predicts = knn_clf.predict(face_encodings_zip)
                for frame_index, (top, right, bottom, left), result in zip(
                        frame_index_zip, face_locations_zip, predicts):
                    frame = frames[frame_index]
                    frame_number = (capture - frames_to_recofnize_once
                                    + frame_index)
                    image_cut = frame[top:bottom, left:right]
                    label += 1
                    if result == "ycy":
                        face = [top, bottom, left, right]
                        filename_save = "recognition/true/{}-{}.jpg".format(
                            label, frame_number)
                        cv2.imwrite(filename_save, image_cut)
                        toleranses = {0.4: 10}
                        if frame_number in total:
                            if "faces" in total[frame_number]:
                                total[frame_number]["faces"].append(face)
                            else:
                                total[frame_number]["faces"] = [face]
                        else:
                            total[frame_number] = {"toleranse": toleranses,
                                                   "faces": [face]}
                    else:
                        filename_save = "recognition/false/{}-{}.jpg".format(
                            label, frame_number)
                        cv2.imwrite(filename_save, image_cut)
                frames = []
        frame_count = frame_count + 1
        cv2.waitKey(1)
    end = time.time()
    progress.close()
    print("Frame recognition took {} seconds in total.".format(
        round(float(end - begin), 3)))
    with io.open(os.path.join(folder_temp, filename_static), "w",
                 encoding="utf-8") as fd:
        text = json.dumps(total, ensure_ascii=False, indent=4)
        fd.write(text)
    video.release()