def _main(_argv):
    """Run RetinaFace over an image tree and write one text result per image.

    Every sub-directory found under ``FLAGS.widerface_data_dir`` is mirrored
    (by its base name) inside ``FLAGS.save_folder``.  For each image a
    ``<stem>.txt`` file is written containing the image stem, the number of
    detections, and one ``x y w h score`` line per detection.  Images whose
    result file already exists are skipped, so an interrupted run can resume.

    Args:
        _argv: Unused positional arguments handed over by the launcher.
    """
    detector = RetinaFace(FLAGS.weights_path, use_gpu_nms=False)
    save_dir = FLAGS.save_folder
    os.makedirs(save_dir, exist_ok=True)

    # os.walk yields the root directory first; only its descendants are used.
    subdirs = [entry[0] for entry in os.walk(FLAGS.widerface_data_dir)][1:]
    for subdir in subdirs:
        print(subdir)
        output_dir = os.path.join(save_dir, os.path.basename(subdir))
        os.makedirs(output_dir, exist_ok=True)

        for file_name in os.listdir(subdir):
            # splitext only touches the real extension; a plain
            # str.replace("jpg", "txt") would also rewrite "jpg" inside the stem.
            stem = os.path.splitext(file_name)[0]
            result_path = os.path.join(output_dir, stem + ".txt")
            if os.path.isfile(result_path):
                continue

            img = cv2.imread(os.path.join(subdir, file_name))
            if img is None:
                # Nested directories and non-image files end up here.
                print("skipping unreadable image:", os.path.join(subdir, file_name))
                continue

            faces, ldmks = detector.detect(img, 0.01)
            with open(result_path, "w") as f:
                f.write(stem + "\n")
                f.write(str(len(faces)) + "\n")
                for face in faces:
                    x1, y1, x2, y2 = (int(v) for v in face[:4])
                    # Boxes are stored as corner + size, followed by the score.
                    f.write("{} {} {} {} {}\n".format(
                        x1, y1, x2 - x1, y2 - y1, face[4]))
def test(path, imgname):
    """Time RetinaFace on one image and save an annotated copy.

    A single scale factor is derived from the image size (target 640 for the
    shorter side, capped so the longer side does not exceed 640), detection
    is run ``count`` times, and the boxes, scores and landmarks of the last
    run are drawn and written to ``'640_' + imgname`` in the working
    directory.

    Relies on the module-level names ``parse_args``, ``gpuid``, ``count`` and
    ``thresh``.

    Args:
        path: Prefix concatenated verbatim with ``imgname`` to locate the image.
        imgname: Image file name; also used to build the output file name.
    """
    scales = [640, 640]
    args = parse_args()
    print('args=', args)
    detector = RetinaFace(args.network, gpuid, nms=0.3)
    img = cv2.imread(path + imgname)
    print(img.shape)

    im_shape = img.shape
    target_size = scales[0]
    max_size = scales[1]
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    scales = [im_scale]
    flip = False

    all_time = 0
    for c in range(count):
        start = time.time()
        faces, landmarks = detector.detect(img,
                                           thresh,
                                           scales=scales,
                                           do_flip=flip)
        end = time.time()
        # The first run is treated as warm-up and left out of the average.
        if c != 0:
            all_time += end - start
        print('count{}, faces.shape={}, landmarks,shape={}'.format(
            c, faces.shape, landmarks.shape))
    # With a single run there is nothing to average (and count - 1 == 0).
    if count > 1:
        print("average time:{}".format(all_time / (count - 1)))

    if faces is not None:
        print('find', faces.shape[0], 'faces')
        font = cv2.FONT_HERSHEY_SIMPLEX
        for i in range(faces.shape[0]):
            box = faces[i]
            # np.int was removed in NumPy 1.24; builtin int is the same dtype.
            ibox = box[0:4].copy().astype(int)
            cv2.rectangle(img, (ibox[0], ibox[1]), (ibox[2], ibox[3]),
                          (255, 0, 0), 2)
            score = box[4]
            k = "%.3f" % score
            cv2.putText(img, k, (ibox[0] + 2, ibox[1] + 14), font, 0.6,
                        (0, 255, 0), 2)
            if landmarks is not None:
                landmark5 = landmarks[i].astype(int)
                for l in range(landmark5.shape[0]):
                    # Points 0 and 3 are drawn green, the others red (BGR).
                    color = (0, 0, 255)
                    if l == 0 or l == 3:
                        color = (0, 255, 0)
                    cv2.circle(img, (landmark5[l][0], landmark5[l][1]), 1,
                               color, 2)
        filename = '640_' + imgname
        print('writing', filename)
        cv2.imwrite(filename, img)
class FaceDetector:
    """RetinaFace (mnet.25) wrapper that returns boxes, landmarks and face crops."""

    def __init__(self):
        """Load the detector with fixed threshold, scale and device settings."""
        self.gpuid = -1
        self.thresh = 0.95
        self.scales = [2 / 3]
        self.flip = False
        self.detector = RetinaFace('RetinaFace/model/mnet.25', 0, self.gpuid, 'net3')

    def detect(self, image, left_corner_human_box=None):
        """Detect faces and return integer-valued boxes and landmarks.

        Args:
            image: Image handed to the underlying detector.
            left_corner_human_box: Optional ``(x, y, ...)`` top-left corner of
                the crop ``image`` was taken from.  When given, boxes are
                converted to ``x, y, w, h`` and both boxes and landmarks are
                shifted by that corner.

        Returns:
            tuple: ``(face_boxes[:, :4], landmarks)``.  Values are truncated to
            whole numbers but keep the detector's array dtype.
        """
        face_boxes, landmarks = self.detector.detect(image,
                                                     self.thresh,
                                                     scales=self.scales,
                                                     do_flip=self.flip)

        # Truncate in place.  np.int was removed in NumPy 1.24; builtin int is
        # the dtype it used to alias.
        face_boxes[:] = face_boxes.astype(int)  # x_min, y_min, x_max, y_max
        landmarks[:] = landmarks.astype(int)  # five (x, y) points per face

        if left_corner_human_box is not None:
            x_human_min = left_corner_human_box[0]
            y_human_min = left_corner_human_box[1]
            # Convert to x, y, w, h and shift by the given corner.
            face_boxes[:, 2] -= face_boxes[:, 0]  # w
            face_boxes[:, 3] -= face_boxes[:, 1]  # h
            face_boxes[:, 0] += x_human_min
            face_boxes[:, 1] += y_human_min
            landmarks[:, :, 0] += x_human_min
            landmarks[:, :, 1] += y_human_min

        return face_boxes[:, :4], landmarks

    def get_faces_from_folder(self, folderPath):
        """Write every face found in ``folderPath`` to ``data/output/<n>.jpg``.

        Args:
            folderPath: Directory whose entries are read as images.
        """
        fileNumber = 0
        for filename in os.listdir(folderPath):
            image = cv2.imread(os.path.join(folderPath, filename))
            if image is None:
                # Not a readable image (sub-directory, text file, ...).
                continue
            face_boxes, landmarks = self.detect(image)
            for box, marks in zip(face_boxes, landmarks):
                faceImage = face_preprocess.preprocess(image, box, marks,
                                                       image_size='112,112')
                os.makedirs("data/output", exist_ok=True)
                filePath = "data/output/" + str(fileNumber) + ".jpg"
                cv2.imwrite(filePath, faceImage)
                fileNumber = fileNumber + 1

    def get_face_from_image(self, image):
        """Return the preprocessed 112x112 face crops of ``image`` and their boxes.

        Args:
            image: Image to run detection on.

        Returns:
            tuple: ``(faces, face_boxes)`` where ``faces`` is a list of crops.
        """
        face_boxes, landmarks = self.detect(image)
        faces = [
            face_preprocess.preprocess(image, box, marks, image_size='112,112')
            for box, marks in zip(face_boxes, landmarks)
        ]
        return faces, face_boxes
class FaceDetector(object):
    """Wrap RetinaFace and return integer face boxes and landmarks."""

    def __init__(self, model_str):
        """Create the underlying detector.

        Args:
            model_str (str): model checkpoint written as ``prefix,epoch``.
        """
        prefix, epoch_text = model_str.split(',')
        epoch = int(epoch_text)
        # Despite its name this value is passed to ``detect`` as its second
        # positional argument.
        self.nms_threshold = 0.8
        # (target size for the shorter side, upper bound for the longer side)
        self.scales = (1024, 1980)
        # Device id 0 when CUDA is reported available, otherwise -1.
        device_id = 0 if is_cuda_available() else -1
        self._detector = RetinaFace(prefix, epoch, device_id, 'net3')

    def predict(self, img: np.ndarray) -> tuple:
        """Run detection on one image.

        Args:
            img (np.ndarray): image handed to the detector; its first two
                dimensions are used to pick the scale factor.

        Returns:
            tuple: ``(faces, landmarks)``; each is cast to ``np.int32`` unless
            the detector returned ``None`` for it.
        """
        boxes, points = self._detector.detect(img,
                                              self.nms_threshold,
                                              scales=self._cal_scales(img),
                                              do_flip=False)
        boxes = None if boxes is None else boxes.astype(np.int32)
        points = None if points is None else points.astype(np.int32)
        return boxes, points

    def __call__(self, img: np.ndarray):
        """Alias for :meth:`predict`."""
        return self.predict(img)

    def _cal_scales(self, img):
        """Return the one-element list of scale factors for ``img``."""
        target_size = self.scales[0]
        max_size = self.scales[1]
        height_width = img.shape[0:2]
        short_side = np.min(height_width)
        long_side = np.max(height_width)

        ratio = float(target_size) / float(short_side)
        # Fall back to the long-side limit when the first ratio overshoots it.
        if np.round(ratio * long_side) > max_size:
            ratio = float(max_size) / float(long_side)
        return [ratio]
class RetinaFaceModel:
    """RetinaFace (R50) model that draws detections onto frames."""

    def __init__(self):
        """Load the detector on device 0 with the default scales and threshold."""
        gpuid = 0
        self.detector = RetinaFace('./model/R50', 0, gpuid, 'net3')
        # (target size for the shorter side, upper bound for the longer side)
        self.scales = [1024, 1980]
        self.thresh = 0.8
        print('initialized retina face model')

    def detect_faces(self, video, output):
        """Delegate to the module-level ``detect_faces`` helper.

        Args:
            video: Passed through as the helper's first argument.
            output: Passed through as the helper's second argument.
        """
        # Inside a method the bare name resolves to the module-level function,
        # not to this method.
        detect_faces(video, output, 640, 360, self.detect_faces_on_img)

    def detect_faces_on_img(self, image):
        """Draw face boxes and landmarks on ``image`` in place and return it.

        Args:
            image: Image array; its first two dimensions pick the scale factor.

        Returns:
            The same ``image`` object, annotated when faces were found.
        """
        im_shape = image.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]
        flip = False

        faces, landmarks = self.detector.detect(image,
                                                self.thresh,
                                                scales=scales,
                                                do_flip=flip)
        if faces is not None:
            print('find', faces.shape[0], 'faces')
            for i in range(faces.shape[0]):
                # np.int was removed in NumPy 1.24; builtin int is equivalent.
                box = faces[i].astype(int)
                color = (0, 0, 255)
                cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]),
                              color, 2)
                if landmarks is not None:
                    landmark5 = landmarks[i].astype(int)
                    for l in range(landmark5.shape[0]):
                        # Points 0 and 3 are drawn green, the others red (BGR).
                        color = (0, 0, 255)
                        if l == 0 or l == 3:
                            color = (0, 255, 0)
                        cv2.circle(image, (landmark5[l][0], landmark5[l][1]),
                                   1, color, 2)
        print(image.shape)
        return image
def process(queue_camera_info, queue_camera_img):
    """Worker loop: detect faces in queued camera frames and forward the result.

    Each item taken from ``queue_camera_info`` must provide a ``'filename'``
    key naming an image under ``/tmp/``.  The item is extended with the keys
    ``'image'``, ``'camera_face_img'`` (detected boxes) and ``'landmarks'``
    and put on ``queue_camera_img``.  A failure on one item is reported and
    the loop moves on to the next item; the function never returns.

    Args:
        queue_camera_info: Input queue providing ``get()`` and ``qsize()``.
        queue_camera_img: Output queue providing ``put()``.
    """
    from retinaface import RetinaFace
    import cv2
    import numpy as np
    import traceback

    gpuid = 1
    thresh = 0.7
    detector = RetinaFace('./model/mnet.25', 0, gpuid, 'net3')

    while True:
        camera_info_process = queue_camera_info.get()
        try:
            with open('log.txt', 'a+') as f:
                f.write(
                    time.strftime('%H:%M:%S', time.localtime()) +
                    'face detect ' + str(queue_camera_info.qsize()) + '\n')

            img = cv2.imread('/tmp/' + camera_info_process['filename'])
            print(img.shape)

            im_shape = img.shape
            scales = [128, 128]
            target_size = scales[0]
            max_size = scales[1]
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])
            im_scale = float(target_size) / float(im_size_min)
            if np.round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)
            scales = [im_scale]

            faces, landmarks = detector.detect(img, thresh, scales=scales)
            print(landmarks.shape)
            print(faces.shape)

            camera_info_process['image'] = img
            camera_info_process['camera_face_img'] = faces
            camera_info_process['landmarks'] = landmarks
            queue_camera_img.put(camera_info_process)
        except Exception:
            # Keep the worker alive on a bad frame, but let KeyboardInterrupt /
            # SystemExit through and show what actually went wrong.
            print('process is error')
            traceback.print_exc()
class FaceDetector:
    """RetinaFace wrapper that returns padded boxes of confident detections."""

    def __init__(self):
        """Create the detector (resnet50 backbone, device 0)."""
        self.face_detector = RetinaFace(
            gpu_id=0, network='resnet50')  # Backbone: resnet50 or mobilenet
        self.threshold = 0.8

    def filter_detections(self, faces):
        """Keep detections scoring at least ``self.threshold``.

        Args:
            faces: Iterable of ``(bbox, landmarks, score)`` triples, or ``None``.

        Returns:
            list: Integer boxes, each grown by 5 px on every side.
        """
        if faces is None:
            return []

        filtered_faces = []
        for bbox, _landmarks, score in faces:
            if score < self.threshold:
                continue
            # np.int was removed in NumPy 1.24; builtin int is equivalent.
            # The offset pushes the top-left corner out and the bottom-right
            # corner out by 5 px each.
            filtered_faces.append(bbox.astype(int) + [-5, -5, 5, 5])
        return filtered_faces

    def __call__(self, frame):
        """Detect faces in ``frame`` and return the filtered, padded boxes."""
        return self.filter_detections(self.face_detector.detect(frame))
def _main(_argv):
    """Detect faces in ``FLAGS.sample_img`` and save an annotated copy.

    Boxes and landmarks are drawn on the image, which is then written to
    ``FLAGS.save_destination``.

    Args:
        _argv: Unused positional arguments handed over by the launcher.
    """
    detector = RetinaFace(FLAGS.weights_path, FLAGS.use_gpu_nms,
                          FLAGS.nms_thresh)
    img = cv2.imread(FLAGS.sample_img)
    faces, landmarks = detector.detect(img, FLAGS.det_thresh)

    if faces is not None:
        print('found', faces.shape[0], 'faces')
        for i in range(faces.shape[0]):
            # np.int was removed in NumPy 1.24; builtin int is equivalent.
            box = faces[i].astype(int)
            color = (0, 0, 255)
            cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2)
            if landmarks is not None:
                landmark5 = landmarks[i].astype(int)
                for l in range(landmark5.shape[0]):
                    # Points 0 and 3 are drawn green, the others red (BGR).
                    color = (0, 0, 255)
                    if l == 0 or l == 3:
                        color = (0, 255, 0)
                    cv2.circle(img, (landmark5[l][0], landmark5[l][1]), 1,
                               color, 1)

    # NOTE(review): the image is written whether or not faces were found -
    # confirm against the original layout.
    cv2.imwrite(FLAGS.save_destination, img)
def _main(_argv):
    """Crop every detected face of the WIDER image sets to 48x48 grayscale.

    Each ``jpg`` found below the three image roots is run through the
    detector.  Every face is cut out with a 10 px margin, resized with
    ``image_resize``, converted to grayscale and written to
    ``widerface-faces/<image root>/<stem>result_<n>.jpg``.

    Args:
        _argv: Unused positional arguments handed over by the launcher.

    Raises:
        Exception: If a crop cannot be written.
    """
    detector = RetinaFace(FLAGS.weights_path, FLAGS.use_gpu_nms,
                          FLAGS.nms_thresh)
    image_root = [
        'data/WIDER_test/images/', 'data/WIDER_train/images/',
        'data/WIDER_val/images/'
    ]
    result_save_root = 'widerface-faces/'
    margin = 10

    for root in image_root:
        create_directory(os.path.join(result_save_root, root))

    for root in image_root:
        for parent, _dir_names, file_names in os.walk(root):
            for file_name in file_names:
                if not file_name.lower().endswith('jpg'):
                    continue
                img = cv2.imread(os.path.join(parent, file_name),
                                 cv2.IMREAD_COLOR)
                if img is None:
                    continue
                faces, _landmarks = detector.detect(img, FLAGS.det_thresh)
                if faces is None:
                    continue
                print(faces.shape)
                print('found', faces.shape[0], 'faces')

                # splitext also handles upper-case extensions, which a literal
                # replace('.jpg', ...) would leave untouched, so that every
                # face of the image would overwrite the same output file.
                stem = os.path.splitext(file_name)[0]
                for face_number, face in enumerate(faces, start=1):
                    # np.int was removed in NumPy 1.24.
                    box = face.astype(int)
                    # Clamp the start indices: a negative slice start counts
                    # from the end of the axis and yields an empty crop.
                    top = max(box[1] - margin, 0)
                    left = max(box[0] - margin, 0)
                    crop_img = img[top:box[3] + margin, left:box[2] + margin]
                    if crop_img.size == 0:
                        continue
                    crop_img = image_resize(crop_img, 48, 48)
                    crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
                    out_path = os.path.join(
                        result_save_root, root,
                        '{}result_{}.jpg'.format(stem, face_number))
                    if not cv2.imwrite(out_path, crop_img):
                        raise Exception("Could not write image")
class RetinaDetector(object):
    """RetinaFace (R50) wrapper that returns raw face detections."""

    def __init__(self):
        """Load the R50 model stored next to this module."""
        self.thresh = 0.8
        # (target size for the shorter side, upper bound for the longer side)
        self.scales = [1024, 1980]
        # Original note: the GPU is used when this value is >= 0.
        self.gpuid = -1
        self.model_path = os.path.join(os.path.dirname(__file__),
                                       'model/retinaface-R50/R50')
        print("retina model path: ", self.model_path)
        self.detector = RetinaFace(self.model_path, 0, self.gpuid, 'net3')

    def detect(self, img):
        """Run detection on ``img``.

        Args:
            img: Image array; its first two dimensions pick the scale factor.

        Returns:
            The detector's face array, or ``None`` when it returned ``None``.
        """
        im_shape = img.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        processed_scales = [im_scale]
        flip = False

        faces, landmarks = self.detector.detect(img,
                                                self.thresh,
                                                scales=processed_scales,
                                                do_flip=flip)
        # Check for None before touching .shape; doing it the other way round
        # made the None branch unreachable.
        if faces is None:
            return None
        print("face shape in Retina: ", faces.shape)
        print('find', faces.shape[0], 'faces num')
        return faces
def _main(_argv):
    """Detect faces in ``FLAGS.sample_img`` and print a prediction per face.

    Every face is cropped with a 10 px margin, resized to 48x48, converted to
    grayscale, reshaped to ``(1, 48, 48, 1)`` and passed to
    ``Predict.predict_emotion``; the result is printed.

    Args:
        _argv: Unused positional arguments handed over by the launcher.
    """
    detector = RetinaFace(FLAGS.weights_path, FLAGS.use_gpu_nms,
                          FLAGS.nms_thresh)
    img = cv2.imread(FLAGS.sample_img)
    faces, landmarks = detector.detect(img, FLAGS.det_thresh)
    predict = Predict('data/resnet56_fer_pretrained.h5')
    if faces is None:
        return

    print('found', faces.shape[0], 'faces')
    margin = 10
    for face in faces:
        # np.int was removed in NumPy 1.24; builtin int is equivalent.
        box = face.astype(int)
        # Clamp the start indices: a negative slice start counts from the end
        # of the axis and would silently drop faces near the top/left border.
        top = max(box[1] - margin, 0)
        left = max(box[0] - margin, 0)
        crop_img = img[top:box[3] + margin, left:box[2] + margin]
        if crop_img.size == 0:
            continue
        crop_img = image_resize(crop_img, 48, 48)
        # Force the exact 48x48 size in case image_resize did not produce it.
        crop_img = cv2.resize(crop_img, (48, 48))
        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        crop_img = np.array(
            crop_img.reshape([1, crop_img.shape[0], crop_img.shape[1], 1]))
        result = predict.predict_emotion(crop_img)
        print(result)
def cropped_and_aligned(img_list):
    """Detect, align and crop the face in every image of ``img_list``.

    For each image three variants are produced with ``align_image`` followed
    by ``check_size``: a size-100 grayscale crop, a size-100 colour crop and
    a size-160 colour crop.  Images for which an ``AttributeError`` is raised
    are counted and left out of all three lists.

    Args:
        img_list: Sequence of BGR images.

    Returns:
        tuple: ``(gray_100, color_100, color_160)`` lists of equal length.
    """
    error_num = 0
    # retina face initialization
    model = RetinaFace('model-mnet/mnet.25', 0, 0, 'net3')
    img_list_cropped_gray = []
    img_list_cropped_color = []
    img_list_160 = []

    for i, img in enumerate(img_list):
        try:
            if i % 1000 == 0:
                print('Crop and aligned', i)
            detections, point = model.detect(img)

            # for Gray images
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_cropped_g = check_size(
                align_image(img_gray, detections, point, size=100))
            # for BGR images
            img_cropped_color = check_size(
                align_image(img, detections, point, size=100))
            # for cnn
            img_cropped_160 = check_size(
                align_image(img, detections, point, size=160))
        except AttributeError:
            error_num += 1
            print('number of error ', error_num)
        else:
            # Append only once all three variants exist, so a failure half way
            # through cannot leave the lists with different lengths.
            img_list_cropped_gray.append(img_cropped_g)
            img_list_cropped_color.append(img_cropped_color)
            img_list_160.append(img_cropped_160)

    return img_list_cropped_gray, img_list_cropped_color, img_list_160
class DetectorModel:
    """RetinaFace wrapper that yields aligned face chips ordered left to right."""

    def __init__(self, args):
        """Store the detection settings and build the detector.

        Args:
            args: Object providing ``retina_model``, ``gpu``, ``threshold``,
                ``scales``, ``max_face_number`` and ``image_size``.
        """
        self.detector = RetinaFace(args.retina_model, 0, args.gpu, 'net3')
        self.threshold = args.threshold
        self.scales = args.scales
        self.max_face_number = args.max_face_number
        self.counter = 0
        self.image_size = args.image_size

    def save_image(self, image):
        """Write ``image`` under ``./Temp`` and advance the running counter."""
        target = './Temp/{}-{}.jpg'.format(time.time(), self.counter)
        cv2.imwrite(target, image)
        self.counter += 1

    def get_all_boxes(self, img, save_img=False):
        """Detect faces and pair each aligned chip with its box.

        Args:
            img: Image handed to the detector and to ``preprocess``.
            save_img: When true, every chip is also written via ``save_image``.

        Returns:
            zip: Pairs ``(aligned_chip, face_box)``, sorted by the first box
            column and limited to ``max_face_number`` chips.
        """
        faces, landmarks = self.detector.detect(img,
                                                self.threshold,
                                                scales=self.scales)
        # Order detections by their first column (the box's x_min).
        order = faces[:, 0].argsort()
        faces = faces[order]
        landmarks = landmarks[order]

        aligned = []
        for idx, face in enumerate(faces[:self.max_face_number]):
            chip = preprocess(img,
                              face,
                              landmarks[idx],
                              image_size=self.image_size)
            if save_img:
                self.save_image(chip)
            aligned.append(chip)
        # zip stops at the shorter sequence, i.e. at the number of chips.
        return zip(aligned, faces)
class RetinaFaceModel:
    """RetinaFace (R50) model that blurs detected faces, optionally with tracking."""

    def __init__(self, with_tracking=False, thresh=0.8, fpd=10):
        """Load the detector on device 0.

        Args:
            with_tracking: Select the tracker-based video helper.
            thresh: Threshold passed to the detector.
            fpd: Stored as ``frames_per_detection`` and forwarded to the
                tracker-based helper.
        """
        gpuid = 0
        self.detector = RetinaFace('./model/R50', 0, gpuid, 'net3')
        # (target size for the shorter side, upper bound for the longer side)
        self.scales = [1024, 1980]
        self.thresh = thresh
        self.with_tracking = with_tracking
        self.frames_per_detection = fpd
        print('initialized retina face model')

    def detect_faces(self, video, output):
        """Process ``video`` into ``output`` through the ``common`` helpers."""
        if self.with_tracking:
            common.detect_faces_with_trackers(video, output, 640, 360,
                                              self.detect_faces_on_img,
                                              self.frames_per_detection)
        else:
            common.detect_faces(video, output, 640, 360,
                                self.detect_faces_on_img)

    def detect_faces_on_img(self, image):
        """Blur every detected face of ``image``.

        Args:
            image: Image array; its first two dimensions pick the scale factor.

        Returns:
            tuple: ``(image, boxes)`` where ``boxes`` is a list of
            ``(x, y, w, h)`` tuples, empty when nothing was detected.
        """
        im_shape = image.shape
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]
        flip = False

        faces, landmarks = self.detector.detect(image,
                                                self.thresh,
                                                scales=scales,
                                                do_flip=flip)
        if faces is None:
            # Iterating None in the return expression would raise TypeError.
            return image, []

        for face in faces:
            # np.int was removed in NumPy 1.24; builtin int is equivalent.
            image = common.blur(image, face.astype(int))
        return image, [(face[0], face[1], face[2] - face[0],
                        face[3] - face[1]) for face in faces]
# Read frames until the capture is exhausted, run detection on every
# ``interval``-th frame, and write every frame to ``out``.
while True:
    t1 = cv2.getTickCount()
    ret, frame = video_capture.read()
    interval = 5
    time_start = time.time()
    if ret:
        print("---> %d:" % frame_index, ret)
        if frame_index % interval == 0:
            img = np.array(frame)
            flip = False
            faces, landmarks = detector.detect(img,
                                               thresh,
                                               scales=scales,
                                               do_flip=flip)
            # NOTE(review): boxes are drawn only on frames where detection
            # ran - confirm against the original layout.
            for i in range(faces.shape[0]):
                # np.int was removed in NumPy 1.24; builtin int is equivalent.
                box = faces[i].astype(int)
                color = (0, 0, 255)
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]),
                              color, 2)
        out.write(frame)
        frame_index += 1
    else:
        print('fine')
        break
    # NOTE(review): time_start is reset on every iteration, so this reports
    # the cost of a single frame - confirm that is intended.
    time_end = time.time()
    print('totally cost', time_end - time_start)
cv2.line(image, refPt[0], refPt[1], (0, 255, 0), 2)
if oke_roi:
    # Draw the region of interest and cut it out; the offsets are the ROI's
    # top-left corner in full-image coordinates.
    cv2.rectangle(image, refRct[0], refRct[1], (4, 200, 150), 2)
    roi_process = image[refRct[0][1]:refRct[1][1], refRct[0][0]:refRct[1][0]]
    offset_x = refRct[0][0]
    offset_y = refRct[0][1]

# ------------------- MAIN PROGRAM --------------------------
scales = [im_scale]
flip = True
# Detect inside the ROI when one is set, otherwise on the whole image.
if oke_roi:
    faces, landmarks = detector.detect(roi_process,
                                       thresh,
                                       scales=scales,
                                       do_flip=flip)
else:
    faces, landmarks = detector.detect(image,
                                       thresh,
                                       scales=scales,
                                       do_flip=flip)

rects = []
if faces is not None:
    print('find', faces.shape[0], 'faces')
    for i in range(faces.shape[0]):
        # np.int was removed in NumPy 1.24; builtin int is equivalent.
        box = faces[i].astype(int)
        # Take the score from the raw detection: after the integer cast it
        # would always be truncated to 0.
        score = faces[i][4]
        color = (0, 0, 255)
# Derive one scale factor: aim the shorter side at scales[0] without letting
# the longer side exceed scales[1].
im_shape = img.shape
target_size = scales[0]
max_size = scales[1]
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_min)
# prevent bigger axis from being more than max_size:
if np.round(im_scale * im_size_max) > max_size:
    im_scale = float(max_size) / float(im_size_max)
print('im_scale', im_scale)

for c in range(count):
    faces, landmarks = detector.detect(img, thresh, scales=[im_scale])
    print(c, faces.shape, landmarks.shape)

if faces is not None:
    print('find', faces.shape[0], 'faces')
    for i in range(faces.shape[0]):
        # np.int was removed in NumPy 1.24; builtin int is equivalent.
        box = faces[i].astype(int)
        color = (0, 0, 255)
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2)
        if landmarks is not None:
            landmark5 = landmarks[i].astype(int)
            for l in range(landmark5.shape[0]):
                color = (0, 0, 255)
class face_detection_component:
    """Detect faces locally and ask a remote service to name each of them."""

    def __init__(self, model_path, url):
        """Load the detector and remember the recognition endpoint.

        Args:
            model_path: Model prefix handed to ``RetinaFace``.
            url: Endpoint that receives the PNG-encoded face crops.
        """
        self.thresh = 0.8
        self.detector = RetinaFace(model_path, 0, -1, 'net3')
        self.url_face_recognition = url
        self.headers = {"Content-Type": "image/png"}

    def detection(self, img):
        """Detect faces in ``img``, draw their boxes and look up their names.

        Each face is cropped with a 20 % margin, PNG-encoded and POSTed to the
        recognition endpoint.  A 200 response is parsed as JSON and its
        ``'name'`` entry is used; otherwise the name stays ``"sconosciuto"``.

        Args:
            img: Image array; boxes are drawn onto it in place.

        Returns:
            dict: Maps the face index to ``{"x0", "y0", "x1", "y1", "name"}``
            with the unpadded box coordinates as plain ints.
        """
        final_results = {}
        count = 1
        scales = [img.shape[0], img.shape[1]]
        im_shape = img.shape
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]

        for c in range(count):
            faces, landmarks = self.detector.detect(img,
                                                    self.thresh,
                                                    scales=scales,
                                                    do_flip=False)

        if faces is not None:
            for i in range(faces.shape[0]):
                # np.int was removed in NumPy 1.24; builtin int is equivalent.
                box = faces[i].astype(int)
                color = (0, 0, 255)
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2)

                x0, y0, x1, y1 = (int(v) for v in box[:4])
                # Compute the margin once from the original box; deriving it
                # from an already-moved corner made the padding asymmetric.
                pad_x = 0.2 * (x1 - x0)
                pad_y = 0.2 * (y1 - y0)
                # A side is only padded when that keeps it inside the image.
                crop_x0 = int(x0 - pad_x) if x0 - pad_x >= 0 else x0
                crop_y0 = int(y0 - pad_y) if y0 - pad_y >= 0 else y0
                crop_x1 = int(x1 + pad_x) if x1 + pad_x <= im_shape[1] else x1
                crop_y1 = int(y1 + pad_y) if y1 + pad_y <= im_shape[0] else y1
                face_image = img[crop_y0:crop_y1, crop_x0:crop_x1]

                name = "sconosciuto"
                # tobytes() replaces the deprecated ndarray.tostring().
                images_str = cv2.imencode('.png', face_image)[1].tobytes()
                response = requests.post(self.url_face_recognition,
                                         headers=self.headers,
                                         data=images_str)
                if response.status_code == 200:
                    results = json.loads(response.text)
                    name = results['name']

                final_results[i] = {
                    "x0": x0,
                    "y0": y0,
                    "x1": x1,
                    "y1": y1,
                    "name": name
                }
        # The collected results were previously built and then dropped.
        return final_results
class DetectorModel:
    """RetinaFace wrapper that crops detected faces to a fixed size."""

    def __init__(self, args):
        """Store the detection settings and build the detector.

        Args:
            args: Object providing ``retina_model``, ``gpu``, ``threshold``,
                ``scales`` and ``max_face_number``.
        """
        self.detector = RetinaFace(args.retina_model, 0, args.gpu, 'net3')
        self.threshold = args.threshold
        self.scales = args.scales
        self.max_face_number = args.max_face_number
        self.counter = 0
        self.image_size = [112, 112]

    def save_image(self, images):
        """Write every image of ``images`` under ``./Temp``."""
        for img in images:
            cv2.imwrite(f'./Temp/{time.time()}-{self.counter}.jpg', img)
            self.counter += 1

    def get_all_boxes(self, frame, save_img=False, need_marks=False):
        """Detect faces in ``frame``, ordered by the first box column.

        Args:
            frame: Image handed to the detector.
            save_img: When true, the crops are also written via ``save_image``.
            need_marks: When true, landmarks are returned instead of crops.

        Returns:
            zip: Pairs ``(landmarks, box)`` when ``need_marks`` is set,
            otherwise pairs ``(crop, box)``.
        """
        boxes, landmarks = self.detector.detect(frame,
                                                self.threshold,
                                                scales=self.scales)
        sorted_index = boxes[:, 0].argsort()
        boxes = boxes[sorted_index]
        landmarks = landmarks[sorted_index]
        if need_marks:
            return zip(landmarks, boxes)
        aligned = self.preprocess(frame, boxes, landmarks)
        if save_img:
            self.save_image(aligned)
        return zip(aligned, boxes)

    def get_all_boxes_from_path(self, img_paths, save_img=False):
        """Crop the faces of every image and move the source to ``./Temp/raw``.

        Files whose name starts with ``cropped`` are skipped.  Crops are
        written next to the source image as ``cropped-<timestamp>.jpg``.

        Args:
            img_paths: Sequence of image paths.
            save_img: Accepted but not used by the visible code.
        """
        for done, path in enumerate(img_paths, start=1):
            base_path, file_name = os.path.split(path)
            if file_name.startswith('cropped'):
                continue
            for face, _ in self.get_all_boxes(cv2.imread(path)):
                cv2.imwrite(f'{base_path}/cropped-{time.time()}.jpg', face)
            shutil.move(path, f'./Temp/raw/{file_name}')
            # Progress message ("face detection completed"); '%.2f' limits it
            # to two decimals, where '%2f' only set a minimum width.
            print('人脸检测已完成%.2f%%' % ((done * 100) / len(img_paths)))

    def preprocess(self, img, boxes, landmarks, **kwargs):
        """Crop every box out of ``img`` and resize it to ``image_size``.

        Args:
            img: Source image.
            boxes: Sequence of boxes; the first four values are the corners.
            landmarks: Only used for the length consistency check.
            **kwargs: ``margin`` (default 0) total padding in pixels, split
                evenly between both sides of each axis.

        Returns:
            list: Resized crops; empty when ``boxes`` and ``landmarks`` differ
            in length.
        """
        aligned = []
        if len(boxes) != len(landmarks):
            return aligned

        half_margin = kwargs.get('margin', 0) / 2
        for bbox in boxes:
            # Pad the box and clamp it to the image bounds.
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(bbox[0] - half_margin, 0)
            bb[1] = np.maximum(bbox[1] - half_margin, 0)
            bb[2] = np.minimum(bbox[2] + half_margin, img.shape[1])
            bb[3] = np.minimum(bbox[3] + half_margin, img.shape[0])
            ret = img[bb[1]:bb[3], bb[0]:bb[2], :]
            warped = cv2.resize(ret, (self.image_size[1], self.image_size[0]))
            aligned.append(warped)
        return aligned
class FacialRecognition():
    """Detect faces with RetinaFace and embed them with ArcFace."""

    def __init__(self,
                 gpu_index=-1,
                 arcface_model="model-r100-ii/model,0",
                 image_size='112,112',
                 retina_model="/model/R50"):
        """Build the detector and the embedding model.

        Args:
            gpu_index: Device id handed to both models.
            arcface_model: Model string handed to ``ArcfaceModel``.
            image_size: Image size string handed to ``ArcfaceModel``.
            retina_model: Model prefix handed to ``RetinaFace``.
        """
        self.face_detector = RetinaFace(prefix=retina_model,
                                        epoch=0,
                                        ctx_id=gpu_index)
        self.face_recognition = ArcfaceModel(gpu=gpu_index,
                                             model=arcface_model,
                                             image_size=image_size)

    @staticmethod
    def _landmark_matrix(point):
        """Return ``point`` as a (5, 2) array of (x, y) rows.

        A (5, 2) input is returned unchanged; any other 10-element layout is
        read as ``x1..x5, y1..y5``.
        """
        point = np.asarray(point)
        if point.shape == (5, 2):
            return point
        return point.reshape((2, 5)).T

    def get_scales(self, img):
        """Return the one-element list of scale factors for ``img``.

        The shorter side is aimed at 1024 without letting the longer side
        exceed 1980.
        """
        scales = [1024, 1980]
        im_shape = img.shape
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        return [im_scale]

    def _detect(self, img):
        """Run the detector with the settings shared by all public methods."""
        thresh = 0.8
        flip = False
        return self.face_detector.detect(img,
                                         thresh,
                                         scales=self.get_scales(img),
                                         do_flip=flip)

    def detect_face_and_get_embedding(self, img):
        """Embed the first detected face.

        Returns:
            tuple: ``(embedding, aligned_face)``, or ``(None, None)`` when no
            face or no landmarks were returned.
        """
        bboxes, rs = self._detect(img)
        if bboxes is None or len(bboxes) <= 0:
            return None, None
        if rs is None:
            return None, None
        points = rs.astype(np.int32)
        nimg = preprocess(img, bboxes[0], points[0], image_size='112,112')
        e = np.transpose(nimg, (2, 0, 1))
        embeddings = self.face_recognition.get_feature(e)
        return embeddings, nimg

    def detect_face_and_get_embedding_test(self, img):
        """Embed every detected face and draw its box on ``img``.

        Returns:
            list | None: One embedding per face, or ``None`` when no face or
            no landmarks were returned.
        """
        bboxes, rs = self._detect(img)
        if bboxes is None or len(bboxes) <= 0:
            return None
        print('len bboxes: ', len(bboxes))
        if rs is None:
            return None

        # ``rs`` holds only the landmarks; the boxes come from ``bboxes``.
        points = rs.astype(np.int32)
        embeddings = []
        for i, bbox in enumerate(bboxes):
            point = self._landmark_matrix(points[i])
            nimg = preprocess(img, bbox, point, image_size='112,112')
            x = np.transpose(nimg, (2, 0, 1))
            embeddings.append(self.face_recognition.get_feature(x))
            # Drawing needs integer pixel coordinates.
            x0, y0, x1, y1 = (int(v) for v in bbox[:4])
            cv2.rectangle(img, (x0, y0), (x1, y1), (255, 0, 0), 2)
        return embeddings

    def detect_face_and_get_embedding_test_2(self, img):
        """Embed every detected face after converting its crop to RGB.

        Returns:
            tuple: ``(embeddings, boxes)`` lists, or ``(None, None)`` when no
            face or no landmarks were returned.
        """
        bboxes, rs = self._detect(img)
        if bboxes is None or len(bboxes) <= 0:
            return None, None
        if rs is None:
            return None, None

        points = rs.astype(np.int32)
        embeddings = []
        bbox_list = []
        for i, bbox in enumerate(bboxes):
            point = self._landmark_matrix(points[i])
            nimg = preprocess(img, bbox, point, image_size='112,112')
            nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
            x = np.transpose(nimg, (2, 0, 1))
            embeddings.append(self.face_recognition.get_feature(x))
            bbox_list.append(bbox)
        return embeddings, bbox_list

    def get_embedding(self, img):
        """Resize ``img`` to 112x112 and return its embedding."""
        nimg = cv2.resize(img, (112, 112))
        x = np.transpose(nimg, (2, 0, 1))
        embeddings = self.face_recognition.get_feature(x)
        return embeddings
class VideoDetector(object):
    """Detect, follow and name faces across the frames of a video."""

    def __init__(self, arguments, mx_context):
        """Build the recognition model and the RetinaFace detector.

        Args:
            arguments: Settings object; the visible code reads ``rt_model``
                (``prefix,epoch``), ``gpu``, ``image_size``, ``poses``,
                ``faces_dir``, ``in_file``, ``threshold``, ``threshold_face``
                and ``threshold_l2`` from it.
            mx_context: Context used for the NDArrays created by this class.
        """
        self.args = arguments
        self.ctx = mx_context
        # Use the settings passed in rather than a module-level ``args``.
        self.model = face_model.FaceModel(self.args)
        rtpath, epoch = self.args.rt_model.split(',')
        self.detector = RetinaFace(rtpath, int(epoch), self.args.gpu, 'net3')
        self.dataset = None  # Collection of features of known names
        self.names = {}  # Names of known person
        self.persons = []  # List of person detected
        # Resolution to crop person face
        self.crop_resolution = int(self.args.image_size.split(',')[0])
        self.pv = [float(p) for p in self.args.poses.split(',')]

    def prepare_faces(self, dataset_name='dataset.pkl'):
        """Build the feature dataset from the images in ``args.faces_dir``.

        The part of each file name before the first ``_`` is the person's
        name.  The result is pickled into the parent of ``faces_dir``.
        """
        image_names = os.listdir(self.args.faces_dir)
        face_names = {x.split('_')[0] for x in image_names}
        dataset = {}
        for name in face_names:
            # Compare the name prefix exactly; a substring test would also
            # pick up files of other people whose name contains this one.
            images = [
                cv2.imread(os.path.join(self.args.faces_dir, iname))
                for iname in image_names if iname.split('_')[0] == name
            ]
            features = [
                self.model.get_feature(self.model.get_input(img))
                for img in images
            ]
            dataset[name] = np.stack(features)
        dataset_path = os.path.abspath(os.path.join(self.args.faces_dir, '..'))
        with open(dataset_path + '/' + dataset_name, 'wb') as f:
            pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)

    def detect(self):
        """Process ``args.in_file`` frame by frame.

        Returns:
            tuple: ``(renders, {'w', 'h'}, {'fr_exec'})`` - the annotated
            frames, the frame size and the mean processing time per frame.
        """
        cap = cv2.VideoCapture(self.args.in_file)
        # Properties 3 and 4 are read as frame width and height.
        frame_w, frame_h = int(cap.get(3)), int(cap.get(4))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        renders = []
        frame_times = []
        for _ in tqdm(range(total_frames)):
            start = time()
            ret, frame = cap.read()
            if ret:
                total_boxes, points = self.detect_faces(frame)
                self.identify(frame, total_boxes, points)
                renders.append(self.draw_names(frame))
                # NOTE(review): only successfully read frames are timed -
                # confirm against the original layout.
                frame_times.append(time() - start)
        cap.release()
        return renders, {
            'w': frame_w,
            'h': frame_h
        }, {
            'fr_exec': np.array(frame_times).mean()
        }

    def load_features(self, dataset_name='dataset.pkl'):
        """Load the pickled dataset and index it by person name."""
        dataset_path = os.path.abspath(os.path.join(self.args.faces_dir, '..'))
        with open(dataset_path + '/' + dataset_name, 'rb') as f:
            # NOTE: pickle.load executes arbitrary code from the file; only
            # load datasets produced by prepare_faces().
            np_dataset = pickle.load(f)
        # Map every name to the slice of rows holding its features.
        i = 0
        for k, v in np_dataset.items():
            self.names[k] = slice(i, i + v.shape[0])
            i += v.shape[0]
        # Transform dataset to mx NDarray format
        self.dataset = nd.array(np.concatenate(list(np_dataset.values())),
                                ctx=self.ctx)

    def draw_names(self, frame):
        """Draw a box, and the name when known, for every tracked person."""
        colors = box_colors[:len(self.persons)]
        for person, c in zip(self.persons, colors):
            b = person.box
            top_left = (int(b[0]), int(b[1]))
            bottom_right = (int(b[2]), int(b[3]))
            if person.name is None:
                cv2.rectangle(frame, top_left, bottom_right, colors[-1], 2)
            else:
                cv2.rectangle(frame, top_left, bottom_right, c, 2)
                cv2.putText(frame, person.name, top_left,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3,
                            cv2.LINE_AA)
        return frame

    def draw_pose(self, frame, points, bbox):
        """Draw the landmarks and the pose values of every face."""
        for i in range(bbox.shape[0]):
            # np.int was removed in NumPy 1.24; builtin int is equivalent.
            box = bbox[i].astype(int)
            if points is not None:
                landmark5 = points[i].astype(int)
                for l in range(landmark5.shape[0]):
                    color = (0, 0, 255)
                    if l == 0 or l == 3:
                        color = (0, 255, 0)
                    if l == 2:
                        color = (255, 0, 0)
                    cv2.circle(frame, (landmark5[l][0], landmark5[l][1]), 1,
                               color, 2)
                poses = self.detector.check_large_pose(landmark5, box[1:])
                y = 450
                for t, vl in zip(pose_titles, poses):
                    value = vl if isinstance(vl, int) else np.round(vl, 2)
                    txt = str(t + ':' + str(value))
                    cv2.putText(frame, txt, (5, y), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (0, 0, 255), 1, cv2.LINE_AA)
                    y += 60
        return frame

    def name_face(self, person_face):
        """Return the best matching known name for ``person_face`` or ``None``."""
        face = self.model.get_input(person_face)
        if face is None:
            return None
        face = nd.array(self.model.get_feature(face), ctx=self.ctx)
        # Similarity between the known features and the current face feature
        sim = nd.dot(self.dataset, face)
        scores = {}
        for known_id, index in self.names.items():
            scores[known_id] = max(sim[index]).asnumpy()
        if max(scores.values()) > self.args.threshold_face:
            return max(scores, key=scores.get)
        return None

    def name_person(self, frame, points, box, person=None):
        """Create or update a person and try to name it.

        A person that is already known is only updated.  Otherwise the pose
        is checked and, unless ``check_large_pose`` reports 4, the face is
        cropped and matched against the dataset.
        """
        if person is None:  # Create a new object
            person = Person(points[2], box, self.args.threshold_l2)
        else:  # Update variables
            person.pre_point = points[2]
            person.box = box
            if person.known:  # The person has already been checked
                return

        ret, l, r, u, d = self.detector.check_large_pose(points, box[1:])
        if ret != 4:
            # Flatten the landmarks to one row: all x values, then all y.
            points = np.array([np.concatenate([points[:, 0], points[:, 1]])])
            cropped_face = self.model.detector.extract_image_chips(
                frame, points, self.crop_resolution, 1)[0]
            name = self.name_face(cropped_face)
            if name is not None:  # Face verified on the dataset
                person.name = name
                person.face = cropped_face
                person.known = True
        if person not in self.persons:
            self.persons.append(person)

    def identify(self, frame, bbox, points):
        """Match the detections of this frame to the tracked persons.

        ``Person.l2_distance`` is treated as returning a negative value when
        a point does not belong to that person.
        """
        face_count = bbox.shape[0]
        if not self.persons and face_count:
            # Nobody tracked yet: every detection is a new person.
            for i in range(face_count):
                box = bbox[i].astype(int)
                landmark5 = points[i].astype(int)
                self.name_person(frame, landmark5, box)
        elif len(self.persons) == face_count:
            # Find corresponding person for each coordinate
            for i in range(face_count):
                box = bbox[i].astype(int)
                landmark5 = points[i].astype(int)
                distances = np.array(
                    [p.l2_distance(landmark5[2]) for p in self.persons])
                person_index = np.where(distances >= 0, distances,
                                        np.inf).argmin()
                self.name_person(frame,
                                 landmark5,
                                 box,
                                 person=self.persons[person_index])
        elif len(self.persons) < face_count:
            # Identify previous persons and add new ones
            boxes = list(bbox.astype(int))
            landmarks = list(points.astype(int))
            centers = [p[2] for p in landmarks]
            distances = np.array([
                p.l2_distance(center) for p in self.persons
                for center in centers
            ]).reshape(len(self.persons), -1)
            known_index = np.where(distances >= 0, distances,
                                   np.inf).argmin(axis=1)
            unknown_index = [
                i for i in range(distances.shape[-1])
                if np.all(distances[:, i] == -1)
            ]
            # update known persons
            for ki, p in zip(known_index, self.persons):
                self.name_person(frame, landmarks[ki], boxes[ki], person=p)
            # add unknown persons
            for uk in unknown_index:
                self.name_person(frame, landmarks[uk], boxes[uk])
        elif len(self.persons) > face_count:
            if not face_count:  # There is no faces detected
                del self.persons[:]  # Empty the whole list
                return
            # Identify previous persons and remove the ones that disappeared
            boxes = list(bbox.astype(int))
            landmarks = list(points.astype(int))
            centers = [p[2] for p in landmarks]
            distances = np.array([
                p.l2_distance(center) for p in self.persons
                for center in centers
            ]).reshape(len(self.persons), -1)
            center_index = np.array([i for i in distances if np.any(i >= 0)])
            if face_count > 1:
                center_index = np.where(center_index >= 0, center_index,
                                        np.inf).argmin(axis=1)
            else:
                center_index = np.array([0])
            known_person = np.array([
                i for i in range(distances.shape[0])
                if np.any(distances[i] >= 0)
            ])
            d_person = [
                i for i in range(distances.shape[0])
                if np.all(distances[i] == -1)
            ]
            # update known persons
            for ci, ki in zip(center_index, known_person):
                self.name_person(frame,
                                 landmarks[ci],
                                 boxes[ci],
                                 person=self.persons[ki])
            # Delete from the back: removing in ascending order shifts the
            # remaining indices and drops the wrong persons.
            for d in reversed(d_person):
                del self.persons[d]

    def detect_faces(self, frame):
        """Run the detector on ``frame``, loading the dataset on first use.

        Returns:
            The detector result, or a pair of empty arrays when it returned
            ``None`` so that callers can always unpack two values.
        """
        if self.dataset is None:
            self.load_features()
        results = self.detector.detect(frame, threshold=self.args.threshold)
        if results is not None:
            return results
        # assumes 5-column boxes and 5x2 landmarks - TODO confirm
        return np.zeros((0, 5)), np.zeros((0, 5, 2))
def crop(dstpath, number, img_angle_paths):
    """Detect faces in the images of the given angle folders and save 224x224 crops.

    Intended to be run by several worker processes at once; each worker keeps
    its own log file and its own record of finished images (both keyed by
    ``number``), so an interrupted run can be resumed.

    For every ``*.jpg`` found directly inside the folders of
    ``img_angle_paths`` the function writes:

    * ``<dstpath>/detect_result/<angle>/<file>`` - the image with detection
      boxes and landmarks drawn on it;
    * ``<dstpath>/crop_result/<angle>/<file>`` - the face crop resized to
      224x224 (when several faces qualify, the last one wins);
    * a line in ``<dstpath>/iscropped/<number>_已裁剪.txt`` marking the image
      as done;
    * progress and error messages in ``<dstpath>/log/<number>_日志.txt``.

    ``<angle>`` is the name of the folder that directly contains the image.

    :param dstpath: root directory for crops, detection images, logs and the
        already-cropped record
    :param number: index of the worker process
    :param img_angle_paths: list of folders (one per angle) to process
    """
    print("当前进程:%s" % os.getpid())
    print(dstpath)
    print(number)

    thresh = 0.8
    # Shorter image side is scaled towards target_size, longer side is capped
    # at max_size.
    target_size, max_size = 1024, 1980
    gpuid = 0
    detector = RetinaFace('./mnet.25/mnet.25', 0, gpuid, 'net3')

    # Collect the paths of all images to process.
    imgPaths = []
    for path in img_angle_paths:
        imgPaths += glob.glob(os.path.join(path, "*.jpg"))
    imgPaths.sort()

    # Create the crop_result, detect_result, log and iscropped folders.
    detectresult_path = os.path.join(dstpath, "detect_result")
    cropresult_path = os.path.join(dstpath, "crop_result")
    log_path = os.path.join(dstpath, "log")
    iscropped_path = os.path.join(dstpath, "iscropped")
    for folder in (detectresult_path, cropresult_path, log_path, iscropped_path):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    day = time.strftime("%Y_%m_%d %H:%M", time.localtime())  # current time

    # One log file and one "already cropped" record per worker.
    log_txt = os.path.join(log_path, str(number) + "_日志.txt")
    is_cropped_txt = os.path.join(iscropped_path, str(number) + "_已裁剪.txt")

    # Context managers guarantee both files are closed even if detection raises.
    with open(log_txt, "a+") as log_f, open(is_cropped_txt, "a+") as is_cropped_f:
        log_f.write("-----------------------------" + "\n")
        log_f.write("当前时间:" + str(day) + "\n")
        log_f.write("当前进程:%s" % os.getpid() + "\n")

        # "a+" positions the stream at the end of the file, so rewind before
        # reading; otherwise the record always looks empty and every image is
        # processed again. Later writes still append (append mode).
        is_cropped_f.seek(0)
        is_cropped_img_pathList = []
        for imgname in is_cropped_f:
            imgname = imgname.strip()
            print(imgname)
            is_cropped_img_pathList.append(imgname)
        is_cropped_img_pathList.sort()
        already_cropped = set(is_cropped_img_pathList)  # O(1) membership tests
        print(is_cropped_img_pathList)
        print("已经裁剪的图像总数:", len(is_cropped_img_pathList))
        log_f.write("-----------------------------" + "\n")
        log_f.write("已经裁剪的图像总数:" + str(len(is_cropped_img_pathList)) + "\n")

        print("总共图片:" + str(len(imgPaths)))
        log_f.write("当前进程" + str(os.getpid()) + "总共图片:" + str(len(imgPaths)) + "\n")

        for j, imgPath in enumerate(imgPaths):
            print("第" + str(j) + "张")
            print("original img path:", imgPath)
            log_f.write("第" + str(j) + "张" + "\n")
            log_f.write("原图路径:" + str(imgPath) + "\n")

            # Separator-agnostic replacement for the former split("\\").
            angle_dir = os.path.basename(os.path.dirname(imgPath))
            imgFileName = os.path.basename(imgPath)
            print(angle_dir, imgFileName)

            def log_failure(reason, angle_dir=angle_dir, imgFileName=imgFileName):
                # Record an image that could not be cropped, with the reason.
                print("没有裁剪的图像:" + str(angle_dir) + str(imgFileName))
                log_f.write("没有裁剪的图像:" + str(angle_dir) + str(imgFileName) + "\n")
                log_f.write("错误信息:" + str(reason) + "\n")
                log_f.write("\n")

            # Per-angle output sub-folders.
            cropImg_angleDir = os.path.join(cropresult_path, angle_dir)
            detectResult_angleDir = os.path.join(detectresult_path, angle_dir)
            if not os.path.isdir(cropImg_angleDir):
                os.makedirs(cropImg_angleDir)
            if not os.path.isdir(detectResult_angleDir):
                os.makedirs(detectResult_angleDir)

            img_angle_name = angle_dir + "_" + imgFileName
            if img_angle_name in already_cropped:
                print(img_angle_name + "已经裁剪")
                log_f.write(img_angle_name + "已经裁剪" + "\n")
                continue
            log_f.write("开始裁剪图片:" + str(img_angle_name) + "\n")

            # np.fromfile + imdecode is used instead of imread so that
            # non-ASCII paths can be opened; imdecode returns None when the
            # data cannot be decoded.
            img = cv2.imdecode(np.fromfile(imgPath, np.uint8), cv2.IMREAD_COLOR)
            if img is None:
                log_failure("cannot decode image")
                continue

            imgCopy = img.copy()  # clean copy: crops must not contain drawings
            im_size_min = np.min(img.shape[0:2])
            im_size_max = np.max(img.shape[0:2])
            im_scale = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(im_scale * im_size_max) > max_size:
                im_scale = float(max_size) / float(im_size_max)

            faces, landmarks = detector.detect(img, thresh, scales=[im_scale],
                                               do_flip=False)

            try:
                if faces is not None:
                    # Reset per image so a crop from a previous image can
                    # never be written under this image's name.
                    crop_face = None
                    print('旋转前找到', faces.shape[0], '个脸')
                    log_f.write('旋转前找到' + str(faces.shape[0]) + '人脸')
                    log_f.write("\n")
                    for i in range(faces.shape[0]):
                        # np.int was removed in NumPy 1.24; the builtin int
                        # is what it always aliased.
                        box = faces[i].astype(int)
                        cv2.rectangle(img, (int(box[0]), int(box[1])),
                                      (int(box[2]), int(box[3])), (0, 0, 255), 4)
                        position = "face box position:x1:{}, y1:{}, x2:{}, y2:{}".format(
                            box[0], box[1], box[2], box[3])
                        print(position)
                        log_f.write(position)
                        log_f.write("\n")
                        # Boxes within 50 px of the left edge are ignored.
                        if box[0] < 50:
                            continue

                        # Horizontal padding depends on the angle folder name:
                        # "-" and "+" folders are padded on one side only,
                        # "0_0" on both.
                        x1, y1 = box[0], box[1]
                        x2, y2 = box[2], box[3]
                        if "-" in angle_dir:
                            print("-")
                            left, right = x1, x2 + 50
                        elif "+" in angle_dir:
                            print("+")
                            left, right = x1 - 50, x2
                        elif "0_0" == angle_dir:
                            print("0_0")
                            left, right = x1 - 60, x2 + 60
                        else:
                            left, right = x1 - 30, x2 + 30
                        # Clamp the slice starts: a negative start would wrap
                        # around to the other side of the image.
                        crop_face = imgCopy[max(y1 - 5, 0): y2 + 5, max(left, 0): right]
                        crop_face = cv2.resize(crop_face, (224, 224),
                                               interpolation=cv2.INTER_LINEAR)

                        if landmarks is not None:
                            landmark5 = landmarks[i].astype(int)
                            for l in range(landmark5.shape[0]):
                                # Landmarks 0 and 3 are drawn green, the rest red.
                                color = (0, 255, 0) if l in (0, 3) else (0, 0, 255)
                                cv2.circle(img,
                                           (int(landmark5[l][0]), int(landmark5[l][1])),
                                           1, color, 3)

                    # imencode + tofile mirrors the non-ASCII-safe read above.
                    detect_imageFileName = os.path.join(detectResult_angleDir, imgFileName)
                    _, encoded_img = cv2.imencode('.jpg', img)
                    encoded_img.tofile(detect_imageFileName)
                    print('writing detect_imgPath:' + str(detect_imageFileName))
                    log_f.write('writing detect_imgPath:' + str(detect_imageFileName) + "\n")

                    if crop_face is None:
                        # Every detection was filtered out; report it through
                        # the failure path below instead of encoding None.
                        raise ValueError("no face suitable for cropping")

                    crop_imageFileName = os.path.join(cropImg_angleDir, imgFileName)
                    _, encoded_crop = cv2.imencode('.jpg', crop_face)
                    encoded_crop.tofile(crop_imageFileName)
                    print('writing crop_imgPath:' + str(crop_imageFileName))
                    log_f.write('writing crop_imgPath:' + str(crop_imageFileName) + "\n")
                    is_cropped_f.write(img_angle_name + "\n")
            except Exception as e:
                # Best effort per image: log the failure and move on.
                log_failure(e)
# Run face detection on every frame of a recorded CCTV clip.
# NOTE(review): `detector`, `thresh` and `im_scale` are expected to be defined
# earlier in the script - confirm.
cctv = '/media/HDD/LATIHAN_FIAN/insightFace/insightface/RetinaFace/ch04_20200229143636.mp4'
cap = cv2.VideoCapture(cctv)
while True:
    grab, img = cap.read()
    if not grab:
        # No frame was returned (end of the clip or a read error); without
        # this exit the loop would spin forever doing nothing.
        break
    # The third positional parameter of cv2.resize is `dst`, so the
    # interpolation flag has to be passed by keyword to take effect.
    img = cv2.resize(img, (960, 480), interpolation=cv2.INTER_AREA)
    print(img.shape)
    scales = [im_scale]
    flip = True
    # do detection
    faces_xyxy, landmarks = detector.detect(img,
                                            thresh,
                                            scales=scales,
                                            do_flip=flip)
    # landmarks is checked for None so this debug print cannot raise.
    print(faces_xyxy, None if landmarks is None else landmarks.shape)
    if faces_xyxy is not None:
        print('find', faces_xyxy.shape[0], 'faces')
        faces_xywh = []
        score_fit = []
        for i in range(faces_xyxy.shape[0]):
            # np.int was removed in NumPy 1.24; builtin int is equivalent.
            box = faces_xyxy[i].astype(int)
            # Convert (x1, y1, x2, y2) corners to (x, y, width, height).
            bbox_xywh = [
                int(box[0]),
                int(box[1]),
                int(box[2] - box[0]),
                int(box[3] - box[1])
            ]
# prevent bigger axis from being more than max_size: if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) scales = [im_scale] return scales # 读取图片 img = cv2.imread('../images/t2.jpg') detector = RetinaFace("../models/mnet.25", 0, -1, 'net3') threshold = 0.8 scale = get_scale(img.shape) bbox_1, landmarks_1 = detector.detect(img, threshold, scale) print(bbox_1) print(landmarks_1) faces = bbox_1.tolist() for i in range(len(faces)): img = cv2.rectangle(img, (int(faces[i][0]), int(faces[i][1])), (int(faces[i][2]), int(faces[i][3])), (0, 0, 255), 2, 8, 0) cv2.imshow("img", img) cv2.waitKey(0)
im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) #im_scale = 1.0 # if im_size_min>target_size or im_size_max>max_size: im_scale = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) print('im_scale', im_scale) scales = [im_scale] flip = False img = img[:, :, 0:3] bounding_boxes, landmarks = detector.detect(img, thresh, scales=scales, do_flip=flip) nrof_faces = bounding_boxes.shape[0] print(bounding_boxes) print(landmarks) print(landmarks[0, :, :]) nimg = face_preprocess.preprocess(img, bounding_boxes[0, :], landmarks[0, :, :], image_size='112,112') output_filename_n = '/home/tmt/Documents/insightface/RetinaFace/detector_test3.jpg' # misc.imsave(output_filename_n, nimg) cv2.imwrite(output_filename_n, nimg) # if nrof_faces == 1: # nimg = face_preprocess.preprocess(
def main(args):
    """Detect faces in a data set and write cropped, resized face images.

    For every image of every class in ``args.input_dir`` that does not yet
    have an output ``.png``, faces are detected with RetinaFace, the selected
    box(es) are enlarged by ``args.margin`` (clipped to the image), cropped,
    resized to ``args.image_size`` x ``args.image_size`` and written under
    ``args.output_dir/<class name>/``. One line per processed image is
    appended to ``bounding_boxes_<random key>.txt`` in the output directory:
    the output file name followed by the box, or the file name alone when the
    image could not be aligned.

    When several faces are found and ``args.detect_multiple_faces`` is false,
    the face with the best trade-off between box area and distance to the
    image centre is kept.

    :param args: parsed command-line arguments; reads ``output_dir``,
        ``input_dir``, ``random_order``, ``detect_multiple_faces``,
        ``margin`` and ``image_size``
    """
    # Random start-up delay before touching the output directory.
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    thresh = 0.8
    gpuid = 0
    # Shorter image side is scaled towards target_size, longer side is capped
    # at max_size.
    target_size, max_size = 1024, 1980
    detector = RetinaFace('/content/insightface/models/R50', 0, gpuid, 'net3')

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                print(image_path)
                if os.path.exists(output_filename):
                    # Already produced, possibly by another process.
                    continue

                try:
                    img = cv2.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    print('{}: {}'.format(image_path, e))
                    continue
                # cv2.imread signals an unreadable file by returning None
                # rather than raising, so it must be checked explicitly.
                if img is None or img.ndim < 2:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue
                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                img = img[:, :, 0:3]

                im_shape = img.shape
                im_size_min = np.min(im_shape[0:2])
                im_size_max = np.max(im_shape[0:2])
                im_scale = float(target_size) / float(im_size_min)
                if np.round(im_scale * im_size_max) > max_size:
                    im_scale = float(max_size) / float(im_size_max)

                bounding_boxes, _ = detector.detect(img,
                                                    thresh,
                                                    scales=[im_scale],
                                                    do_flip=False)
                # NOTE(review): treated defensively - a None result counts as
                # "no faces"; confirm what detect() returns on an empty frame.
                nrof_faces = 0 if bounding_boxes is None else bounding_boxes.shape[0]
                if nrof_faces == 0:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue

                det = bounding_boxes[:, 0:4]
                det_arr = []
                img_size = np.asarray(img.shape)[0:2]
                if nrof_faces > 1:
                    if args.detect_multiple_faces:
                        for i in range(nrof_faces):
                            det_arr.append(np.squeeze(det[i]))
                    else:
                        # Keep the single face that is large and close to the
                        # image centre.
                        bounding_box_size = (
                            det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                        img_center = img_size / 2
                        offsets = np.vstack([
                            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                            (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                        ])
                        offset_dist_squared = np.sum(
                            np.power(offsets, 2.0), 0)
                        index = np.argmax(
                            bounding_box_size - offset_dist_squared * 2.0
                        )  # some extra weight on the centering
                        det_arr.append(det[index, :])
                else:
                    det_arr.append(np.squeeze(det))

                filename_base, file_extension = os.path.splitext(
                    output_filename)
                for i, face_det in enumerate(det_arr):
                    face_det = np.squeeze(face_det)
                    # Grow the box by half the margin on each side, clipped
                    # to the image bounds.
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(face_det[0] - args.margin / 2, 0)
                    bb[1] = np.maximum(face_det[1] - args.margin / 2, 0)
                    bb[2] = np.minimum(face_det[2] + args.margin / 2,
                                       img_size[1])
                    bb[3] = np.minimum(face_det[3] + args.margin / 2,
                                       img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    scaled = cv2.resize(
                        cropped, (args.image_size, args.image_size),
                        interpolation=cv2.INTER_LINEAR)
                    nrof_successfully_aligned += 1
                    if args.detect_multiple_faces:
                        # One output file per face, suffixed with its index.
                        output_filename_n = "{}_{}{}".format(
                            filename_base, i, file_extension)
                    else:
                        output_filename_n = "{}{}".format(
                            filename_base, file_extension)
                    cv2.imwrite(output_filename_n, scaled)
                    text_file.write('%s %d %d %d %d\n' %
                                    (output_filename_n, bb[0], bb[1],
                                     bb[2], bb[3]))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
import sys import numpy as np import datetime import os import glob from retinaface import RetinaFace detector = RetinaFace(gpu=0) img_path = 'data/retinaface/val/images' dir = os.listdir(img_path) for im in dir: img = cv2.imread(os.path.join(img_path, im)) faces, landmarks = detector.detect(img, scales_index=1, do_flip=True) if faces is not None: print('find', faces.shape[0], 'faces') for i in range(faces.shape[0]): box = faces[i].astype(np.int) color = (0, 0, 255) cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, 2) title = "%.2f" % (faces[i][4]) p3 = (max(box[0], 15), max(box[1], 15)) cv2.putText(img, title, p3, cv2.FONT_ITALIC, 0.6, (0, 255, 0), 1) if landmarks is not None: landmark5 = landmarks[i].astype(np.int) for l in range(landmark5.shape[0]): color = (0, 0, 255) if l == 0 or l == 3:
def fddb_pre_txt(args):
    """Run the detector over the ten FDDB folds and write ``results.txt``.

    For each image listed in ``<data_path>/FDDB-folds/FDDB-fold-XX.txt`` the
    output file receives the image name, the number of detections, and one
    ``x y width height score`` line per detection. Images are loaded from
    ``<current working directory>/FDDB/originalPics``.

    :param args: parsed arguments; reads ``model_path``, ``epoch_num``,
        ``data_path``, ``save_path``, ``thresh``, ``gpuid``, ``network`` and
        ``dense_anchor``
    :raises IOError: if a listed image cannot be read
    """
    prefix_path = args.model_path  # model path prefix / name
    epoch_num = args.epoch_num
    data_path = args.data_path  # root of the test data set
    save_dir = args.save_path  # folder that receives the generated TXT file
    print('thresh_before: ', args.thresh)
    thresh = args.thresh
    print('thresh: ', thresh)

    # Multi-scale testing: every entry is expressed relative to target_size
    # and multiplied by the per-image base scale below.
    test_scales = [500, 800, 1400]
    target_size = 800
    max_size = 1200
    flip = False

    start_time = time.time()
    detector = RetinaFace(prefix_path, epoch_num, args.gpuid, args.network,
                          args.dense_anchor)
    abs_dir = os.getcwd()
    count = 0  # images processed so far
    img_num = 2845  # hard-coded total used only for the progress print

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with open(os.path.join(save_dir, 'results.txt'), 'w') as f:
        for fold in range(1, 11):
            # Elapsed time since start; it is no longer overwritten per image.
            print('all:{}'.format(time.time() - start_time))
            print('Processing...... ', fold)
            txt_dir = os.path.join(data_path, 'FDDB-folds',
                                   'FDDB-fold-%02d.txt' % fold)
            with open(txt_dir, 'r') as f1:
                lines = f1.readlines()

            for line in lines:
                line = line.strip('\n\t')
                if not line:
                    continue  # tolerate blank lines in the fold list
                # NOTE(review): images are resolved against the working
                # directory rather than data_path - confirm this is intended.
                img_path = os.path.join(abs_dir, 'FDDB/originalPics',
                                        line + '.jpg')
                im = cv2.imread(img_path)
                if im is None:
                    # cv2.imread returns None instead of raising; fail with a
                    # clear message before writing a half-finished entry.
                    raise IOError('cannot read image: ' + img_path)
                f.write(line + '\n')

                im_shape = im.shape
                im_size_min = np.min(im_shape[0:2])
                im_size_max = np.max(im_shape[0:2])
                im_scale = float(target_size) / float(im_size_min)
                # prevent bigger axis from being more than max_size:
                if np.round(im_scale * im_size_max) > max_size:
                    im_scale = float(max_size) / float(im_size_max)
                scales = [
                    float(scale) / target_size * im_scale
                    for scale in test_scales
                ]

                faces, scores = detector.detect(im, thresh, scales,
                                                do_flip=flip)
                if faces is None:
                    f.write(str(0) + '\n')
                else:
                    print('find', faces.shape[0], 'faces')
                    f.write(str(faces.shape[0]) + '\n')
                    # Separate index name: the fold counter is not reused.
                    for k in range(faces.shape[0]):
                        # np.int was removed in NumPy 1.24; int is equivalent.
                        box = faces[k].astype(int)
                        # Output uses x, y, width, height, score.
                        f.write(
                            str(box[0]) + ' ' + str(box[1]) + ' ' +
                            str(box[2] - box[0]) + ' ' +
                            str(box[3] - box[1]) + ' ' +
                            str(scores[k][0]) + '\n')

                count += 1
                print('cout{} , last{}'.format(count, img_num - count))
def begin(self):
    """Read frames from the configured source and save those showing a face.

    Opens a webcam stream (when ``self.input_type`` is truthy) or a file
    stream on ``self.source``, starts ``self.readFolder`` in a daemon thread,
    then loops over frames: each frame is run through RetinaFace and, for
    every detected face taller than 45 px and wider than 22 px, the (optionally
    annotated) frame is written to ``self.path_to_watch`` under a
    timestamp-based file name. The loop ends when ``q`` is pressed or the
    stream returns no frame.

    Returns early, after printing ``'bad link'``, if the webcam stream cannot
    be opened.
    """
    gpuid = self.gpu_id  # ID of the GPU to be used

    # Loading model for face detection
    detector = RetinaFace(self.detection_model, 0, gpuid, 'net3')

    if self.input_type:
        # Reading input from camera
        try:
            fvs = WebcamVideoStream(src=self.source).start()
        except Exception:
            # Without a stream there is nothing to process; stop here rather
            # than dereferencing None in the main loop.
            print('bad link')
            return
    else:
        fvs = FileVideoStream(self.source).start()

    # Start fps counter
    fps = FPS().start()

    # Start folder reading and copying to second folder in thread
    thread = threading.Thread(target=self.readFolder, args=())
    thread.daemon = True
    thread.start()

    # Main loop
    while True:
        img = fvs.read()
        if img is None:
            # NOTE(review): assumes the stream yields None once no frame is
            # available - confirm for both stream classes.
            break

        # Shorter side is scaled towards scales[0], longer side capped at
        # scales[1].
        target_size = self.scales[0]
        max_size = self.scales[1]
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        faces, landmarks = detector.detect(img,
                                           self.detection_threshold,
                                           scales=[im_scale],
                                           do_flip=False)

        if faces is not None:
            print('Found ' + str(faces.shape[0]) + ' face(s)')
            for face in faces:
                # np.int was removed in NumPy 1.24; builtin int is equivalent.
                box = face.astype(int)

                # Set filename
                filename = str(datetime.datetime.now()).replace(":", "_").replace(".", "_").replace("-", "_").replace(' ', '_') + '.jpg'

                face_height = box[3] - box[1]
                face_width = box[2] - box[0]
                # Ignore faces that are too small.
                if not (face_height > 45 and face_width > 22):
                    continue

                # Top-left corner of an img_size square centred on the face.
                center_y = box[1] + face_height / 2
                center_x = box[0] + face_width / 2
                rect_y = center_y - self.img_size / 2
                rect_x = center_x - self.img_size / 2

                # If rect is True draw rectangle around face
                if self.rect:
                    # Rectangle around cropping area, we put more than
                    # img_size because sometimes borders of rectangle also
                    # get cropped. The division above yields floats, and the
                    # drawing call expects integer pixel coordinates.
                    color = (0, 255, 0)
                    cv2.rectangle(
                        img,
                        (int(rect_x), int(rect_y)),
                        (int(rect_x + self.img_size + 3),
                         int(rect_y + self.img_size + 3)),
                        color, 2)

                # If txt is True put text with coordinates of the face rectangle
                if self.txt:
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    text = 'x: ' + str(box[0]) + '; y: ' + str(box[1])
                    cv2.putText(img, text, (50, 50), font, 1, (0, 255, 255), 2, cv2.LINE_AA)

                # cv2.imwrite reports most failures (such as a missing
                # folder) through its boolean return value, not an exception.
                try:
                    written = cv2.imwrite(self.path_to_watch + '/' + filename, img)
                except Exception:
                    written = False
                if not written:
                    print('Folder not found!')

        # If show_frame True view frame on screen - this makes system a bit
        # slower, because it consumes part of resources to show this frame
        if self.show_frame:
            cv2.imshow('image', img)

        # Update fps counter
        fps.update()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

    cv2.destroyAllWindows()
    fvs.stop()
out.write(frames[i]) out.release() break counter = counter + 1 im_shape = im.shape target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(target_size) / float(im_size_min) if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) faces, landmarks = detector.detect(im, thresh, scales=[im_scale], do_flip=flip) if faces is not None: #For each face, we draw a rectangle for i in range(faces.shape[0]): box = faces[i].astype(np.int) cv2.rectangle(im, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2) frames.append(im) if counter % fps == 0: print(counter) cap.release()