def recognize_iris(frame, detector, estimator, estimator2, out_frame=None):
    """Detect faces in ``frame`` and draw face, eye and iris landmarks.

    The function runs three stages: face detection, face landmark
    estimation on every detected face crop, and iris landmark estimation
    on the crops produced by ``iut.iris_preprocess``.  Nothing is
    returned; all results are rendered through the drawing helpers.

    Args:
        frame: Image array; its last axis is reversed before inference
            (presumably BGR -> RGB for an OpenCV frame -- confirm with
            the caller).
        detector: Face detection network exposing ``predict``.
        estimator: Face landmark network exposing ``predict``; it is fed
            one face crop at a time.
        estimator2: Iris landmark network exposing ``predict``; it is fed
            one crop at a time.
        out_frame: Image the overlays are drawn on.  The default ``None``
            is forwarded unchanged to the drawing helpers.
    """
    channel_reversed = frame[:, :, ::-1]
    _, img128, scale, pad = iut.resize_pad(channel_reversed)

    # Map 0..255 pixel values onto -1..1, move the channel axis to the
    # front and prepend a batch axis of size one.
    input_data = img128.astype('float32') / 127.5 - 1.0
    input_data = np.expand_dims(np.moveaxis(input_data, -1, 0), 0)

    # Face detection
    preds = detector.predict([input_data])
    detections = iut.detector_postprocess(
        preds, anchor_path="../../face_recognition/facemesh/anchors.npy")

    # Optionally display the detection boxes
    if args.bbox:
        detections2 = iut.denormalize_detections(
            detections[0].copy(), scale, pad)
        display_hand_box(out_frame, detections2)

    if detections[0].size == 0:
        return

    # Face landmark estimation
    imgs, affines, _ = iut.estimator_preprocess(
        channel_reversed, detections, scale, pad)
    num_faces = imgs.shape[0]
    landmarks = np.zeros((num_faces, 1404))
    normalized_landmarks = np.zeros((num_faces, 468, 3))
    for i in range(num_faces):
        landmark, _ = estimator.predict([imgs[i:i + 1, :, :, :]])
        # Pair this face with its own affine.  Passing the whole
        # ``affines`` batch together with a single landmark presumably
        # applied the first face's transform to every face.
        normalized_landmarks[i, :, :] = fut.denormalize_landmarks(
            landmark / 192.0, affines[i:i + 1])
        # The raw (not denormalized) output is what iris_preprocess gets.
        landmarks[i, :] = landmark

    # Draw every face.  The loop previously ran over the length of the
    # last single-face result, not over the per-face array.
    for face_landmarks in normalized_landmarks:
        draw_landmarks_face(out_frame, face_landmarks[:, :2], size=1)

    # Iris landmark estimation
    imgs2, origins = iut.iris_preprocess(imgs, landmarks)
    num_crops = imgs2.shape[0]
    eyes = np.zeros((num_crops, 213))
    iris = np.zeros((num_crops, 15))
    for i in range(num_crops):
        eyes[i, :], iris[i, :] = estimator2.predict(
            [imgs2[i:i + 1, :, :, :]])

    eyes, iris = iut.iris_postprocess(eyes, iris, origins, affines)
    for eye_points, iris_points in zip(eyes, iris):
        draw_eye_iris(
            out_frame, eye_points[:, :16, :2], iris_points[:, :, :2], size=2)
def _estimate_and_draw_face_landmarks(
        src_img, input_data, scale, pad, detector, estimator):
    """Run detection and landmark estimation once, drawing onto ``src_img``."""
    # Face detection
    preds = detector.predict([input_data])
    detections = fut.detector_postprocess(preds)
    if detections[0].size == 0:
        return

    # Face landmark estimation on all detected faces in a single batch
    imgs, affines, box = fut.estimator_preprocess(
        src_img[:, :, ::-1], detections, scale, pad)
    draw_roi(src_img, box)

    estimator.set_input_shape(imgs.shape)
    landmarks, _ = estimator.predict([imgs])

    # postprocessing
    landmarks = fut.denormalize_landmarks(landmarks / 192.0, affines)

    # The confidence output is not used as a filter: it can exceed 1 and
    # its meaning is unclear, so every detected face is drawn.
    for landmark in landmarks:
        draw_landmarks(src_img, landmark[:, :2], size=1)


def recognize_from_image():
    """Run face landmark estimation on ``args.input`` and save the result.

    The image is read with ``cv2.imread``, landmarks are drawn onto it,
    and it is written to ``args.savepath``.  When ``args.benchmark`` is
    set the pipeline is executed five times and the wall-clock time of
    each run is printed in milliseconds.

    Raises:
        OSError: If ``cv2.imread`` could not load ``args.input``.
    """
    # prepare input data
    src_img = cv2.imread(args.input)
    if src_img is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message rather than a TypeError on the subscript below.
        raise OSError(f'Failed to load input image: {args.input}')

    _, img128, scale, pad = fut.resize_pad(src_img[:, :, ::-1])
    # Map 0..255 pixel values onto -1..1, move the channel axis to the
    # front and prepend a batch axis of size one.
    input_data = img128.astype('float32') / 127.5 - 1.0
    input_data = np.expand_dims(np.moveaxis(input_data, -1, 0), 0)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Net(
        DETECTION_MODEL_PATH, DETECTION_WEIGHT_PATH, env_id=env_id)
    estimator = ailia.Net(
        LANDMARK_MODEL_PATH, LANDMARK_WEIGHT_PATH, env_id=env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for _ in range(5):
            start = int(round(time.time() * 1000))
            _estimate_and_draw_face_landmarks(
                src_img, input_data, scale, pad, detector, estimator)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        _estimate_and_draw_face_landmarks(
            src_img, input_data, scale, pad, detector, estimator)

    cv2.imwrite(args.savepath, src_img)
    print('Script finished successfully.')
def recognize_from_video():
    """Run face landmark estimation on a video stream and display it.

    Frames are read from ``get_capture(args.video)``, reversed along
    axis 1, annotated with the face ROI and landmarks, and shown in a
    window named ``'frame'``.  When ``args.savepath`` differs from
    ``SAVE_IMAGE_PATH`` each annotated frame is also written through the
    writer returned by ``webcamera_utils.get_writer``.  The loop ends
    when ``q`` is pressed or the capture yields no more frames.
    """
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Net(
        DETECTION_MODEL_PATH, DETECTION_WEIGHT_PATH, env_id=env_id)
    estimator = ailia.Net(
        LANDMARK_MODEL_PATH, LANDMARK_WEIGHT_PATH, env_id=env_id)

    capture = get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = webcamera_utils.calc_adjust_fsize(
            f_h, f_w, IMAGE_HEIGHT, IMAGE_WIDTH
        )
        writer = webcamera_utils.get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    # Switch between one batched estimator call per frame (True) and one
    # call per face crop (False).  Loop-invariant, so set once up front.
    dynamic_input_shape = False

    try:
        while True:
            ret, frame = capture.read()
            if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
                break

            # Reverse axis 1 of the frame and make the result contiguous.
            frame = np.ascontiguousarray(frame[:, ::-1, :])
            _, img128, scale, pad = fut.resize_pad(frame[:, :, ::-1])
            # Map 0..255 pixel values onto -1..1, move the channel axis
            # to the front and prepend a batch axis of size one.
            input_data = img128.astype('float32') / 127.5 - 1.0
            input_data = np.expand_dims(np.moveaxis(input_data, -1, 0), 0)

            # Face detection
            preds = detector.predict([input_data])
            detections = fut.detector_postprocess(preds)

            # Face landmark estimation
            if detections[0].size != 0:
                imgs, affines, box = fut.estimator_preprocess(
                    frame[:, :, ::-1], detections, scale, pad)
                draw_roi(frame, box)

                if dynamic_input_shape:
                    estimator.set_input_shape(imgs.shape)
                    landmarks, _ = estimator.predict([imgs])
                    landmarks = fut.denormalize_landmarks(
                        landmarks / 192.0, affines)
                else:
                    num_faces = imgs.shape[0]
                    landmarks = np.zeros((num_faces, 468, 3))
                    for i in range(num_faces):
                        landmark, _ = estimator.predict(
                            [imgs[i:i + 1, :, :, :]])
                        # Pair this face with its own affine.  Passing
                        # the whole ``affines`` batch with one landmark
                        # presumably applied the first face's transform
                        # to every face.
                        landmarks[i, :, :] = fut.denormalize_landmarks(
                            landmark / 192.0, affines[i:i + 1])

                # The confidence output is not used as a filter: it can
                # exceed 1 and its meaning is unclear.
                for landmark in landmarks:
                    draw_landmarks(frame, landmark[:, :2], size=1)

            cv2.imshow('frame', frame)

            # save results
            if writer is not None:
                writer.write(frame)
    finally:
        # Release resources even if inference raised.  The writer was
        # previously never released, which can leave the output file
        # unfinalized.
        capture.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()

    print('Script finished successfully.')