def main():
    """Gaze-driven mouse controller (dlib-landmark variant).

    Captures video from source 1, runs OpenVINO face-detection and
    head-pose networks, locates the eyes with dlib's 68-point landmark
    predictor, estimates a normalised gaze vector, and — while RUN is
    enabled — clicks on winks and moves the cursor along the gaze.

    :return: -1 if the capture device cannot be opened, otherwise None.
    """
    global DELAY
    global KEEP_RUNNING
    global TARGET_DEVICE
    global RUN
    global is_async_mode

    mode = 'std'
    input_stream = 1
    cap = cv2.VideoCapture(input_stream)
    if not cap.isOpened():
        print("Cap wasn't open")
        return -1
    if input_stream:
        cap.open(input_stream)
        # Match the display delay to the source frame rate.
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    # Inference request IDs, swapped each frame in async mode.
    cur_request_id = 0
    next_request_id = 1

    infer_network = Network()
    infer_network_pose = Network()
    infer_network_gaze = Network()

    # The first load returns the plugin, which the later loads reuse.
    plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(
        './face_detection/face_detection.xml', TARGET_DEVICE, 1, 1, 2)
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        './pose/pose.xml', TARGET_DEVICE, 1, 3, 2, None, plugin)[1]
    infer_network_gaze.load_model(
        './gaze_estimation/gaze.xml', TARGET_DEVICE, 3, 1, 2, None, plugin)
    predictor = dlib.shape_predictor(
        r"./landmarks/shape_predictor_68_face_landmarks.dat")

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    # BUG FIX: the original looped on `cap.isOpened` (the bound method
    # object, which is always truthy) instead of calling it.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break
        if frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        key = cv2.waitKey(1) & 0xFF
        initial_wh = [cap.get(3), cap.get(4)]

        in_frame_fd = cv2.resize(frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW.
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        key_pressed = cv2.waitKey(int(DELAY))
        if key == ord('n'):
            mode = 'nar'
        if key == ord('p'):
            mode = 'std'

        if is_async_mode:
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            infer_network.exec_net(cur_request_id, in_frame_fd)

        if infer_network.wait(cur_request_id) == 0:
            res = infer_network.get_output(cur_request_id)
            faces = face_detection(res, initial_wh)
            if len(faces) == 1:
                for res_hp in faces:
                    xmin, ymin, xmax, ymax = res_hp
                    head_pose = frame[ymin:ymax, xmin:xmax]

                    # Head-pose estimation on the face crop.
                    in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                    in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                    in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))
                    infer_network_pose.exec_net(cur_request_id, in_frame_hp)
                    infer_network_pose.wait(cur_request_id)
                    angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")[0]
                    angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")[0]
                    angle_r_fc = infer_network_pose.get_output(0, "angle_r_fc")[0]
                    head_pose_angles = np.array(
                        [angle_y_fc, angle_p_fc, angle_r_fc], dtype='float32')
                    head_pose_angles = head_pose_angles.transpose()

                    # dlib landmarks on the grayscale full frame, restricted
                    # to the detected face rectangle.
                    frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    face = dlib.rectangle(xmin, ymin, xmax, ymax)
                    landmarks = predictor(frame2, face)

                    def midpoint(p1, p2):
                        # Pixel midpoint of two dlib landmark points.
                        return int((p1.x + p2.x) / 2), int((p1.y + p2.y) / 2)

                    try:
                        # Right eye: dlib points 36-41.
                        right_bottom_point = (landmarks.part(36).x,
                                              landmarks.part(36).y)
                        right_top_point = (landmarks.part(39).x,
                                           landmarks.part(39).y)
                        rcenter_top = midpoint(landmarks.part(37),
                                               landmarks.part(38))
                        rcenter_bottom = midpoint(landmarks.part(41),
                                                  landmarks.part(40))
                        # BUG FIX: the original computed
                        # right_bottom_point[0] - right_bottom_point[0]
                        # (always 0) for the horizontal component, breaking
                        # the right-eye aspect ratio used for wink detection.
                        h_length = hypot(
                            right_top_point[0] - right_bottom_point[0],
                            right_top_point[1] - right_bottom_point[1])
                        v_length = hypot(
                            rcenter_top[0] - rcenter_bottom[0],
                            rcenter_top[1] - rcenter_bottom[1])

                        # Padded crop around the right eye for the gaze net.
                        left_sq = (landmarks.part(36).x - 22,
                                   landmarks.part(37).y - 13)
                        right_sq = (landmarks.part(39).x + 11,
                                    landmarks.part(40).y + 20)
                        right_eye = frame[left_sq[1]:right_sq[1],
                                          left_sq[0]:right_sq[0]]
                        right_midpoint = (
                            int((right_bottom_point[0] + right_top_point[0]) / 2),
                            int((right_bottom_point[1] + right_top_point[1]) / 2))
                        in_frame_right_eye = cv2.resize(right_eye, (60, 60))
                        in_frame_right_eye = in_frame_right_eye.transpose((2, 0, 1))
                        in_frame_right_eye = in_frame_right_eye.reshape((1, 3, 60, 60))

                        # Left eye: dlib points 42-47.
                        # NOTE(review): y is taken from part(45) rather than
                        # part(42) here — possibly intentional (both are eye
                        # corners), verify against the 68-point layout.
                        left_bottom_point = (landmarks.part(42).x,
                                             landmarks.part(45).y)
                        left_top_point = (landmarks.part(45).x,
                                          landmarks.part(45).y)
                        lcenter_top = midpoint(landmarks.part(43),
                                               landmarks.part(44))
                        lcenter_bottom = midpoint(landmarks.part(47),
                                                  landmarks.part(46))
                        hor_length = hypot(
                            left_bottom_point[0] - left_top_point[0],
                            left_bottom_point[1] - left_top_point[1])
                        ver_length = hypot(
                            lcenter_top[0] - lcenter_bottom[0],
                            lcenter_top[1] - lcenter_bottom[1])
                        left1_sq = (landmarks.part(42).x - 11,
                                    landmarks.part(43).y - 13)
                        right1_sq = (landmarks.part(45).x + 22,
                                     landmarks.part(46).y + 20)
                        left_eye = frame[left1_sq[1]:right1_sq[1],
                                         left1_sq[0]:right1_sq[0]]
                        left_midpoint = (
                            int((left_bottom_point[0] + left_top_point[0]) / 2),
                            int((left_bottom_point[1] + left_top_point[1]) / 2))
                        in_frame_left_eye = cv2.resize(left_eye, (60, 60))
                        in_frame_left_eye = in_frame_left_eye.transpose((2, 0, 1))
                        in_frame_left_eye = in_frame_left_eye.reshape((1, 3, 60, 60))

                        # Gaze estimation from both eye crops + head pose.
                        infer_network_gaze.exec_net_g(cur_request_id,
                                                      in_frame_left_eye,
                                                      in_frame_right_eye,
                                                      head_pose_angles)
                        infer_network_gaze.wait(cur_request_id)
                        gaze_vector = infer_network_gaze.get_output(
                            0, 'gaze_vector')
                        norm = np.linalg.norm(gaze_vector)
                        gaze_vector = gaze_vector / norm

                        frame = draw(mode, frame, left1_sq, right1_sq,
                                     left_bottom_point, left_top_point,
                                     landmarks, lcenter_top, lcenter_bottom,
                                     right_bottom_point, left_sq, right_sq,
                                     right_top_point, rcenter_top,
                                     rcenter_bottom, xmax, xmin, gaze_vector,
                                     left_midpoint, right_midpoint)
                    except Exception:
                        # Landmark/crop failures (e.g. an eye box falling
                        # outside the frame) are expected occasionally; skip
                        # this face.  BUG FIX: narrowed from a bare `except:`
                        # so KeyboardInterrupt/SystemExit still propagate.
                        continue

                    if key == ord('r'):
                        RUN = True
                    if key == ord('s'):
                        RUN = False
                    if RUN:
                        # A stretched aspect ratio on exactly one eye is read
                        # as a wink: left eye -> left click, right eye ->
                        # right click.
                        if (hor_length / ver_length > 4) and not (h_length / v_length > 4):
                            pyautogui.click(button='left')
                        if (h_length / v_length > 4) and not (hor_length / ver_length > 4):
                            pyautogui.click(button='right')
                        # Nudge the cursor against the gaze direction.
                        current_position = autopy.mouse.location()
                        x_new = (int(25 * -gaze_vector[0][0] + current_position[0]),
                                 int(25 * -gaze_vector[0][1] + current_position[1]))
                        try:
                            autopy.mouse.move(x_new[0], x_new[1])
                        except ValueError:
                            # autopy raises ValueError for off-screen targets.
                            continue

        cv2.imshow('Navigation', frame)
        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
        if key == ord("q"):
            break

    infer_network.clean()
    infer_network_pose.clean()
    # BUG FIX: the gaze network was loaded but never released.
    infer_network_gaze.clean()
    cap.release()
    cv2.destroyAllWindows()
def main():
    """
    Load the networks and parse the outputs (landmark-network variant).

    NOTE(review): this redefines `main` and shadows the earlier dlib-based
    definition above; only this version is reachable via a `main()` call.
    Consider renaming one of the two.

    :return: None
    """
    global INFO
    global DELAY
    global CLIENT
    global KEEP_RUNNING
    global POSE_CHECKED
    global TARGET_DEVICE
    global is_async_mode

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()
    check_args()

    input_stream = 0
    cap = cv2.VideoCapture(input_stream)
    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return
    if input_stream:
        cap.open(input_stream)
        # Adjust DELAY to match the number of FPS of the video file.
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    # Init inference request IDs, swapped each frame in async mode.
    cur_request_id = 0
    next_request_id = 1

    # Initialise one Network wrapper per model.
    infer_network = Network()
    infer_network_pose = Network()
    infer_network_landmarks = Network()
    infer_network_gaze = Network()

    # The first load returns the plugin, which the later loads reuse.
    plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(
        './face_detection/face_detection.xml', TARGET_DEVICE, 1, 1, 2,
        args.cpu_extension)
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        './pose/pose.xml', TARGET_DEVICE, 1, 3, 2,
        args.cpu_extension, plugin)[1]
    n_fe, c_fe, h_fe, w_fe = infer_network_landmarks.load_model(
        './landmarks/landmarks.xml', TARGET_DEVICE, 1, 1, 2,
        args.cpu_extension, plugin)[1]
    infer_network_gaze.load_model(
        './gaze_estimation/gaze.xml', TARGET_DEVICE, 3, 1, 2,
        args.cpu_extension, plugin)

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    ret, frame = cap.read()
    while ret:
        ret, frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break
        if frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        initial_wh = [cap.get(3), cap.get(4)]
        in_frame_fd = cv2.resize(frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW.
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        key_pressed = cv2.waitKey(int(DELAY))

        if is_async_mode:
            # Async enabled and only one video capture.
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            # Async disabled.
            infer_network.exec_net(cur_request_id, in_frame_fd)

        # Wait for the face-detection result.
        if infer_network.wait(cur_request_id) == 0:
            res = infer_network.get_output(cur_request_id)
            # Parse face detection output.
            faces = face_detection(res, args, initial_wh)
            if len(faces) == 1:
                for res_hp in faces:
                    xmin, ymin, xmax, ymax = res_hp
                    head_pose = frame[ymin:ymax, xmin:xmax]

                    # Head-pose estimation on the face crop.
                    in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                    in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                    in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))
                    infer_network_pose.exec_net(cur_request_id, in_frame_hp)
                    infer_network_pose.wait(cur_request_id)
                    angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")[0]
                    angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")[0]
                    angle_r_fc = infer_network_pose.get_output(0, "angle_r_fc")[0]
                    head_pose_angles = np.array(
                        [angle_y_fc, angle_p_fc, angle_r_fc], dtype='float32')
                    head_pose_angles = head_pose_angles.transpose()

                    # Facial-landmark network on the same face crop.
                    in_frame_eye = cv2.resize(head_pose, (w_fe, h_fe))
                    in_frame_eye = in_frame_eye.transpose((2, 0, 1))
                    in_frame_eye = in_frame_eye.reshape((n_fe, c_fe, h_fe, w_fe))
                    infer_network_landmarks.exec_net(cur_request_id, in_frame_eye)
                    infer_network_landmarks.wait(cur_request_id)
                    align_fc3 = infer_network_landmarks.get_output(0, "align_fc3")
                    # Flat (x0, y0, x1, y1, ...) output -> list of (x, y)
                    # pairs, normalised to the face crop.
                    align_fc3 = [align_fc3[0][n:n + 2]
                                 for n in range(0, len(align_fc3[0]), 2)]
                    width = head_pose.shape[1]
                    height = head_pose.shape[0]

                    # Right-eye box from landmark pairs 12/14; the box height
                    # is derived from its horizontal extent.
                    rx = xmin + int(align_fc3[12][0] * width) - 15
                    ry = ymin + int(align_fc3[12][1] * height)
                    rx2 = xmin + int(align_fc3[14][0] * width)
                    ry2 = (ymin + int(align_fc3[12][1] * height)) + (rx2 - rx - 15)
                    frame = cv2.rectangle(frame, (rx, ry), (rx2, ry2),
                                          (255, 255, 255), 2)
                    right_eye = frame[ry:ry2, rx:rx2]
                    right_midpoint = (int((rx + rx2) / 2), int((ry + ry2) / 2))
                    in_frame_right_eye = cv2.resize(right_eye, (60, 60))
                    in_frame_right_eye = in_frame_right_eye.transpose((2, 0, 1))
                    in_frame_right_eye = in_frame_right_eye.reshape((1, 3, 60, 60))

                    # Left-eye box from landmark pairs 15/17.
                    lx = xmin + int(align_fc3[15][0] * width)
                    ly = ymin + int(align_fc3[15][1] * height)
                    lx2 = xmin + int(align_fc3[17][0] * width) + 15
                    ly2 = (ymin + int(align_fc3[17][1] * height)) + (lx2 - lx - 15)
                    frame = cv2.rectangle(frame, (lx, ly), (lx2, ly2),
                                          (255, 255, 255), 2)
                    left_eye = frame[ly:ly2, lx:lx2]
                    left_midpoint = (int((lx + lx2) / 2), int((ly + ly2) / 2))
                    in_frame_left_eye = cv2.resize(left_eye, (60, 60))
                    in_frame_left_eye = in_frame_left_eye.transpose((2, 0, 1))
                    in_frame_left_eye = in_frame_left_eye.reshape((1, 3, 60, 60))

                    # Gaze estimation from both eye crops + head pose.
                    infer_network_gaze.exec_net_g(cur_request_id,
                                                  in_frame_left_eye,
                                                  in_frame_right_eye,
                                                  head_pose_angles)
                    infer_network_gaze.wait(cur_request_id)
                    gaze_vector = infer_network_gaze.get_output(0, 'gaze_vector')
                    norm = np.linalg.norm(gaze_vector)
                    gaze_vector = gaze_vector / norm

                    # BUG FIX: the original wrote `int(0.4 * xmax-xmin)`,
                    # which by precedence is (0.4 * xmax) - xmin; the arrow
                    # length is meant to scale with the face width.
                    arrow_length = int(0.4 * (xmax - xmin))
                    gaze_arrow_left = (
                        int(arrow_length * -gaze_vector[0][0] + left_midpoint[0]),
                        int(arrow_length * gaze_vector[0][1] + left_midpoint[1]))
                    gaze_arrow_right = (
                        int(arrow_length * -gaze_vector[0][0] + right_midpoint[0]),
                        int(arrow_length * gaze_vector[0][1] + right_midpoint[1]))
                    frame = cv2.arrowedLine(frame, left_midpoint,
                                            gaze_arrow_left, (0, 0, 0), 2)
                    frame = cv2.arrowedLine(frame, right_midpoint,
                                            gaze_arrow_right, (0, 0, 0), 2)

                    # Nudge the cursor one step along the gaze direction.
                    current_position = autopy.mouse.location()
                    x_new = (int(1 * gaze_vector[0][0] + current_position[0]),
                             int(1 * -gaze_vector[0][1] + current_position[1]))
                    try:
                        autopy.mouse.move(x_new[0], x_new[1])
                    except ValueError:
                        # ROBUSTNESS: autopy raises ValueError for off-screen
                        # targets; skip the move rather than crash (matches
                        # the guard used by the sibling controller).
                        pass

        cv2.imshow("Shopper Gaze Monitor", frame)
        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
        if is_async_mode:
            # Swap infer request IDs.
            cur_request_id, next_request_id = next_request_id, cur_request_id

    infer_network.clean()
    infer_network_pose.clean()
    # BUG FIX: the landmark and gaze networks were loaded but never released.
    infer_network_landmarks.clean()
    infer_network_gaze.clean()
    cap.release()
    cv2.destroyAllWindows()