Example 1
def main():
    global DELAY
    global KEEP_RUNNING
    global TARGET_DEVICE
    global RUN
    global is_async_mode
    mode = 'std'
    input_stream = 1
    cap = cv2.VideoCapture(input_stream)
    if not cap.isOpened():
        print("Cap wasn't open")
        return -1
    if input_stream:
        cap.open(input_stream)
        # Match DELAY to the source FPS; keep the default when FPS is unreported
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps:
            DELAY = 1000 / fps
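    # Init inference request IDs; two IDs allow async double-buffering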
    cur_request_id = 0
    next_request_id = 1
    infer_network = Network()
    infer_network_pose = Network()
    infer_network_gaze = Network()
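    # Load the networks into the IE plugin to get the shapes of the input layers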
    plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(
        './face_detection/face_detection.xml',
        TARGET_DEVICE,
        1,
        1,
        2,
    )
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        './pose/pose.xml', TARGET_DEVICE, 1, 3, 2, None, plugin)[1]
    infer_network_gaze.load_model('./gaze_estimation/gaze.xml', TARGET_DEVICE,
                                  3, 1, 2, None, plugin)
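    # dlib's 68-point facial landmark predictor, used to locate the eyes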
    predictor = dlib.shape_predictor(
        r"./landmarks/shape_predictor_68_face_landmarks.dat")

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break
        if frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break
        # Poll the keyboard once per frame; DELAY paces playback
        key = cv2.waitKey(int(DELAY)) & 0xFF
        initial_wh = [cap.get(cv2.CAP_PROP_FRAME_WIDTH),
                      cap.get(cv2.CAP_PROP_FRAME_HEIGHT)]
        in_frame_fd = cv2.resize(frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        if key == ord('n'):
            mode = 'nar'
        if key == ord('p'):
            mode = 'std'
        if is_async_mode:
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            infer_network.exec_net(cur_request_id, in_frame_fd)
        if infer_network.wait(cur_request_id) == 0:
            res = infer_network.get_output(cur_request_id)
            faces = face_detection(res, initial_wh)
            if len(faces) == 1:
                for res_hp in faces:
                    xmin, ymin, xmax, ymax = res_hp
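                    # Crop the face and preprocess it for the head-pose network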
                    head_pose = frame[ymin:ymax, xmin:xmax]
                    in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                    in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                    in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))

                    infer_network_pose.exec_net(cur_request_id, in_frame_hp)
                    infer_network_pose.wait(cur_request_id)

                    angle_y_fc = infer_network_pose.get_output(
                        0, "angle_y_fc")[0]
                    angle_p_fc = infer_network_pose.get_output(
                        0, "angle_p_fc")[0]
                    angle_r_fc = infer_network_pose.get_output(
                        0, "angle_r_fc")[0]
                    head_pose_angles = np.array(
                        [angle_y_fc, angle_p_fc, angle_r_fc], dtype='float32')
                    head_pose_angles = head_pose_angles.transpose()

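                    # Run dlib's landmark predictor on the grayscale frame within the face box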
                    frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    selection = (xmin, ymin, xmax, ymax)
                    face = dlib.rectangle(*selection)
                    landmarks = predictor(frame2, face)

                    def midpoint(p1, p2):
                        return int((p1.x + p2.x) / 2), int((p1.y + p2.y) / 2)

                    try:
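                        # Corners of the right eye (dlib landmark points 36 and 39)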
                        right_bottom_point = (landmarks.part(36).x,
                                              landmarks.part(36).y)
                        right_top_point = (landmarks.part(39).x,
                                           landmarks.part(39).y)

                        rcenter_top = midpoint(landmarks.part(37),
                                               landmarks.part(38))
                        rcenter_bottom = midpoint(landmarks.part(41),
                                                  landmarks.part(40))

                        # Horizontal (corner-to-corner) extent of the right eye
                        h_length = hypot(
                            (right_top_point[0] - right_bottom_point[0]),
                            (right_top_point[1] - right_bottom_point[1]))
                        v_length = hypot((rcenter_top[0] - rcenter_bottom[0]),
                                         (rcenter_top[1] - rcenter_bottom[1]))

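                        # Pad the eye box with hand-tuned pixel margins before cropping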
                        left_sq = (landmarks.part(36).x - 22,
                                   landmarks.part(37).y - 13)
                        right_sq = (landmarks.part(39).x + 11,
                                    landmarks.part(40).y + 20)
                        right_eye = frame[left_sq[1]:right_sq[1],
                                          left_sq[0]:right_sq[0]]
                        right_midpoint = (
                            int((right_bottom_point[0] + right_top_point[0]) /
                                2),
                            int((right_bottom_point[1] + right_top_point[1]) /
                                2))
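                        # Resize the crop to the 60x60 input the gaze model expects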
                        in_frame_right_eye = cv2.resize(right_eye, (60, 60))
                        in_frame_right_eye = in_frame_right_eye.transpose(
                            (2, 0, 1))
                        in_frame_right_eye = in_frame_right_eye.reshape(
                            (1, 3, 60, 60))

                        left_bottom_point = (landmarks.part(42).x,
                                             landmarks.part(42).y)
                        left_top_point = (landmarks.part(45).x,
                                          landmarks.part(45).y)

                        lcenter_top = midpoint(landmarks.part(43),
                                               landmarks.part(44))
                        lcenter_bottom = midpoint(landmarks.part(47),
                                                  landmarks.part(46))

                        hor_length = hypot(
                            (left_bottom_point[0] - left_top_point[0]),
                            (left_bottom_point[1] - left_top_point[1]))
                        ver_length = hypot(
                            (lcenter_top[0] - lcenter_bottom[0]),
                            (lcenter_top[1] - lcenter_bottom[1]))
                        left1_sq = (landmarks.part(42).x - 11,
                                    landmarks.part(43).y - 13)
                        right1_sq = (landmarks.part(45).x + 22,
                                     landmarks.part(46).y + 20)
                        left_eye = frame[left1_sq[1]:right1_sq[1],
                                         left1_sq[0]:right1_sq[0]]
                        left_midpoint = (
                            int((left_bottom_point[0] + left_top_point[0]) /
                                2),
                            int((left_bottom_point[1] + left_top_point[1]) /
                                2))
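                        # Same 60x60 preprocessing for the left-eye crop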
                        in_frame_left_eye = cv2.resize(left_eye, (60, 60))
                        in_frame_left_eye = in_frame_left_eye.transpose(
                            (2, 0, 1))
                        in_frame_left_eye = in_frame_left_eye.reshape(
                            (1, 3, 60, 60))

                        infer_network_gaze.exec_net_g(cur_request_id,
                                                      in_frame_left_eye,
                                                      in_frame_right_eye,
                                                      head_pose_angles)
                        infer_network_gaze.wait(cur_request_id)

                        gaze_vector = infer_network_gaze.get_output(
                            0, 'gaze_vector')
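                        # Normalise the gaze vector to unit length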
                        norm = np.linalg.norm(gaze_vector)
                        gaze_vector = gaze_vector / norm
                        frame = draw(mode, frame, left1_sq, right1_sq,
                                     left_bottom_point, left_top_point,
                                     landmarks, lcenter_top, lcenter_bottom,
                                     right_bottom_point, left_sq, right_sq,
                                     right_top_point, rcenter_top,
                                     rcenter_bottom, xmax, xmin, gaze_vector,
                                     left_midpoint, right_midpoint)
                    except Exception:
                        # Skip the frame if an eye crop is empty or landmark indexing fails
                        continue
                    if key == ord('r'):
                        RUN = True
                    if key == ord('s'):
                        RUN = False
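                    # An eye whose width/height ratio exceeds 4 is treated as closed;
                    # winking one eye clicks the corresponding mouse button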
                    if RUN:
                        if (hor_length / ver_length >
                                4) and not (h_length / v_length > 4):
                            pyautogui.click(button='left')
                        if (h_length / v_length >
                                4) and not (hor_length / ver_length > 4):
                            pyautogui.click(button='right')
                        current_position = autopy.mouse.location()
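                        # Step the cursor 25 px along the negated gaze direction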
                        x_new = (int(25 * -gaze_vector[0][0] +
                                     current_position[0]),
                                 int(25 * -gaze_vector[0][1] +
                                     current_position[1]))
                        try:
                            autopy.mouse.move(x_new[0], x_new[1])
                        except ValueError:
                            continue
        cv2.imshow('Navigation', frame)
        if key == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
        if is_async_mode:
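            # Swap infer request IDs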
            cur_request_id, next_request_id = next_request_id, cur_request_id
        if key == ord("q"):
            break

    infer_network.clean()
    infer_network_pose.clean()
    infer_network_gaze.clean()
    cap.release()
    cv2.destroyAllWindows()

Example 2

def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global INFO
    global DELAY
    global CLIENT
    global KEEP_RUNNING
    global POSE_CHECKED
    global TARGET_DEVICE
    global is_async_mode
    
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()
    check_args()

    input_stream = 0
    cap = cv2.VideoCapture(input_stream)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    if input_stream:
        cap.open(input_stream)
        # Adjust DELAY to match the number of FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1

    # Initialise the class
    infer_network = Network()
    infer_network_pose = Network()
    infer_network_landmarks = Network()
    infer_network_gaze = Network()
    # Load the network to IE plugin to get shape of input layer
    plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(
        './face_detection/face_detection.xml', TARGET_DEVICE, 1, 1, 2,
        args.cpu_extension)
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        './pose/pose.xml', TARGET_DEVICE, 1, 3, 2,
        args.cpu_extension, plugin)[1]
    n_fe, c_fe, h_fe, w_fe = infer_network_landmarks.load_model(
        './landmarks/landmarks.xml', TARGET_DEVICE, 1, 1, 2,
        args.cpu_extension, plugin)[1]
    infer_network_gaze.load_model(
        './gaze_estimation/gaze.xml', TARGET_DEVICE, 3, 1, 2,
        args.cpu_extension, plugin)
    
    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")
    ret, frame = cap.read()
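    # Starting cursor position for the disabled angle-based control further below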
    current_x = 951
    current_y = 455
    while ret:

        ret, frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break

        if frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        initial_wh = [cap.get(cv2.CAP_PROP_FRAME_WIDTH),
                      cap.get(cv2.CAP_PROP_FRAME_HEIGHT)]
        in_frame_fd = cv2.resize(frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        key_pressed = cv2.waitKey(int(DELAY))

        if is_async_mode:
            # Async enabled and only one video capture
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            # Async disabled
            infer_network.exec_net(cur_request_id, in_frame_fd)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            # Results of the output layer of the network
            res = infer_network.get_output(cur_request_id)
            # Parse face detection output
            faces = face_detection(res, args, initial_wh)
            if len(faces) == 1:
                # Look for poses
                for res_hp in faces:
                    xmin, ymin, xmax, ymax = res_hp
                    head_pose = frame[ymin:ymax, xmin:xmax]
                    in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                    in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                    in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))
                    
                    infer_network_pose.exec_net(cur_request_id, in_frame_hp)
                    infer_network_pose.wait(cur_request_id)
                    
                    # Parse outputs
                    angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")[0]
                    angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")[0]
                    angle_r_fc = infer_network_pose.get_output(0, "angle_r_fc")[0]
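                    # Yaw, pitch and roll angles feed the gaze-estimation network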
                    head_pose_angles = np.array(
                        [angle_y_fc, angle_p_fc, angle_r_fc], dtype='float32')
                    head_pose_angles = head_pose_angles.transpose()
                    
                    in_frame_eye = cv2.resize(head_pose, (w_fe, h_fe))
                    in_frame_eye = in_frame_eye.transpose((2, 0, 1))
                    in_frame_eye = in_frame_eye.reshape((n_fe, c_fe, h_fe, w_fe))
                    
                    infer_network_landmarks.exec_net(cur_request_id, in_frame_eye)
                    infer_network_landmarks.wait(cur_request_id)
                    
                    align_fc3 = infer_network_landmarks.get_output(0, "align_fc3")
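                    # Group the flat (x0, y0, x1, y1, ...) output into (x, y) landmark pairs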
                    align_fc3 = [align_fc3[0][n:n+2] for n in range(0, len(align_fc3[0]), 2)]
                    width = head_pose.shape[1]
                    height = head_pose.shape[0]
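                    # Square crop around the right eye from landmark points 12 and 14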
                    rx = xmin + int(align_fc3[12][0] * width)-15
                    ry = ymin + int(align_fc3[12][1] * height)
                    rx2 = xmin + int(align_fc3[14][0] * width)
                    ry2 = (ymin + int(align_fc3[12][1] * height)) + (rx2-rx-15)
                    # Crop the eye before drawing so the box does not bleed into the model input
                    right_eye = frame[ry:ry2, rx:rx2].copy()
                    frame = cv2.rectangle(frame, (rx, ry), (rx2, ry2), (255, 255, 255), 2)
                    right_midpoint = (int((rx+rx2) / 2), int((ry+ry2) / 2))
                    in_frame_right_eye = cv2.resize(right_eye, (60, 60))
                    in_frame_right_eye = in_frame_right_eye.transpose((2, 0, 1))
                    in_frame_right_eye = in_frame_right_eye.reshape((1, 3, 60, 60))

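                    # Square crop around the left eye from landmark points 15 and 17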
                    lx = xmin + int(align_fc3[15][0] * width)
                    ly = ymin + int(align_fc3[15][1] * height)
                    lx2 = xmin + int(align_fc3[17][0] * width)+15
                    ly2 = (ymin + int(align_fc3[17][1] * height)) + (lx2-lx-15)
                    # Crop before drawing, as with the right eye
                    left_eye = frame[ly:ly2, lx:lx2].copy()
                    frame = cv2.rectangle(frame, (lx, ly), (lx2, ly2), (255, 255, 255), 2)
                    left_midpoint = (int((lx+lx2) / 2), int((ly+ly2) / 2))
                    in_frame_left_eye = cv2.resize(left_eye, (60, 60))
                    in_frame_left_eye = in_frame_left_eye.transpose((2, 0, 1))
                    in_frame_left_eye = in_frame_left_eye.reshape((1, 3, 60, 60))
                    
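                    # The gaze network consumes both eye crops plus the head-pose angles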
                    infer_network_gaze.exec_net_g(cur_request_id, in_frame_left_eye, in_frame_right_eye, head_pose_angles)
                    infer_network_gaze.wait(cur_request_id)
                    
                    gaze_vector = infer_network_gaze.get_output(0, 'gaze_vector')
                    # Normalise the gaze vector to unit length
                    norm = np.linalg.norm(gaze_vector)
                    gaze_vector = gaze_vector / norm
                    
                    # Arrow length is roughly 40% of the face width
                    arrow_length = int(0.4 * (xmax - xmin))
                    gaze_arrow_left = (
                        int(arrow_length * -gaze_vector[0][0] + left_midpoint[0]),
                        int(arrow_length * gaze_vector[0][1] + left_midpoint[1]))
                    gaze_arrow_right = (
                        int(arrow_length * -gaze_vector[0][0] + right_midpoint[0]),
                        int(arrow_length * gaze_vector[0][1] + right_midpoint[1]))
                    frame = cv2.arrowedLine(frame, left_midpoint, gaze_arrow_left, (0, 0, 0), 2)
                    frame = cv2.arrowedLine(frame, right_midpoint, gaze_arrow_right, (0, 0, 0), 2)
                    
                    #x_angle = float(180.0 / np.pi * (np.pi/2 + np.arctan2(gaze_vector[0][2], gaze_vector[0][0])))
                    #y_angle = float(180.0 / np.pi * (np.pi/2 - np.arccos(gaze_vector[0][1] / norm)))
                    
                    current_position = autopy.mouse.location()
                    # Nudge the cursor along the gaze direction (gain of 1 px per frame)
                    x_new = (int(1 * gaze_vector[0][0] + current_position[0]),
                             int(1 * -gaze_vector[0][1] + current_position[1]))
                    try:
                        autopy.mouse.move(x_new[0], x_new[1])
                    except ValueError:
                        # autopy raises ValueError if the target falls outside the screen
                        continue
                    # Disabled alternative below: move the cursor in fixed steps based on
                    # the gaze angles (x_angle, y_angle) commented out above
                    """
                    # Top
                    if x_angle > 0 and y_angle > 0:
                        time.sleep(1)
                        current_x = current_x
                        current_y = current_y+5
                        pyautogui.moveTo(current_x,current_y,duration=1)
                    # Right
                    if x_angle < 0 and y_angle > 0: 
                        time.sleep(1)
                        current_x = current_x+5
                        current_y = current_y
                        pyautogui.moveTo(current_x,current_y,duration=1)
                    # Left
                    if x_angle > 10 and y_angle < 0:
                        time.sleep(1)
                        current_x = current_x - 5
                        current_y = current_y
                        pyautogui.moveTo(current_x,current_y,duration=1)
                    # Bottom
                    if x_angle < 10 and y_angle < -10:
                        time.sleep(1)
                        current_x = current_x
                        current_y = current_y-5
                        pyautogui.moveTo(current_x,current_y,duration=1)   
                    #pyautogui.drag(100,0, duration=2)
                    #pyautogui.position()
                    #pyautogui.moveTo(100,100,duration=2)
                    """
                    
                    # Observed (x_angle, y_angle) readings per gaze direction:
                    # top (3, 12); left (35, -10); right (-30, 23); bottom (2, -32)
        cv2.imshow("Shopper Gaze Monitor", frame)

        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break

        if is_async_mode:
            # Swap infer request IDs
            cur_request_id, next_request_id = next_request_id, cur_request_id 

    infer_network.clean()
    infer_network_pose.clean()
    infer_network_landmarks.clean()
    infer_network_gaze.clean()
    cap.release()
    cv2.destroyAllWindows()
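
Note: both examples depend on a Network helper class that wraps the OpenVINO
Inference Engine and is not shown on this page. The stub below records the
interface implied by the calls above; parameter names and meanings are
assumptions inferred from usage, not the original implementation.

class Network:
    """Assumed OpenVINO wrapper; bodies omitted in this sketch."""

    def load_model(self, model_xml, device, input_size, output_size,
                   num_requests, cpu_extension=None, plugin=None):
        # Returns (plugin, input_shape); callers above also index [1]
        # to get just the (n, c, h, w) input shape.
        raise NotImplementedError

    def exec_net(self, request_id, frame):
        # Start inference on a single preprocessed input blob.
        raise NotImplementedError

    def exec_net_g(self, request_id, left_eye, right_eye, head_pose_angles):
        # Start inference on the three-input gaze-estimation model.
        raise NotImplementedError

    def wait(self, request_id):
        # Block until the request finishes; 0 indicates success.
        raise NotImplementedError

    def get_output(self, request_id, output_name=None):
        # Return a named (or the default) output blob.
        raise NotImplementedError

    def clean(self):
        # Release network and plugin resources.
        raise NotImplementedError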