Example #1
0
    def get_all_poses(self, image, mark_detector=None, face_detector=None, draw=False, draw_marks=False):
        """
        Get all the poses in an image
        """
        CNN_INPUT_SIZE = 128

        print("Initializing detectors")
        # Introduce mark_detector to detect landmarks.
        if mark_detector is None:
            from .mark_detector import MarkDetector
            mark_detector = MarkDetector()
        if face_detector is None:
            from .mark_detector import FaceDetector
            face_detector = FaceDetector()

        confidences, faceboxes = face_detector.get_faceboxes(
            image, threshold=0.2)

        poses = []
        print("{} FACEBOXES".format(len(faceboxes)))
        for facebox in faceboxes:
            if min(facebox) < 0:
                continue
            # Detect landmarks from image of 128x128.
            face_img = image[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]

            if not face_img.shape[0] or not face_img.shape[1]:
                continue

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            if draw_marks:
                mark_detector.draw_marks(image, marks, color=(0, 0, 255))

            detected_face = image[facebox[1]: facebox[3],
                                  facebox[0]: facebox[2]]  # crop detected face
            detected_face = cv2.cvtColor(
                detected_face, cv2.COLOR_BGR2GRAY)  # transform to gray scale
            detected_face = cv2.resize(
                detected_face, (48, 48))  # resize to 48x48

            # Try pose estimation with 68 points.
            pose = self.solve_pose_by_68_points(marks)
            #pose = pose_estimator.solve_pose(marks)

            poses.append((pose[0].copy(), pose[1].copy()))

            if draw:
                self.draw_annotation_box(
                    image, pose[0], pose[1], color=(255, 128, 128))

        return poses
Example #2
0
def get_face(img_queue, box_queue):
    '''
        Get face from image queue. this function is used for multiprocessing.
        introduce mark_detector to detect landmarks
    '''
    mark_detector = MarkDetector()
    while True:
        image = img_queue.get()
        boxes = mark_detector.extract_cnn_facebox(image)
        box_queue.put(boxes)  # the boxes of faces.
Example #3
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break


        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv.flip(frame, 2)

       # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            mark_detector.draw_box(frame, [facebox], box_color=(255, 125, 0))

        # Show preview.
        cv.imshow("Preview", frame)
        if cv.waitKey(10) == 27:  # press ESC key
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #4
0
    def __init__(self):
        print "OpenCV version: {}".format(cv2.__version__)
        self.CNN_INPUT_SIZE = 128

        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))

        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(
                                       self.mark_detector,
                                       self.img_queue,
                                       self.box_queue,
                                   ))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool,
            self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name",
                                            "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image,
                                               self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param(
            "show_annotation_box", True)
        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)

        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)

        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()
Example #5
0
def gen3(camera):
    """Video streaming generator function."""
    mark_detector = None
    flag1 = True
    if mark_detector == None:
        mark_detector = MarkDetector()

        # Introduce pose estimator to solve pose. Get one frame to setup the
        # estimator according to the image size.
        # height, width = sample_frame.shape[:2]
        # pose_estimator = PoseEstimator(img_size=(height, width))

        # Introduce scalar stabilizers for pose.
        pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(6)
        ]

    while True:

        frame, flag1 = camera.get_frame3(mark_detector, pose_stabilizers,
                                         flag1)
        # print (frame)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
Example #6
0
def init_model(transform):
    global mark_detector, box_process, img_queue, box_queue, pose_estimator, pose_stabilizers, tm
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()

    # img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()
    return None, None
Example #7
0
    def init(self):
        
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
                state_num=2,
                measure_num=1,
                cov_process=0.1,
                cov_measure=0.1) for _ in range(6)]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #    
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))
Example #8
0
	def __init__(self):
		# Load the parameters
		self.conf = config()
		# initialize dlib's face detector (HOG-based) and then create the
		# facial landmark predictor
		print("[INFO] loading facial landmark predictor...")
		self.detector = dlib.get_frontal_face_detector()
		self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path)
		
		# grab the indexes of the facial landmarks for the left and
		# right eye, respectively
		(self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
		(self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
		
		# initialize the video stream and sleep for a bit, allowing the
		# camera sensor to warm up
		self.cap = cv2.VideoCapture(0)
		if self.conf.vedio_path == 0:
			self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
		_, sample_frame = self.cap.read()
		
		# Introduce mark_detector to detect landmarks.
		self.mark_detector = MarkDetector()
		
		# Setup process and queues for multiprocessing.
		self.img_queue = Queue()
		self.box_queue = Queue()
		self.img_queue.put(sample_frame)
		self.box_process = Process(target=get_face, args=(
			self.mark_detector, self.img_queue, self.box_queue,))
		self.box_process.start()
		
		# Introduce pose estimator to solve pose. Get one frame to setup the
		# estimator according to the image size.
		self.height, self.width = sample_frame.shape[:2]
		self.pose_estimator = PoseEstimator(img_size=(self.height, self.width))
		
		# Introduce scalar stabilizers for pose.
		self.pose_stabilizers = [Stabilizer(
			state_num=2,
			measure_num=1,
			cov_process=0.1,
			cov_measure=0.1) for _ in range(6)]
		
		self.tm = cv2.TickMeter()
		# Gaze tracking
		self.gaze = GazeTracking()
Example #9
0
def init():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    return cap, video_src, img_queue, box_queue, tm, mark_detector, pose_estimator
def webcam_main():
    print("Camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    time.sleep(2.0)

    mark_detector = MarkDetector()
    
    # loop over the frames from the video stream
    while True:
        _, frame = vs.read()
        start = cv2.getTickCount()

        frame = imutils.resize(frame, width=750, height=750)
        frame = cv2.flip(frame, 1)
        faceboxes = mark_detector.extract_cnn_facebox(frame)

        if faceboxes is not None:
            for facebox in faceboxes:
                # Detect landmarks from image of 64X64 with grayscale.
                face_img = frame[facebox[1]: facebox[3],
                                    facebox[0]: facebox[2]]
                # cv2.rectangle(frame, (facebox[0], facebox[1]), (facebox[2], facebox[3]), (0, 255, 0), 2)
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
                face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1)

                land_start_time = time.time()
                marks = mark_detector.detect_marks_keras(face_img0)
                # marks *= 255
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]
                # Draw Predicted Landmarks
                mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2)

        fps_time = (cv2.getTickCount() - start)/cv2.getTickFrequency()
        cv2.putText(frame, '%.1ffps'%(1/fps_time) , (frame.shape[1]-65,15), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0,255,0))
        # show the frame
        cv2.imshow("Frame", frame)
        # writer.write(frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

    # do a bit of cleanup
    cv2.destroyAllWindows()
    vs.stop()
def headPoseEstimation():
    print("HEAD POSE ESTIMATION...")
    """MAIN"""
    # Video source from webcam or video file.
    #video_src = 0
    #video_src = 'EWSN.avi'
    #cam = cv2.VideoCapture(video_src)
    #_, sample_frame = cam.read()

    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    print("1")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    print("2")
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    print("3")
    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    # start = time.time()
    # previousTime1 = start
    # previousTime2 = start
    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    headPoseDirection = 'emtpy'

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break
        #print("4")
        #print(frame.shape)
        #print("5")
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        #if video_src == 0:
        #    print("6")
        #    frame = cv2.flip(frame, 2)
        #    cv2.imwrite("Preview.png", frame)
        #    print("7")

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)
        #print("8")
        # Get face from box queue.
        facebox = box_queue.get()
        #print(type(facebox)
        #print(facebox
        #print("9")
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            print(face_img.shape)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # # Get average position of nose
            noseMarksTemp = []
            noseMarksTemp.append(marks[30][0])
            noseMarksTemp.append(marks[30][1])
            noseMarks[0] = noseMarksTemp

            for i in range(9, 0, -1):
                noseMarks[i] = noseMarks[i - 1]

            # Get the direction of head movement
            headPoseDirection = calculateDirection(noseMarks)
            #if headPoseDirection != 'still':
            directionArray.append(headPoseDirection)
            #print(directionArray
            #print(len(directionArray)
            print("To capture a movement press 'a' and perform a movement.")
            #currentTime1 = time.time()
            if cv2.waitKey(5) == ord('a') and not classifyMoves:
                classifyMoves = 1
                print("Start classifying movement...")
                timer = time.time()
                currentTime = timer
                previousTime1 = timer
                previousTime2 = timer

            if cv2.waitKey(5) == ord('b') and classifyMoves:
                classifyMoves = 0
                print("Stopped classifying movement...")

            currentTime = time.time()
            if currentTime - previousTime1 > 2 and classifyMoves:
                print("------------------------------------------------")
                print("Elapsed timer 1: " + str(currentTime - previousTime1))
                #print(len(directionArray)

                # Get most common direction
                moveClass = mostCommon(directionArray)
                #moveSequence.append(moveClass)
                print(moveClass)

                # Get a sequence of head movements
                # if currentTime - previousTime2 > 10 and classifyMoves == 1 and len(moves) == 0:
                #     print("Elapsed timer 2: " + str(currentTime - previousTime2)
                #     numMoves = len(moveSequence)
                #     moves = moveSequence[(numMoves-5):(numMoves-1)]
                #     print(moves
                #     moveSequence = []
                #     previousTime2 = currentTime
                #     classifyMoves = 0
                classifyMoves = 0
                directionArray = []
                previousTime1 = currentTime

                print(
                    "To continue type 'c' or to recapture a movement type 'a'."
                )

            if cv2.waitKey(5) == ord('c'):
                break

            #print(previousTime
            # Uncomment following line to show raw marks.
            #mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # if len(moves) > 1:
        #     cv2.putText(frame, moves[0], (450,70), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[1], (450,100), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[2], (450,130), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[3], (450,160), font, 1, (0,0,0), 4)

        cv2.putText(frame, headPoseDirection, (20, 70), font, 1, (0, 255, 0),
                    4)

        # Show preview.
        #cv2.namedWindow("", cv2.WINDOW_NORMAL)
        cv2.imshow("Preview", frame)
        #cv2.resizeWindow("preview", 5000,5000)
        if cv2.waitKey(5) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    return moveClass
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        pre_yaw = self.fc_yaw(x)
        pre_pitch = self.fc_pitch(x)
        pre_roll = self.fc_roll(x)

        return pre_yaw, pre_pitch, pre_roll


gpu = 0

if __name__ == '__main__':
    args = parse_args()
    cuda0 = torch.device('cuda:0')
    cudnn.enabled = True
    mark_detector = MarkDetector()
    batch_size = 1
    gpu = args.gpu_id
    snapshot_path = args.snapshot
    out_dir = 'output/video'
    video_path = args.video_path
    # initialize the NN
    model = ConvAutoencoder(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3])

    estimation_model = pose_estimation(torchvision.models.resnet.Bottleneck,
                                       [3, 4, 6, 3], 67)
    estimation_model.cuda(gpu)
    hopenet = Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3],
                      67).cuda()
    hopenet.load_state_dict(
        torch.load(
def Camera_Capture():
    mark_detector = MarkDetector()
    name = input("Enter student id:")
    directory = os.path.join(facepath, name)
    
    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok = 'True')
    
    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok = 'True')
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    poses=['frontal','right','left','up','down']
    file=0
    cap = cv2.VideoCapture(file)
    
    ret, sample_frame = cap.read()
    i = 0
    count = 0
    if ret==False:
        return    
        
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    
    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]
    images_saved_per_pose=0
    number_of_images = 0
    
    shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH)
    while i<5:
        saveit = False
        # Read frame, crop it, flip it, suits your needs.
        ret, frame = cap.read()
        if ret is False:
            break
        if count % 5 !=0: # skip 5 frames
            count+=1
            continue
        if images_saved_per_pose==IMAGE_PER_POSE:
            i+=1
            images_saved_per_pose=0

        # If frame comes from webcam, flip it so it looks like a mirror.
        if file == 0:
            frame = cv2.flip(frame, 2)
        original_frame=frame.copy()
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        facebox = mark_detector.extract_cnn_facebox(frame)
    
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            x1=max(facebox[0]-0,0)
            x2=min(facebox[2]+0,width)
            y1=max(facebox[1]-0,0)
            y2=min(facebox[3]+0,height)
            
            face = frame[y1: y2,x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
    
            marks = mark_detector.detect_marks([face_img])
    
            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]
        
            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
    
            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))
    
            # print(steady_pose[0][0])
            # if steady_pose[0][0]>0.1:
            #     print('right')
            # else: 
            #     if steady_pose[0][0]<-0.1:
            #         print('left')
            # if steady_pose[0][1]>0.1:
            #     print('down')
            # else: 
            #     if steady_pose[0][1]<-0.1:
            #         print('up')
            # print(steady_pose[0])
            if i==0:
                if abs(steady_pose[0][0])<ANGLE_THRESHOLD and abs(steady_pose[0][1])<ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True                    
            if i==1:
                if steady_pose[0][0]>ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==2:
                if steady_pose[0][0]<-ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==3:
                if steady_pose[0][1]<-ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==4:
                if steady_pose[0][1]>ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            # Show preview.
            if i>=5:
                print ('Thank you')
                break

            frame = cv2.putText(frame,poses[i] +' : '+ str(images_saved_per_pose)+'/'+str(IMAGE_PER_POSE),(10,50),cv2.FONT_HERSHEY_SIMPLEX,2,(255,255,255),1,cv2.LINE_AA)
            frame = cv2.rectangle(frame, (x1,y1), (x2,y2),(255,255,0),2)
            cv2.imshow("Preview", frame)

            if saveit:                        
                face = dlib.rectangle(x1, y1, x2, y2)
                face_aligned = face_aligner.align(original_frame, frame_gray, face)
                cv2.imwrite(os.path.join(directory, str(name)+'_'+str(number_of_images)+'.jpg'), face_aligned)
                print(images_saved_per_pose)
                number_of_images+=1

        if cv2.waitKey(10) == 27:
            break    
    cap.release()
    cv2.destroyAllWindows()
def main():
    instructions = 0
    print "--- Intention classification for communication between a human and a robot ---"
    if instructions == 1:
        print "First you will be required to present a facial expression before you will do a head movement."
        print "If done correctly these gestures will be detected by the robot and it will perform the desired task."
        raw_input("Press Enter to continue...")

    if instructions == 1:
        print "This is the module for facial expression recognition."
        print "This program can detect the emotions: Happy and Angry."
        print "The program will look for the expression for 3 seconds."
    raw_input("To proceed to Facial Expression Recognition press Enter...")
    predictExp = 0

    # Load Facial Expression Recognition trained model
    print "- Loading FER model... -"
    #faceExpModel = tf.keras.models.load_model("/home/bjornar/ML_models/FER/Good models(80+)/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5")
    faceExpModel = tf.keras.models.load_model(
        "/home/project/Bjorn/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5"
    )

    # Load Face Cascade for Face Detection
    print "- Loading Face Cascade for Face Detection... -"
    #cascPath = "/home/bjornar/MScDissertation/TrainingData/FaceDetection/haarcascade_frontalface_default.xml"
    cascPath = "/home/project/Bjorn/IntentionClassification-Repository/haarcascade_frontalface_default.xml"
    faceCascade = cv2.CascadeClassifier(cascPath)

    ## Initializing Head Movement variables
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    sample_frame = cv2.imread("sample_frame.png")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]

    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    headPoseDirection = 'emtpy'

    if camera == 'kinect':
        ## Initialize Kinect camera
        print "Initializing camera..."
        try:
            from pylibfreenect2 import OpenGLPacketPipeline
            pipeline = OpenGLPacketPipeline()
        except:
            try:
                from pylibfreenect2 import OpenCLPacketPipeline
                pipeline = OpenCLPacketPipeline()
            except:
                from pylibfreenect2 import CpuPacketPipeline
                pipeline = CpuPacketPipeline()
        #print("Packet pipeline:", type(pipeline).__name__)

        # Create and set logger
        #logger = createConsoleLogger(LoggerLevel.Debug)
        setGlobalLogger()

        fn = Freenect2()
        num_devices = fn.enumerateDevices()
        if num_devices == 0:
            print("- No device connected! -")
            sys.exit(1)

        serial = fn.getDeviceSerialNumber(0)
        device = fn.openDevice(serial, pipeline=pipeline)

        listener = SyncMultiFrameListener(FrameType.Color | FrameType.Ir
                                          | FrameType.Depth)

        # Register listeners
        device.setColorFrameListener(listener)
        device.setIrAndDepthFrameListener(listener)

        device.start()

        # NOTE: must be called after device.start()
        registration = Registration(device.getIrCameraParams(),
                                    device.getColorCameraParams())

    elif camera == 'webcam':
        #video_capture = cv2.VideoCapture(0)
        video_capture = cv2.VideoCapture(-1)

    elif camera == 'video':
        #video_capture = cv2.VideoCapture(0)
        video_capture = cv2.VideoCapture(
            "/home/project/Bjorn/SonyHandycamTest.mp4")

    ## Facial Expression Recognition variables
    FER_prediction = []
    FERclass = ''
    FERstart = 0
    classifyMoves = 0

    ## Head movement variables
    predictHeadMov = 3
    HeadMov = []
    HMCclass = ''
    detectedFaces = []
    mark = []

    notDone = 0
    progressStatus = [0, 0]

    while notDone in progressStatus:  # This waits for each module to finsih
        if camera == 'kinect':
            frames = listener.waitForNewFrame()

            color = frames["color"]
            color = color.asarray()
            color = cv2.resize(color, (int(873), int(491)))
            color = color[0:480, 150:790]
            color = np.delete(color, np.s_[3::], 2)

        elif camera == 'webcam':
            # Capture frame-by-frame
            ret, color = video_capture.read()

        elif camera == 'video':
            # Capture frame-by-frame
            ret, color = video_capture.read()

        ## Detect facial expression
        predictExpNums = [1, 2]
        if predictExp == 0:
            while predictExp not in predictExpNums:
                predictExp = int(
                    raw_input(
                        "\nPress 1 to detect Facial Expression or press 2 to do Head Movement classification."
                    ))
            if predictExp == 1:
                predictExp = 1
                print "------ Facial Expression Recognition ------"
            elif predictExp == 2:
                predictHeadMov = 0
                progressStatus[0] = 1

            FERstart = time.time()

        elif predictExp == 1:
            currentTime = time.time()
            if currentTime - FERstart < 10:
                FER_prediction.append(
                    facialExpressionRecogntion(color, faceExpModel,
                                               faceCascade))
            else:
                predictExp = 2
                FER_prediction = filter(None, FER_prediction)
                FERclass = mostCommon(FER_prediction)
                FERclass = FERclass[2:7]
                predictHeadMov = 0
                if FERclass == '':
                    print("Did not detect an expression, try again.")
                    predictExp = 0
                else:
                    progressStatus[0] = 1
                    print "Detected expression: " + str(FERclass)
                    print "Progress: FER DONE"
                # else:
                #     #cv2.imwrite("sample_frame.png", color)
                #     break

        ## Detect head movement class
        if predictHeadMov == 0:
            predictHeadMov = int(
                raw_input(
                    "\nPress 1 to do Head Movement classification or 2 to skip."
                ))
            if predictHeadMov == 1:
                predictHeadMov = 1
                print "------ Head Movement Classification ------"
                moveTime = int(
                    raw_input(
                        "How many seconds should I record your movement?"))
                #moveTime = 2
            else:
                predictHeadMov = 2
            timer = time.time()
            previousTime1 = timer
            previousTime2 = timer

        if predictHeadMov == 1:
            print color.shape
            color = color[0:480, 0:480]
            color = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
            cv2.imshow("", color)
            raw_input()
            print color.shape
            # Feed frame to image queue.
            img_queue.put(color)

            #pdb.set_trace()

            # Get face from box queue.
            facebox = box_queue.get()
            print color.shape

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = color[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = mark_detector.detect_marks(face_img)

                #Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(color, headPoseDirection, (20, 70), font, 1,
                            (0, 255, 0), 4)

                # # Get average position of nose
                noseMarksTemp = []
                noseMarksTemp.append(marks[30][0])
                noseMarksTemp.append(marks[30][1])
                noseMarks[0] = noseMarksTemp

                for i in range(9, 0, -1):
                    noseMarks[i] = noseMarks[i - 1]

                # Get the direction of head movement
                headPoseDirection = calculateDirection(noseMarks)

                directionArray.append(headPoseDirection)

                if classifyMoves == 0:
                    classifyMoves = 1
                    timer = time.time()
                    previousTime1 = timer
                    previousTime2 = timer

                currentTime = time.time()
                if currentTime - previousTime1 > moveTime and classifyMoves == 1:
                    print "------------------------------------------------"
                    print "Elapsed timer 1: " + str(currentTime -
                                                    previousTime1)

                    # Get most common direction
                    HMCclass = mostCommon(directionArray)

                    classifyMoves = 2
                    directionArray = []
                    previousTime1 = currentTime

                    progressStatus[1] = 1
                    print "Progress: HMC DONE"
            else:
                print "Did not detect a face"
        elif predictHeadMov == 2:
            progressStatus[1] = 1
            print "Skipped Head Movement Estimation."
            break

        # if notDone in progressStatus and predictHeadMov == 2 and predictExp == 2:
        #     print "You seem to have skipped one or more tasks."
        #     inpt = ''
        #     while inpt == '':
        #         inpt = raw_input("To do FER press 1 and to do HMC press 2...")
        #         if input == '1':
        #             predictExp = 1
        #         elif input == '2':
        #             predictHeadMov

        cv2.imshow("", color)
        if camera == 'kinect':
            listener.release(frames)

        key = cv2.waitKey(delay=1)
        if key == ord('q'):
            break

    if camera == 'kinect':
        listener.release(frames)
        device.stop()
        device.close()
    elif camera == 'webcam' or camera == 'video':
        video_capture.release()

    cv2.destroyAllWindows()

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    print "---------------- RESULT ----------------"

    if FERclass != '':
        print "Detected facial expression: " + str(FERclass)
    else:
        print "Did not detect any expression."

    if HMCclass != '':
        print "Detected head movement: " + str(HMCclass)
    else:
        print "Did not detect a head movement."

    print "----------------------------------------"

    intentionClassification = [FERclass, HMCclass]

    return intentionClassification
Example #15
0
def main():
    #bagreader = BagFileReader(args.video, 640,480,848,480,30,30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)
    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps,
                          (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:

        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if frame is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)

        mess = "Not detect pose"

        if faceboxes is not None:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(
                #     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                pose_estimator.draw_annotation_box(frame,
                                                   pose[0],
                                                   pose[1],
                                                   color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame,
                    "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0),
                    thickness=2)
        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #16
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
            right_corner = tuple([int(i) for i in marks[36]])
            left_corner = tuple([int(i) for i in marks[45]])
            # print(marks[36], marks[45])
            cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2)

            pixel_distance = int(
                math.sqrt((right_corner[0] - left_corner[0])**2 +
                          (right_corner[1] - left_corner[1])**2))
            estimated_distance = (real_width * focal_length) / pixel_distance

            cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            print(steady_pose[1])
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            angles = process_eyes(frame, marks)
            if bool(angles) is True:
                # print(angles)
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def Image_Database_Generator():
    mark_detector = MarkDetector()
    video_capture = cv2.VideoCapture(file)
    name = input("Enter student id:")
    directory = os.path.join(facepath, name)

    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok='True')

    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok='True')
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    number_of_images = 0
    MAX_NUMBER_OF_IMAGES = 50
    count = 0
    count = 0
    while number_of_images < MAX_NUMBER_OF_IMAGES:
        ret, frame = video_capture.read()
        count += 1
        if ret == False:
            break
        if count % 5 != 0:
            continue

        frame = cv2.flip(frame, 1)
        height, width, _ = frame.shape
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        original_frame = frame.copy()

        height, width = frame.shape[:2]

        shape_predictor = dlib.shape_predictor(
            "shape_predictor_68_face_landmarks.dat")
        face_aligner = FaceAligner(shape_predictor,
                                   desiredFaceWidth=FACE_WIDTH)

        facebox = mark_detector.extract_cnn_facebox(frame)

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            x1 = max(facebox[0] - 0, 0)
            x2 = min(facebox[2] + 0, width)
            y1 = max(facebox[1] - 0, 0)
            y2 = min(facebox[3] + 0, height)

            face = frame[y1:y2, x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            marks = mark_detector.detect_marks([face_img])

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Show preview.
            if showit:
                frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0),
                                      2)
                cv2.imshow("Preview", frame)

            face = dlib.rectangle(x1, y1, x2, y2)
            face_aligned = face_aligner.align(original_frame, frame_gray, face)
            cv2.imwrite(
                os.path.join(directory,
                             str(name) + '_' + str(number_of_images) + '.jpg'),
                face_aligned)

            number_of_images += 1
            print(number_of_images)

        if cv2.waitKey(10) == 27:
            break
    print('Thank you')
    video_capture.release()
    cv2.destroyAllWindows()
Example #18
0
def main():

    ''' main function'''

    img_dir = "img_dir/"     # image file dir

    # setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_list = []
    for filename in os.listdir(img_dir):
        frame = cv2.imread(img_dir + filename)
        img_list.append(frame)
        img_queue.put(frame)
    box_process = Process(target=get_face, args=(img_queue, box_queue))
    box_process.start()

    # introduce mark_detector to detect landmarks
    mark_detector = MarkDetector()

    while True:
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # flipcode > 0: horizontal flip; flipcode = 0: vertical flip; flipcode < 0: horizental and vertical flip
        # frame = cv2.flip(frame, 2)

        if len(img_list) != 0:
            frame = img_list.pop(0)
            raw_frame = frame.copy()
            # introduce pos estimator to solve pose. Get one frames to setup the
            # estimator according to the images size.
            height, width = frame.shape[:2]
            pose_estimator = PoseEstimator(img_size=(height, width))
        else:
            break

        # Pose estimation by 3 steps.
        # 1. detect faces.
        # 2. detect landmarks.
        # 3. estimate pose.

        # get face from box queue.
        faceboxes = box_queue.get()
        # print("the length of facebox is " + str(len(faceboxes)))
        for facebox in faceboxes:
            # detect landmarks from image 128 * 128
            face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]    # cut off the area of face.

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)    # BGR -> RGB
            marks = mark_detector.detect_marks(face_img)

            # covert the marks locations from local CNN to global image.
            marks[:, 0] *= (facebox[2] - facebox[0]) # the width of rectangle.
            marks[:, 1] *=  (facebox[3] - facebox[1])  # the length of rectangle.
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # uncomment following line to show raw marks.
            # util.draw_marks(frame, marks, color=(0, 255, 0))
            # util.draw_faceboxes(frame, facebox)

            # try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
           
            # pose[0] is rotation_vector, and pose[1] is translation_vector
            
            pose_np = np.array(pose).flatten()
            pose = np.reshape(pose_np, (-1, 3))

            angle = util.get_angle(pose[0])
            pose_estimator.get_warp_affined_image(frame, facebox, marks, angle[2])

        # show preview
        cv2.imshow("Preview", frame)
        cv2.imshow("raw_frame", raw_frame)
        if cv2.waitKey(0) == 27:
            continue

    # clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #19
0
                         name="logits")

# print(tf.all_variables())

sess = tf.Session()
INPUT_FILE = '/private/var/py/ReadHeart/static/datas/1532335528.0497806/1532335528.0497806.mp4'

# cam = cv2.VideoCapture(video_src)
cam = cv2.VideoCapture(INPUT_FILE)
# _, sample_frame = cam.read()
#
#
# print('sample_frame', sample_frame)
#
# # Introduce mark_detector to detect landmarks.
mark_detector = MarkDetector()
# facebox = mark_detector.extract_cnn_facebox(sample_frame)
# print('Facebox', facebox)
#
# height, width = sample_frame.shape[:2]
# pose_estimator = PoseEstimator(img_size=(height, width))

# Introduce scalar stabilizers for pose.
pose_stabilizers = [
    Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
    for _ in range(6)
]
# sess =
frame = None
count = 0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotaion on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotaion on frame.
            pose_estimator.draw_annotation_box(
                frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
Example #21
0
def main():
    """MAIN"""
    cv2.namedWindow("Test")  # Create a named window
    cv2.moveWindow("Test", 900, 600)  # Move it to (40,30)

    screenWidth, screenHeight = pyautogui.size()
    st = 'Last command'

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Setting up process for listening to audio commands
    voice_command_queue = Q()
    stt_process = Thread(target=get_voice_command,
                         args=(voice_command_queue, ))
    stt_process.setDaemon(True)
    stt_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(255, 128, 128))

            # Uncomment following line to draw head axes on frame.
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])

            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]

            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)

            # print(xpos, ypos)

            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():

                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'

                print(command)

        cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)
        scale_percent = 30
        # calculate the 50 percent of original dimensions
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)

        # dsize
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)

        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def single_main():
    """MAIN"""
    cam = cv2.VideoCapture(VIDEO_PATH)
    _, sample_frame = cam.read()

    # Load Landmark detector
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    while True:
        frame_got, frame = cam.read()
        if frame_got is False:
            break
        img_queue.put(frame)
        start = cv2.getTickCount()

        # Get face from box queue.
        faceboxes = box_queue.get()
        if faceboxes is not None:
            for facebox in faceboxes:
                # if facebox is not None:
                # Detect landmarks from image of 64x64.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                # cv2.rectangle(frame, (facebox[0], facebox[1]), (facebox[2], facebox[3]), (0, 255, 0), 2)
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
                face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE,
                                             1)
                marks = mark_detector.detect_marks_keras(
                    face_img0)  # landmark predictor
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Draw Predicted Landmarks
                mark_detector.draw_marks(frame, marks, color=(255, 255, 255))

        fps_time = (cv2.getTickCount() - start) / cv2.getTickFrequency()
        cv2.putText(frame, '%.1ffps' % (1 / fps_time),
                    (frame.shape[1] - 65, 15), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                    (127, 127, 127))
        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == ord('q'):
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    # out.release()
    cam.release()
    cv2.destroyAllWindows()
Example #23
0
def main():

    # Initiate Class for Face & Mark Detector
    face_detector = FaceDetector()
    mark_detector = MarkDetector()

    # Landmark 3D for projection and landmark 2d index of corresponding mark
    landmarks_3d = np.array(
        [
            [0.000000, 0.000000, 6.763430],  # 33 nose bottom edge
            [6.825897, 6.760612, 4.402142],  # 17 left brow left corner
            [1.330353, 7.122144, 6.903745],  # 21 left brow right corner
            [-1.330353, 7.122144, 6.903745],  # 22 right brow left corner
            [-6.825897, 6.760612, 4.402142],  # 26 right brow right corner
            [5.311432, 5.485328, 3.987654],  # 36 left eye left corner
            [1.789930, 5.393625, 4.413414],  # 39 left eye right corner
            [-1.789930, 5.393625, 4.413414],  # 42 right eye left corner
            [-5.311432, 5.485328, 3.987654],  # 45 right eye right corner
            [2.005628, 1.409845, 6.165652],  # 31 nose left corner
            [-2.005628, 1.409845, 6.165652],  # 35 nose right corner
            [2.774015, -2.080775, 5.048531],  # 48 mouth left corner
            [-2.774015, -2.080775, 5.048531],  # 54 mouth right corner
            [0.000000, -3.116408, 6.097667],  # 57 mouth central bottom corner
            [0.000000, -7.415691, 4.070434]  # 8 chin corner
        ],
        dtype=np.double)
    lm_2d_index = [33, 17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8]

    # Define color for facebox
    color = (244, 134, 66)

    # Initiate Video Streaming
    file_name = 'assets/trump.gif'
    vs = FileVideoStream(file_name).start()
    time.sleep(2.0)

    frame = vs.read()

    while frame is not None:
        frame = imutils.resize(frame, width=800, height=600)
        (H, W) = frame.shape[:2]

        faceboxes = face_detector.extract_square_facebox(frame)

        if faceboxes is not None:
            for facebox in faceboxes:
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]

                marks = mark_detector.detect_marks(face_img)
                marks *= facebox[2] - facebox[0]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                rvec, tvec, cm, dc = get_headpose(h=H,
                                                  w=W,
                                                  landmarks_2d=marks,
                                                  landmarks_3d=landmarks_3d,
                                                  lm_2d_index=lm_2d_index)

                draw_front_box(frame, color, rvec, tvec, cm, dc)

        cv2.imshow("3D Face Box", frame)
        # writer.write(frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
        if key == ord("w"):
            frame = vs.read()
Example #24
0
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)

    # Get the frame size. This will be used by the pose estimator.
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    # 2. Introduce a pose estimator to solve pose.
    pose_estimator = PoseEstimator(img_size=(height, width))

    # 3. Introduce a mark detector to detect landmarks.
    mark_detector = MarkDetector()

    # 4. Measure the performance with a tick meter.
    tm = cv2.TickMeter()

    # Now, let the frames flow.
    while True:

        # Read a frame.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # If the frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    if args.out != None:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    tm = cv2.TickMeter()

    cnt = 0

    input_path = args.input_path
    listdir = os.listdir(input_path)
    for v_name in listdir:
        v_path = os.path.join(input_path, v_name)
        cap = cv2.VideoCapture(v_path)

        while True:
            # Read frame, crop it, flip it, suits your needs.
            frame_got, frame = cap.read()
            if frame_got is False:
                break

            # Crop it if frame is larger than expected.
            # frame = frame[0:480, 300:940]

            # If frame comes from webcam, flip it so it looks like a mirror.
            if video_src == 0:
                frame = cv2.flip(frame, 2)

            # Pose estimation by 3 steps:
            # 1. detect face;
            # 2. detect landmarks;
            # 3. estimate pose

            # Feed frame to image queue.
            img_queue.put(frame)

            # Get face from box queue.
            facebox = box_queue.get()

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                           facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks(face_img)
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                pose_estimator.draw_annotation_box(
                    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Show preview.
            # cv2.imshow("Preview", frame)
            # if cv2.waitKey(10) == 27:
            #     break
            if args.out != None:
                output_movie.write(frame)
            else:
                cv2.imshow("Preview", frame)

            cnt = cnt + 1
            if cnt % 100 == 0:
                print(str(cnt), flush=True)

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
Example #26
0
def main(strargument):
    shutil.rmtree("./test")
    os.mkdir("./test")
    os.remove("result.txt")
    f = open("result.txt", "a")
    cap = cv2.VideoCapture(strargument)
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap = cv2.VideoCapture("NTQ.mkv")
    #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi")
    #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg")
    # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview")
    success, frame = cap.read()
    startId = countIdFolder("./face_db/")
    # quit if unable to read the video file
    if not success:
        print('Failed to read video')
        sys.exit(1)
    #The color of the rectangle we draw around the face
    rectangleColor = (0, 165, 255)
    #variables holding the current frame number and the current faceid
    frameCounter = 0
    currentFaceID = 0
    #Variables holding the correlation trackers and the name per faceid
    conf = configparser.ConfigParser()
    conf.read("config/main.cfg")
    mtcnn_detector = load_mtcnn(conf)
    MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH")
    VERIFICATION_THRESHOLD = float(
        conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD"))
    FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH")
    BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH"))
    MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE"))
    MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL"))
    YAWL = int(conf.get("CUSTOM", "YAWL"))
    YAWR = int(conf.get("CUSTOM", "YAWR"))
    PITCHL = int(conf.get("CUSTOM", "PITCHL"))
    PITCHR = int(conf.get("CUSTOM", "PITCHR"))
    ROLLL = int(conf.get("CUSTOM", "ROLLL"))
    ROLLR = int(conf.get("CUSTOM", "ROLLR"))
    MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED"))
    IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH"))
    EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y"))
    EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X"))
    SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH"))
    MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN"))
    MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE"))
    LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME"))
    ROIXL = int(conf.get("CUSTOM", "ROIXL"))
    ROIXR = int(conf.get("CUSTOM", "ROIXR"))
    ROIYB = int(conf.get("CUSTOM", "ROIYB"))
    ROIYA = int(conf.get("CUSTOM", "ROIYA"))
    maxDisappeared = MAXDISAPPEARED  ## khong xuat hien toi da 100 frame
    faces_db = load_faces(FACE_DB_PATH, mtcnn_detector)
    # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector)
    time.sleep(10)
    for item in faces_db:
        print(item["name"])
    listTrackedFace = []
    mark_detector = MarkDetector()
    tm = cv2.TickMeter()
    _, sample_frame = cap.read()
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    with tf.Graph().as_default():
        with tf.Session() as sess:
            load_mobilefacenet(MODEL_PATH)
            inputs_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            try:
                start = time.time()
                while True:
                    start1 = time.time()
                    retval, frame = cap.read()

                    #Increase the framecounter
                    frameCounter += 1
                    if retval:
                        _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR]
                        cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB),
                                      (0, 0, 255), 2)
                        good_face_index = []
                        # faces_db = load_face_db.face_db
                        if (frameCounter % 1) == 0:
                            ### embed and compare name
                            for i, face_db in enumerate(faces_db):
                                if not os.path.isdir(
                                        "./face_db/" +
                                        face_db["name"].split("_")[0]):
                                    faces_db.pop(i)
                            faces, landmarks = mtcnn_detector.detect(_frame)
                            if faces.shape[0] is not 0:
                                input_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                save_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                (yaw, pitch, roll) = (0, 0, 0)
                                for i, face in enumerate(faces):
                                    if round(faces[i, 4], 6) > IS_FACE_THRESH:
                                        bbox = faces[i, 0:4]
                                        points = landmarks[i, :].reshape(
                                            (5, 2))
                                        nimg = face_preprocess.preprocess(
                                            _frame,
                                            bbox,
                                            points,
                                            image_size='112,112')
                                        save_images[i, :] = nimg
                                        nimg = nimg - 127.5
                                        nimg = nimg * 0.0078125
                                        input_images[i, :] = nimg
                                        (x1, y1, x2, y2) = bbox.astype("int")
                                        if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2:
                                            continue
                                        temp = int((y2 - y1) / EXTEND_Y)
                                        y1 = y1 + temp
                                        y2 = y2 + temp
                                        temp = int((x2 - x1) / EXTEND_X)
                                        if x1 > temp:
                                            x1 = x1 - temp
                                        x2 = x2 + temp
                                        # cv2.imshow("mainframe",frame)
                                        # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2])
                                        face_img = cv2.resize(
                                            _frame[y1:y2, x1:x2], (128, 128))
                                        # cv2.imshow("ok",face_img)
                                        face_img = cv2.cvtColor(
                                            face_img, cv2.COLOR_BGR2RGB)
                                        tm.start()
                                        marks = mark_detector.detect_marks(
                                            [face_img])
                                        tm.stop()
                                        marks *= (x2 - x1)
                                        marks[:, 0] += x1
                                        marks[:, 1] += y1
                                        # mark_detector.draw_marks(
                                        #         frame, marks, color=(0, 255, 0))
                                        pose, (
                                            yaw, pitch, roll
                                        ) = pose_estimator.solve_pose_by_68_points(
                                            marks)
                                        # temp = frame
                                        # cv2.putText(temp,"yaw:  "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"roll:   "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.imshow("frame",temp)
                                        # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR):
                                        #     good_face_index.append(i)
                                        # cv2.waitKey(0)
                                # print(good_face_index)
                                feed_dict = {inputs_placeholder: input_images}
                                emb_arrays = sess.run(embeddings,
                                                      feed_dict=feed_dict)
                                emb_arrays = sklearn.preprocessing.normalize(
                                    emb_arrays)
                                names = []
                                sims = []
                                for i, embedding in enumerate(emb_arrays):
                                    # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False:
                                    #     names.append(listTrackedFace[i].name)
                                    #     continue
                                    embedding = embedding.flatten()
                                    temp_dict = {}
                                    for com_face in faces_db:
                                        ret, sim = feature_compare(
                                            embedding, com_face["feature"],
                                            0.65)
                                        temp_dict[com_face["name"]] = sim
                                    # print(temp_dict)
                                    dictResult = sorted(temp_dict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    # print(dictResult[:5])
                                    name = ""
                                    if len(dictResult) > 0 and dictResult[0][
                                            1] > VERIFICATION_THRESHOLD:
                                        name = dictResult[0][
                                            0]  #.split("_")[0]
                                        sim = dictResult[0][1]
                                        ## wite log
                                        t = time.time()
                                        f.write(name + "___" +
                                                str((t - start) // 60) + ":" +
                                                str(int(t - start) % 60) +
                                                "\n")
                                    else:
                                        name = "unknown"
                                        sim = 0
                                    names.append(name)
                                    sims.append(sim)

                                    # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:])
                                    # if len(dictResult)>0 :
                                    # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:])
                                    ################################ tracker
                                for i, embedding in enumerate(emb_arrays):
                                    embedding = embedding.flatten()
                                    ResultDict = {}
                                    for objectTrackFace in listTrackedFace:
                                        tempList = []
                                        (x1, y1, x2,
                                         y2) = objectTrackFace.latestBox
                                        for com_face in objectTrackFace.listEmbedding:
                                            ret, sim = feature_compare(
                                                embedding, com_face, 0.65)
                                            tempList.append(sim)
                                        tempList.sort(reverse=True)
                                        if len(tempList) > 0:
                                            if tempList[0] > 0.9 or (
                                                    similarIOU(
                                                        faces[i, :4].astype(
                                                            "int"),
                                                        objectTrackFace.
                                                        latestBox) and
                                                (frameCounter - objectTrackFace
                                                 .latestFrameCounter) < 3):
                                                ResultDict[objectTrackFace.
                                                           name] = tempList[0]
                                    dictResult = sorted(ResultDict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    if True:
                                        if len(
                                                ResultDict
                                        ) > 0 and dictResult[0][
                                                1] > SIMILAR_THRESH:  ## neu khop -- 0.5
                                            # for ik in range(len(dict)):
                                            #     if dict[ik][1]>SIMILAR_THRESH:

                                            nameTrackCurrent = dictResult[0][0]
                                            for index, tempFaceTrack in enumerate(
                                                    listTrackedFace):
                                                if tempFaceTrack.name == nameTrackCurrent:
                                                    if len(tempFaceTrack.
                                                           listImage
                                                           ) > MAX_LIST_LEN:
                                                        tempFaceTrack.listImage.pop(
                                                            0)
                                                        tempFaceTrack.listEmbedding.pop(
                                                            0)
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    else:
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    if names[i] != "unknown":
                                                        if RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            tempFaceTrack.name = names[
                                                                i]
                                                        # else: #################
                                                        #     names[i] = nameTrackCurrent
                                                    else:
                                                        if not RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            names[
                                                                i] = nameTrackCurrent
                                                    tempFaceTrack.countDisappeared = 0
                                                    tempFaceTrack.latestBox = faces[
                                                        i, 0:4].astype("int")
                                                    tempFaceTrack.latestFrameCounter = frameCounter
                                                    tempFaceTrack.liveTime = 0
                                                    tempFaceTrack.justAdded = True  ## but we still action with it
                                                    break

                                        else:  ## neu khong khop thi tao moi nhung chi them anh khi mat du tot
                                            if len(ResultDict) > 0:
                                                print(dictResult[0][1])
                                            if names[i] != "unknown":
                                                newTrackFace = trackedFace(
                                                    names[i])
                                            else:
                                                newTrackFace = trackedFace(
                                                    str(currentFaceID))
                                                currentFaceID = currentFaceID + 1
                                            if measureGoodFace(
                                                    MIN_FACE_SIZE,
                                                    MAX_BLACK_PIXEL,
                                                    save_images[i, :], yaw,
                                                    pitch, roll, BLUR_THRESH,
                                                    YAWL, YAWR, PITCHL, PITCHR,
                                                    ROLLL, ROLLR):
                                                newTrackFace.listImage.append(
                                                    save_images[i, :])
                                                newTrackFace.listEmbedding.append(
                                                    emb_arrays[i])
                                            newTrackFace.latestBox = faces[
                                                i, 0:4].astype("int")
                                            newTrackFace.latestFrameCounter = frameCounter
                                            # print(newTrackFace.latestBox)
                                            newTrackFace.justAdded = True
                                            listTrackedFace.append(
                                                newTrackFace)  ## add list
                                ### disappeared
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    if trackFace.justAdded == False:
                                        trackFace.countDisappeared = trackFace.countDisappeared + 1
                                        trackFace.liveTime = trackFace.liveTime + 1
                                    else:
                                        trackFace.justAdded = False
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## neu chua duoc it nhat 5 mat thi xoa luon
                                            trackedFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                                for i, face in enumerate(faces):
                                    x1, y1, x2, y2 = faces[i][0], faces[i][
                                        1], faces[i][2], faces[i][3]
                                    x1 = max(int(x1), 0)
                                    y1 = max(int(y1), 0)
                                    x2 = min(int(x2), _frame.shape[1])
                                    y2 = min(int(y2), _frame.shape[0])
                                    cv2.rectangle(frame,
                                                  (x1 + ROIXL, y1 + ROIYA),
                                                  (x2 + ROIXL, y2 + ROIYA),
                                                  (0, 255, 0), 2)
                                    # if i in good_face_index:
                                    # if not RepresentsInt(names[i]):
                                    cv2.putText(frame, names[i].split("_")[0],
                                                (int(x1 / 2 + x2 / 2 + ROIXL),
                                                 int(y1 + ROIYA)),
                                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                (255, 255, 255), 2)
                            else:
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    trackFace.countDisappeared = trackFace.countDisappeared + 1
                                    trackFace.liveTime = trackFace.liveTime + 1
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                        continue
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## neu chua duoc it nhat 5 mat thi xoa luon
                                            trackedFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                            end = time.time()
                            cv2.putText(frame,
                                        "FPS: " + str(1 // (end - start1)),
                                        (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 0), 3)
                            cv2.putText(
                                frame, "Time: " + str((end - start) // 60) +
                                ":" + str(int(end - start) % 60), (200, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
                        cv2.imshow("frame", frame)
                        key = cv2.waitKey(30)
                        if key & 0xFF == ord('q'):
                            break
                        if key == 32:
                            cv2.waitKey(0)
                    else:
                        break
            except KeyboardInterrupt as e:
                pass
    gc.collect()
    cv2.destroyAllWindows()
    exit(0)
Example #27
0
def webcam_main():
    print("Camera sensor warming up...")
    cv2.namedWindow('face landmarks', cv2.WINDOW_NORMAL)
    frame = cv2.imread(IMAGE_PATH)
    time.sleep(2.0)

    mark_detector = MarkDetector(current_model, CNN_INPUT_SIZE)
    if current_model.split(".")[-1] == "pb":
        run_model = 0
    elif current_model.split(".")[-1] == "hdf5" or current_model.split(
            ".")[-1] == "h5":
        run_model = 1
    else:
        print("input model format error !")
        return

    faceboxes = mark_detector.extract_cnn_facebox(frame)
    print(faceboxes)

    if faceboxes is not None:
        facebox = faceboxes[0]

        # Detect landmarks from image of 64X64 with grayscale.
        face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
        cv2.rectangle(frame, (facebox[0], facebox[1]),
                      (facebox[2], facebox[3]), (0, 255, 0), 2)
        face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
        face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1)

        if run_model == 1:
            marks = mark_detector.detect_marks_keras(face_img0)
        else:
            marks = mark_detector.detect_marks_tensor(face_img0, 'input_2:0',
                                                      'output/BiasAdd:0')
        # marks *= 255
        marks *= facebox[2] - facebox[0]
        marks[:, 0] += facebox[0]
        marks[:, 1] += facebox[1]
        # Draw Predicted Landmarks
        mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2)

    # loop over the frames from the video stream
    while True:
        start = cv2.getTickCount()

        # frame = cv2.resize(frame, (750,750))
        # frame = cv2.flip(frame, 1)

        # fps_time = (cv2.getTickCount() - start)/cv2.getTickFrequency()
        # cv2.putText(frame, '%.1ffps'%(1/fps_time), (frame.shape[1]-65,15), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0,255,0))
        # show the frame
        cv2.imshow("face landmarks", frame)
        # writer.write(frame)
        key = cv2.waitKey(1)

        # if the `q` key was pressed, break from the loop
        if key == ord("q") or key == 0xFF:
            break

    # do a bit of cleanup
    cv2.destroyAllWindows()
def Test_Video():
    # model=load_model(model_path)
    with open(model_json, 'r') as json_file:
        loaded_json = json_file.read()
    model = model_from_json(loaded_json)
    model.load_weights(model_weights)

    trainX = []
    ids = []
    for filename in os.listdir(embedfolder):
        if filename.endswith('.npz'):
            f = filename.split('.npz')
            foldername = f[0]
            data = np.load(os.path.join(embedfolder, filename))
            trainX.append(data['arr_0'])
            idc = [foldername] * data['arr_0'].shape[0]
            ids.append(idc)

    trainX = np.concatenate(trainX, axis=0)
    ids = np.concatenate(ids, axis=0)

    grid = pickle.load(open(classifierfilename, 'rb'))

    # get target mapping
    en = TolerantLabelEncoder(ignore_unknown=True)
    en.fit(ids)

    video_capture = cv2.VideoCapture(file)

    count = 0
    mark_detector = MarkDetector()
    while True:
        ret, frame = video_capture.read()
        count += 1
        if ret == False:
            break
        if count % 5 == 0:
            frame = cv2.flip(frame, 1)
            height, width = frame.shape[:2]
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            facebox = mark_detector.extract_cnn_facebox(frame)
            if facebox is not None:
                # if len(faces)> 0:
                x1 = max(facebox[0] - 0, 0)
                x2 = min(facebox[2] + 0, width)
                y1 = max(facebox[1] - 0, 0)
                y2 = min(facebox[3] + 0, height)

                face = dlib.rectangle(x1, y1, x2, y2)
                face_aligned = face_aligner.align(frame, frame_gray, face)
                embedding = model.predict(
                    prewhiten(face_aligned).reshape(-1, FACE_WIDTH,
                                                    FACE_HEIGHT, 3))

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                yhat_test = grid.predict(embedding)
                yhat_prob = grid.predict_proba(embedding)
                id = np.argmax(yhat_prob)
                # print(yhat_prob[0][id])
                if yhat_prob[0][id] < th:
                    print(yhat_prob[0][id], 'unknown')
                else:
                    print(yhat_prob[0][id], en.inverse_transform(id))

            cv2.imshow('Video', frame)

        if cv2.waitKey(10) == 27:
            break

    video_capture.release()
    cv2.destroyAllWindows()
def main():
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # fix facebox if needed
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
if __name__ == '__main__':
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    thread = threading.Thread(target=get_face,
                              args=(mark_detector, img_queue, box_queue))
    thread.daemon = True
    thread.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))