Example #1
    def process(self, image, output_path=None, output_name=None):
        image_origin = image
        check_dir(output_path)

        cv2.imwrite(join(output_path, output_name + '.origin.png'),
                    image_origin)
        # image_name =
        # image_origin = cv2.imread(image_path)
        image_origin_height, image_origin_width = image_origin.shape[0:2]
        # print('Width', image_origin_width, 'Height', image_origin_height)

        image_crop, image_edge = format_image_rgb(image_origin)
        cv2.imwrite(join(output_path, output_name + '.landmark.crop.png'),
                    image_crop)
        # print('Image Data', image_crop, 'Image Edge', image_edge)

        image_crop_resize = cv2.resize(image_crop, (128, 128))
        cv2.imwrite(join(output_path, output_name + '.landmark.resize.png'),
                    image_crop_resize)

        # image_data = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
        # print('Image', image_crop_resize)

        predictions = self.sess.run(self.landmark_logits,
                                    feed_dict={self.inputs: image_crop_resize})
        # print(predictions)
        # print('Len predictions', predictions)

        marks = np.array(predictions).flatten()
        marks = np.reshape(marks, (-1, 2))
        # print(marks)

        # width =
        # print('Image edge shape', image_edge)
        # to do multiply
        marks *= (image_edge[2] - image_edge[0])
        marks[:, 0] += image_edge[0]
        marks[:, 1] += image_edge[1]
        # print(marks)

        with open(join(output_path, output_name + '.marks.txt'),
                  'w',
                  encoding='utf-8') as f:
            f.write(json.dumps(marks.tolist()))

        for mark in marks:
            # cv2.circle expects integer pixel coordinates.
            cv2.circle(image_origin, tuple(int(v) for v in mark), 3, (255, 0, 0))

        cv2.imwrite(join(output_path, output_name + '.landmark.png'),
                    image_origin)

        pose_estimator = PoseEstimator(img_size=(image_origin_height,
                                                 image_origin_width))
        # pose_estimator
        pose = pose_estimator.solve_pose_by_68_points(marks)
        print('Pose', pose)

        with open(join(output_path, output_name + '.pose.txt'),
                  'w',
                  encoding='utf-8') as f:
            # pose is a pair of numpy arrays; convert to lists so json can dump it.
            f.write(json.dumps(np.array(pose).tolist()))

        return pose
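
# --- Illustrative sketch (not part of the original example) ---
# The scaling/shifting in process() maps normalized CNN landmarks back into
# original-image coordinates via the square crop box. A minimal, self-contained
# sketch with a hypothetical crop box and made-up normalized marks:
import numpy as np  # already imported above; repeated so the sketch stands alone

_crop_box = (100, 80, 228, 208)              # hypothetical (x1, y1, x2, y2) of a 128x128 crop
_crop_size = _crop_box[2] - _crop_box[0]     # width == height for a square crop
_norm_marks = np.array([[0.25, 0.40], [0.75, 0.40], [0.50, 0.80]])  # fake CNN output in [0, 1]
_marks = _norm_marks * _crop_size            # scale to pixels inside the crop
_marks[:, 0] += _crop_box[0]                 # shift x back to the original image
_marks[:, 1] += _crop_box[1]                 # shift y back to the original image
# _marks now holds landmark positions in original-image coordinates.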
def main(strargument):
    # Recreate the working directory and the log file; the existence checks
    # keep the first run from crashing.
    shutil.rmtree("./test", ignore_errors=True)
    os.mkdir("./test")
    if os.path.exists("result.txt"):
        os.remove("result.txt")
    f = open("result.txt", "a")
    cap = cv2.VideoCapture(strargument)
    # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    #cap = cv2.VideoCapture("NTQ.mkv")
    #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi")
    #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg")
    # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview")
    success, frame = cap.read()
    startId = countIdFolder("./face_db/")
    # quit if unable to read the video file
    if not success:
        print('Failed to read video')
        sys.exit(1)
    #The color of the rectangle we draw around the face
    rectangleColor = (0, 165, 255)
    #variables holding the current frame number and the current faceid
    frameCounter = 0
    currentFaceID = 0
    #Variables holding the correlation trackers and the name per faceid
    conf = configparser.ConfigParser()
    conf.read("config/main.cfg")
    mtcnn_detector = load_mtcnn(conf)
    MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH")
    VERIFICATION_THRESHOLD = float(
        conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD"))
    FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH")
    BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH"))
    MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE"))
    MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL"))
    YAWL = int(conf.get("CUSTOM", "YAWL"))
    YAWR = int(conf.get("CUSTOM", "YAWR"))
    PITCHL = int(conf.get("CUSTOM", "PITCHL"))
    PITCHR = int(conf.get("CUSTOM", "PITCHR"))
    ROLLL = int(conf.get("CUSTOM", "ROLLL"))
    ROLLR = int(conf.get("CUSTOM", "ROLLR"))
    MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED"))
    IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH"))
    EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y"))
    EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X"))
    SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH"))
    MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN"))
    MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE"))
    LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME"))
    ROIXL = int(conf.get("CUSTOM", "ROIXL"))
    ROIXR = int(conf.get("CUSTOM", "ROIXR"))
    ROIYB = int(conf.get("CUSTOM", "ROIYB"))
    ROIYA = int(conf.get("CUSTOM", "ROIYA"))
    maxDisappeared = MAXDISAPPEARED  ## maximum number of frames a face may stay undetected (e.g. 100)
    faces_db = load_faces(FACE_DB_PATH, mtcnn_detector)
    # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector)
    time.sleep(10)
    for item in faces_db:
        print(item["name"])
    listTrackedFace = []
    mark_detector = MarkDetector()
    tm = cv2.TickMeter()
    _, sample_frame = cap.read()
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    with tf.Graph().as_default():
        with tf.Session() as sess:
            load_mobilefacenet(MODEL_PATH)
            inputs_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            try:
                start = time.time()
                while True:
                    start1 = time.time()
                    retval, frame = cap.read()

                    #Increase the framecounter
                    frameCounter += 1
                    if retval:
                        _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR]
                        cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB),
                                      (0, 0, 255), 2)
                        good_face_index = []
                        # faces_db = load_face_db.face_db
                        if (frameCounter % 1) == 0:  # process every frame; raise the modulus to skip frames
                            ### embed and compare name
                            for i, face_db in enumerate(faces_db):
                                if not os.path.isdir(
                                        "./face_db/" +
                                        face_db["name"].split("_")[0]):
                                    faces_db.pop(i)
                            faces, landmarks = mtcnn_detector.detect(_frame)
                            if faces.shape[0] != 0:
                                input_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                save_images = np.zeros(
                                    (faces.shape[0], 112, 112, 3))
                                (yaw, pitch, roll) = (0, 0, 0)
                                for i, face in enumerate(faces):
                                    if round(faces[i, 4], 6) > IS_FACE_THRESH:
                                        bbox = faces[i, 0:4]
                                        points = landmarks[i, :].reshape(
                                            (5, 2))
                                        nimg = face_preprocess.preprocess(
                                            _frame,
                                            bbox,
                                            points,
                                            image_size='112,112')
                                        save_images[i, :] = nimg
                                        nimg = nimg - 127.5
                                        nimg = nimg * 0.0078125
                                        input_images[i, :] = nimg
                                        (x1, y1, x2, y2) = bbox.astype("int")
                                        if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2:
                                            continue
                                        temp = int((y2 - y1) / EXTEND_Y)
                                        y1 = y1 + temp
                                        y2 = y2 + temp
                                        temp = int((x2 - x1) / EXTEND_X)
                                        if x1 > temp:
                                            x1 = x1 - temp
                                        x2 = x2 + temp
                                        # cv2.imshow("mainframe",frame)
                                        # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2])
                                        face_img = cv2.resize(
                                            _frame[y1:y2, x1:x2], (128, 128))
                                        # cv2.imshow("ok",face_img)
                                        face_img = cv2.cvtColor(
                                            face_img, cv2.COLOR_BGR2RGB)
                                        tm.start()
                                        marks = mark_detector.detect_marks(
                                            [face_img])
                                        tm.stop()
                                        marks *= (x2 - x1)
                                        marks[:, 0] += x1
                                        marks[:, 1] += y1
                                        # mark_detector.draw_marks(
                                        #         frame, marks, color=(0, 255, 0))
                                        pose, (
                                            yaw, pitch, roll
                                        ) = pose_estimator.solve_pose_by_68_points(
                                            marks)
                                        # temp = frame
                                        # cv2.putText(temp,"yaw:  "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.putText(temp,"roll:   "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2)
                                        # cv2.imshow("frame",temp)
                                        # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR):
                                        #     good_face_index.append(i)
                                        # cv2.waitKey(0)
                                # print(good_face_index)
                                feed_dict = {inputs_placeholder: input_images}
                                emb_arrays = sess.run(embeddings,
                                                      feed_dict=feed_dict)
                                emb_arrays = sklearn.preprocessing.normalize(
                                    emb_arrays)
                                names = []
                                sims = []
                                for i, embedding in enumerate(emb_arrays):
                                    # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False:
                                    #     names.append(listTrackedFace[i].name)
                                    #     continue
                                    embedding = embedding.flatten()
                                    temp_dict = {}
                                    for com_face in faces_db:
                                        ret, sim = feature_compare(
                                            embedding, com_face["feature"],
                                            0.65)
                                        temp_dict[com_face["name"]] = sim
                                    # print(temp_dict)
                                    dictResult = sorted(temp_dict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    # print(dictResult[:5])
                                    name = ""
                                    if len(dictResult) > 0 and dictResult[0][
                                            1] > VERIFICATION_THRESHOLD:
                                        name = dictResult[0][
                                            0]  #.split("_")[0]
                                        sim = dictResult[0][1]
                                        ## write log
                                        t = time.time()
                                        f.write(name + "___" +
                                                str((t - start) // 60) + ":" +
                                                str(int(t - start) % 60) +
                                                "\n")
                                    else:
                                        name = "unknown"
                                        sim = 0
                                    names.append(name)
                                    sims.append(sim)

                                    # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:])
                                    # if len(dictResult)>0 :
                                    # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:])
                                    ################################ tracker
                                for i, embedding in enumerate(emb_arrays):
                                    embedding = embedding.flatten()
                                    ResultDict = {}
                                    for objectTrackFace in listTrackedFace:
                                        tempList = []
                                        (x1, y1, x2,
                                         y2) = objectTrackFace.latestBox
                                        for com_face in objectTrackFace.listEmbedding:
                                            ret, sim = feature_compare(
                                                embedding, com_face, 0.65)
                                            tempList.append(sim)
                                        tempList.sort(reverse=True)
                                        if len(tempList) > 0:
                                            if tempList[0] > 0.9 or (
                                                    similarIOU(
                                                        faces[i, :4].astype(
                                                            "int"),
                                                        objectTrackFace.
                                                        latestBox) and
                                                (frameCounter - objectTrackFace
                                                 .latestFrameCounter) < 3):
                                                ResultDict[objectTrackFace.
                                                           name] = tempList[0]
                                    dictResult = sorted(ResultDict.items(),
                                                        key=lambda d: d[1],
                                                        reverse=True)
                                    if True:
                                        if len(
                                                ResultDict
                                        ) > 0 and dictResult[0][
                                                1] > SIMILAR_THRESH:  ## it matches an existing track (e.g. 0.5)
                                            # for ik in range(len(dict)):
                                            #     if dict[ik][1]>SIMILAR_THRESH:

                                            nameTrackCurrent = dictResult[0][0]
                                            for index, tempFaceTrack in enumerate(
                                                    listTrackedFace):
                                                if tempFaceTrack.name == nameTrackCurrent:
                                                    if len(tempFaceTrack.
                                                           listImage
                                                           ) > MAX_LIST_LEN:
                                                        tempFaceTrack.listImage.pop(
                                                            0)
                                                        tempFaceTrack.listEmbedding.pop(
                                                            0)
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    else:
                                                        if measureGoodFace(
                                                                MIN_FACE_SIZE,
                                                                MAX_BLACK_PIXEL,
                                                                save_images[
                                                                    i, :], yaw,
                                                                pitch, roll,
                                                                BLUR_THRESH,
                                                                YAWL, YAWR,
                                                                PITCHL, PITCHR,
                                                                ROLLL, ROLLR):
                                                            tempFaceTrack.listImage.append(
                                                                save_images[
                                                                    i, :])
                                                            tempFaceTrack.listEmbedding.append(
                                                                emb_arrays[i])
                                                    if names[i] != "unknown":
                                                        if RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            tempFaceTrack.name = names[
                                                                i]
                                                        # else: #################
                                                        #     names[i] = nameTrackCurrent
                                                    else:
                                                        if not RepresentsInt(
                                                                nameTrackCurrent
                                                        ):
                                                            names[
                                                                i] = nameTrackCurrent
                                                    tempFaceTrack.countDisappeared = 0
                                                    tempFaceTrack.latestBox = faces[
                                                        i, 0:4].astype("int")
                                                    tempFaceTrack.latestFrameCounter = frameCounter
                                                    tempFaceTrack.liveTime = 0
                                                    tempFaceTrack.justAdded = True  ## mark as seen this frame
                                                    break

                                        else:  ## no match: create a new track, but only keep the image if the face is good enough
                                            if len(ResultDict) > 0:
                                                print(dictResult[0][1])
                                            if names[i] != "unknown":
                                                newTrackFace = trackedFace(
                                                    names[i])
                                            else:
                                                newTrackFace = trackedFace(
                                                    str(currentFaceID))
                                                currentFaceID = currentFaceID + 1
                                            if measureGoodFace(
                                                    MIN_FACE_SIZE,
                                                    MAX_BLACK_PIXEL,
                                                    save_images[i, :], yaw,
                                                    pitch, roll, BLUR_THRESH,
                                                    YAWL, YAWR, PITCHL, PITCHR,
                                                    ROLLL, ROLLR):
                                                newTrackFace.listImage.append(
                                                    save_images[i, :])
                                                newTrackFace.listEmbedding.append(
                                                    emb_arrays[i])
                                            newTrackFace.latestBox = faces[
                                                i, 0:4].astype("int")
                                            newTrackFace.latestFrameCounter = frameCounter
                                            # print(newTrackFace.latestBox)
                                            newTrackFace.justAdded = True
                                            listTrackedFace.append(
                                                newTrackFace)  ## add list
                                ### disappeared
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    if trackFace.justAdded == False:
                                        trackFace.countDisappeared = trackFace.countDisappeared + 1
                                        trackFace.liveTime = trackFace.liveTime + 1
                                    else:
                                        trackFace.justAdded = False
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## if it has not collected at least 5 faces yet, drop it right away
                                            trackFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                                for i, face in enumerate(faces):
                                    x1, y1, x2, y2 = faces[i][0], faces[i][
                                        1], faces[i][2], faces[i][3]
                                    x1 = max(int(x1), 0)
                                    y1 = max(int(y1), 0)
                                    x2 = min(int(x2), _frame.shape[1])
                                    y2 = min(int(y2), _frame.shape[0])
                                    cv2.rectangle(frame,
                                                  (x1 + ROIXL, y1 + ROIYA),
                                                  (x2 + ROIXL, y2 + ROIYA),
                                                  (0, 255, 0), 2)
                                    # if i in good_face_index:
                                    # if not RepresentsInt(names[i]):
                                    cv2.putText(frame, names[i].split("_")[0],
                                                (int(x1 / 2 + x2 / 2 + ROIXL),
                                                 int(y1 + ROIYA)),
                                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                (255, 255, 255), 2)
                            else:
                                for index, trackFace in enumerate(
                                        listTrackedFace):
                                    trackFace.countDisappeared = trackFace.countDisappeared + 1
                                    trackFace.liveTime = trackFace.liveTime + 1
                                    if trackFace.liveTime > LIVE_TIME:
                                        t = listTrackedFace.pop(index)
                                        del t
                                        continue
                                    if trackFace.countDisappeared > maxDisappeared:
                                        if len(
                                                trackFace.listImage
                                        ) < MIN_FACE_FOR_SAVE:  ## if it has not collected at least 5 faces yet, drop it right away
                                            trackFace.countDisappeared = 0
                                            continue
                                        if trackFace.saveTrackedFace(
                                                "./temp/", startId):
                                            startId = startId + 1
                                        t = listTrackedFace.pop(index)
                                        del t
                            end = time.time()
                            cv2.putText(frame,
                                        "FPS: " + str(1 // (end - start1)),
                                        (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                        (0, 0, 0), 3)
                            cv2.putText(
                                frame, "Time: " + str((end - start) // 60) +
                                ":" + str(int(end - start) % 60), (200, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3)
                        cv2.imshow("frame", frame)
                        key = cv2.waitKey(30)
                        if key & 0xFF == ord('q'):
                            break
                        if key == 32:
                            cv2.waitKey(0)
                    else:
                        break
            except KeyboardInterrupt as e:
                pass
    gc.collect()
    cv2.destroyAllWindows()
    exit(0)
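
# --- Illustrative sketch (not part of the original example) ---
# main() above reads every threshold from config/main.cfg via conf.get(). A
# minimal sketch of what such a file might contain and how it can be read with
# typed getters and fallbacks; the section/key names follow the calls above,
# the values are purely hypothetical.
import configparser

_sample_cfg = """
[MTCNN]
MIN_FACE_SIZE = 40

[MOBILEFACENET]
MODEL_PATH = ./models/mobilefacenet.pb
VERIFICATION_THRESHOLD = 0.75
FACE_DB_PATH = ./face_db/

[CUSTOM]
BLUR_THRESH = 100
MAXDISAPPEARED = 100
SIMILAR_THRESH = 0.5
"""
_conf = configparser.ConfigParser()
_conf.read_string(_sample_cfg)
# getint/getfloat parse the value, and the fallback keyword avoids a crash
# when a key is missing from the file.
_blur_thresh = _conf.getint("CUSTOM", "BLUR_THRESH", fallback=100)
_similar_thresh = _conf.getfloat("CUSTOM", "SIMILAR_THRESH", fallback=0.5)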
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following lines to draw the raw pose annotation on the frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(
                frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
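
# --- Illustrative sketch (not part of the original example) ---
# main() above runs face detection in a separate process fed through img_queue
# and drained through box_queue. The same producer/consumer pattern, stripped
# down to a dummy worker (the real get_face() is replaced by a stand-in that
# returns a fake facebox):
from multiprocessing import Process, Queue

def _dummy_get_face(in_queue, out_queue):
    # Stand-in for get_face(): read frames until a None sentinel arrives.
    while True:
        frame = in_queue.get()
        if frame is None:
            break
        out_queue.put((0, 0, 10, 10))  # hypothetical facebox (x1, y1, x2, y2)

if __name__ == "__main__":
    _img_q, _box_q = Queue(), Queue()
    _worker = Process(target=_dummy_get_face, args=(_img_q, _box_q))
    _worker.start()
    _img_q.put("frame-1")              # any picklable object works as a frame
    print(_box_q.get())                # -> (0, 0, 10, 10)
    _img_q.put(None)                   # sentinel shutdown instead of terminate()
    _worker.join()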
            # (Fragment of another example whose beginning, including its header,
            # is missing from the source; it hard-coded a 68x2 array of landmark
            # coordinates here, of which only the tail survived.)
            # marks = np.array([...])  # truncated in the source
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))
Example #5
def main():

    ''' main function'''

    img_dir = "img_dir/"     # image file dir

    # setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_list = []
    for filename in os.listdir(img_dir):
        frame = cv2.imread(img_dir + filename)
        img_list.append(frame)
        img_queue.put(frame)
    box_process = Process(target=get_face, args=(img_queue, box_queue))
    box_process.start()

    # introduce mark_detector to detect landmarks
    mark_detector = MarkDetector()

    while True:
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # flipCode > 0: horizontal flip; flipCode = 0: vertical flip; flipCode < 0: both horizontal and vertical flip
        # frame = cv2.flip(frame, 2)

        if len(img_list) != 0:
            frame = img_list.pop(0)
            raw_frame = frame.copy()
            # introduce pose estimator to solve pose. Get one frame to set up the
            # estimator according to the image size.
            height, width = frame.shape[:2]
            pose_estimator = PoseEstimator(img_size=(height, width))
        else:
            break

        # Pose estimation by 3 steps.
        # 1. detect faces.
        # 2. detect landmarks.
        # 3. estimate pose.

        # get face from box queue.
        faceboxes = box_queue.get()
        # print("the length of facebox is " + str(len(faceboxes)))
        for facebox in faceboxes:
            # detect landmarks from image 128 * 128
            face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]    # crop the face region.

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)    # BGR -> RGB
            marks = mark_detector.detect_marks(face_img)

            # convert the marks locations from local CNN to global image.
            marks[:, 0] *= (facebox[2] - facebox[0])  # the width of the rectangle.
            marks[:, 1] *= (facebox[3] - facebox[1])  # the height of the rectangle.
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # uncomment following line to show raw marks.
            # util.draw_marks(frame, marks, color=(0, 255, 0))
            # util.draw_faceboxes(frame, facebox)

            # try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
           
            # pose[0] is rotation_vector, and pose[1] is translation_vector
            
            pose_np = np.array(pose).flatten()
            pose = np.reshape(pose_np, (-1, 3))

            angle = util.get_angle(pose[0])
            pose_estimator.get_warp_affined_image(frame, facebox, marks, angle[2])

        # show preview
        cv2.imshow("Preview", frame)
        cv2.imshow("raw_frame", raw_frame)
        if cv2.waitKey(0) == 27:
            continue

    # clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
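
# --- Illustrative sketch (not part of the original example) ---
# get_warp_affined_image() is not shown in this listing. Assuming it roughly
# rotates the frame around the face centre by the roll angle taken from the
# pose, the core of such an alignment step could look like this (a sketch,
# not the repo's implementation):
import cv2
import numpy as np

def _rotate_by_roll(image, center, roll_deg):
    # Rotate the whole image around `center` by `roll_deg` degrees.
    h, w = image.shape[:2]
    m = cv2.getRotationMatrix2D(center, roll_deg, 1.0)
    return cv2.warpAffine(image, m, (w, h))

_dummy = np.zeros((480, 640, 3), dtype=np.uint8)   # placeholder frame
_aligned = _rotate_by_roll(_dummy, (320, 240), roll_deg=12.5)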
Example #6
class PoseHeadNode(object):

    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        #self.pub_headpose_info = rospy.Publisher("/headpose_info", HeadPoseInfo, \
        #                        queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()
        
    
    def init(self):
        
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
                state_num=2,
                measure_num=1,
                cov_process=0.1,
                cov_measure=0.1) for _ in range(6)]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #    
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))
    
    def pose_cb(self, data):
        
        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)
    
    def draw_pose(self, img, humans):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)

    def color_callback(self, data): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480,640, 3))
        print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def publish_headpose_info(self, viz=True):
        
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480*2, 640*2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes, landmark = self.detector.detect(frame, \
                thresh, scales=scales, do_flip=False)
        mess = "No pose detected"
        
        if faceboxes is not None and len(faceboxes) > 0:
            #if isinstance(faceboxes[1], int):
            #    faceboxes = [faceboxes]
            for facebox in faceboxes:
                facebox = facebox.astype(int)  # np.int was removed in newer NumPy versions; plain int works
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                                facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Draw raw marks on the frame.
                self.mark_detector.draw_marks(
                     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw the raw (unstabilized) pose annotation on the frame.
                self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

                # Uncomment following lines to draw the stable pose annotation on the frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
                
                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
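
# --- Illustrative sketch (not part of the original example) ---
# publish_headpose_info() above scales the frame so its shorter side reaches
# target_size while capping the longer side at max_size before calling the
# RetinaFace detector. The same arithmetic in isolation (defaults taken from
# the scales = [480*2, 640*2] line above):
import numpy as np

def _compute_detection_scale(im_shape, target_size=960, max_size=1280):
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        # Shrink further so the longer side does not exceed max_size.
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

# A 480x640 frame gets scale 2.0 with these defaults.
print(_compute_detection_scale((480, 640, 3)))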
Example #7
def process_image(transform, processing_model, img):
    global mark_detector, box_process, img_queue, box_queue, pose_estimator, pose_stabilizers, tm, width, height
    tracks = []
    try:
        frame = img
        h, w, d = frame.shape
        if pose_estimator is None or w != width or h != height:
            # sample_frame = frame
            # img_queue.put(sample_frame)

            # Introduce pose estimator to solve pose. Get one frame to setup the
            # estimator according to the image size.
            height, width = h, w
            # (height, width) = (1062 , 485)
            # (height, width) = (720 , 1280)
            pose_estimator = PoseEstimator(img_size=(height, width))

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw the stabilized pose annotation on the frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
        img = frame
    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("HandPose Exception", e)
        pass

    return tracks, img
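
# --- Illustrative sketch (not part of the original example) ---
# The stabilization step above flattens the (rotation, translation) pair into
# six scalars, feeds each one to its own Stabilizer, and reshapes the smoothed
# values back to (2, 3). The repo's Stabilizer is Kalman-filter based and is
# not reproduced here; this sketch uses simple exponential smoothing as a
# stand-in, updated the same way:
import numpy as np

class _EMAStabilizer:
    def __init__(self, alpha=0.5):
        self.alpha = alpha
        self.state = [None]

    def update(self, measurement):
        value = measurement[0]
        if self.state[0] is None:
            self.state[0] = value
        else:
            self.state[0] = self.alpha * value + (1 - self.alpha) * self.state[0]

_stabilizers = [_EMAStabilizer() for _ in range(6)]   # 3 rotation + 3 translation scalars
_pose = (np.array([0.1, -0.2, 0.05]), np.array([10.0, -5.0, 300.0]))  # fake pose
_steady = []
for _value, _stb in zip(np.array(_pose).flatten(), _stabilizers):
    _stb.update([_value])
    _steady.append(_stb.state[0])
_steady = np.reshape(_steady, (-1, 3))   # row 0: rotation, row 1: translation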
def headPoseEstimation():
    """MAIN"""
    print("HEAD POSE ESTIMATION...")
    # Video source from webcam or video file.
    #video_src = 0
    #video_src = 'EWSN.avi'
    #cam = cv2.VideoCapture(video_src)
    #_, sample_frame = cam.read()

    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    print("1")
    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    print("2")
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    print("3")
    noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
                 [0, 0], [0, 0], [0, 0]]
    counter = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    numXPoints = 0
    numYPoints = 0
    sumX = 0
    sumY = 0

    # start = time.time()
    # previousTime1 = start
    # previousTime2 = start
    currentTime = 0
    previousTime1 = 0
    previousTime2 = 0

    directionArray = []
    moveSequence = []
    moves = []
    classifyMoves = 0
    moveClass = None  # ensures the final return value is defined even if nothing is classified
    headPoseDirection = 'empty'

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break
        #print("4")
        #print(frame.shape)
        #print("5")
        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        #if video_src == 0:
        #    print("6")
        #    frame = cv2.flip(frame, 2)
        #    cv2.imwrite("Preview.png", frame)
        #    print("7")

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)
        #print("8")
        # Get face from box queue.
        facebox = box_queue.get()
        #print(type(facebox)
        #print(facebox
        #print("9")
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            print(face_img.shape)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Keep a short history of the nose-tip position: shift the old
            # entries back first, then insert the newest position at index 0.
            noseMarksTemp = [marks[30][0], marks[30][1]]
            for i in range(9, 0, -1):
                noseMarks[i] = noseMarks[i - 1]
            noseMarks[0] = noseMarksTemp

            # Get the direction of head movement
            headPoseDirection = calculateDirection(noseMarks)
            #if headPoseDirection != 'still':
            directionArray.append(headPoseDirection)
            #print(directionArray
            #print(len(directionArray)
            print("To capture a movement press 'a' and perform a movement.")
            #currentTime1 = time.time()
            if cv2.waitKey(5) == ord('a') and not classifyMoves:
                classifyMoves = 1
                print("Start classifying movement...")
                timer = time.time()
                currentTime = timer
                previousTime1 = timer
                previousTime2 = timer

            if cv2.waitKey(5) == ord('b') and classifyMoves:
                classifyMoves = 0
                print("Stopped classifying movement...")

            currentTime = time.time()
            if currentTime - previousTime1 > 2 and classifyMoves:
                print("------------------------------------------------")
                print("Elapsed timer 1: " + str(currentTime - previousTime1))
                #print(len(directionArray)

                # Get most common direction
                moveClass = mostCommon(directionArray)
                #moveSequence.append(moveClass)
                print(moveClass)

                # Get a sequence of head movements
                # if currentTime - previousTime2 > 10 and classifyMoves == 1 and len(moves) == 0:
                #     print("Elapsed timer 2: " + str(currentTime - previousTime2)
                #     numMoves = len(moveSequence)
                #     moves = moveSequence[(numMoves-5):(numMoves-1)]
                #     print(moves
                #     moveSequence = []
                #     previousTime2 = currentTime
                #     classifyMoves = 0
                classifyMoves = 0
                directionArray = []
                previousTime1 = currentTime

                print(
                    "To continue type 'c' or to recapture a movement type 'a'."
                )

            if cv2.waitKey(5) == ord('c'):
                break

            #print(previousTime
            # Uncomment following line to show raw marks.
            #mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stabile_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stabile_pose.append(ps_stb.state[0])
            stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following lines to draw the raw pose annotation on the frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following lines to draw the stable pose annotation on the frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))

        # if len(moves) > 1:
        #     cv2.putText(frame, moves[0], (450,70), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[1], (450,100), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[2], (450,130), font, 1, (0,0,0), 4)
        #     cv2.putText(frame, moves[3], (450,160), font, 1, (0,0,0), 4)

        cv2.putText(frame, headPoseDirection, (20, 70), font, 1, (0, 255, 0),
                    4)

        # Show preview.
        #cv2.namedWindow("", cv2.WINDOW_NORMAL)
        cv2.imshow("Preview", frame)
        #cv2.resizeWindow("preview", 5000,5000)
        if cv2.waitKey(5) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()

    return moveClass
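
# --- Illustrative sketch (not part of the original example) ---
# mostCommon() and calculateDirection() are not shown in this listing. A
# plausible majority-vote helper over the per-frame directions collected above
# could be as simple as this (hypothetical, not the original implementation):
from collections import Counter

def _most_common_direction(directions):
    if not directions:
        return "still"
    return Counter(directions).most_common(1)[0][0]

print(_most_common_direction(["left", "left", "still", "right", "left"]))  # -> left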
Example #9
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        engine.say(
            "Warning: video source not assigned, default webcam will be used")
        engine.runAndWait()
        video_src = 0
    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()
    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()
    gaze = GazeTracking()
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()
    head_flag = 0
    gaze_flag = 0
    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]
        #audio_record(AUDIO_OUTPUT, 3)
        #sphinx_recog(AUDIO_OUTPUT)
        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()
        gaze.refresh(frame)
        frame = gaze.annotated_frame()
        text = ""
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            gray = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY)
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            rects = detector(gray, 0)
            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()
            #
            for rect in rects:
                # determine the facial landmarks for the face region, then
                # convert the facial landmark (x, y)-coordinates to a NumPy
                # array
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(
                    shape)  # convert to a NumPy array for matrix operations
                mouth = shape[Start:End]
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)  # eye aspect ratio
                rightEAR = eye_aspect_ratio(rightEye)
                ear = (leftEAR + rightEAR) / 2.0  # average of the two aspect ratios
                lipdistance = lip_distance(shape)

                if (lipdistance > YAWN_THRESH):
                    #print(lipdistance)
                    flag0 += 1
                    print("yawning time: ", flag0)
                    if flag0 >= 40:
                        cv2.putText(frame, "Yawn Alert", (10, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        cv2.putText(frame, "Yawn Alert", (220, 150),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        engine.say("don't yawn")
                        engine.runAndWait()
                        flag0 = 0
                else:
                    flag0 = 0

                if (ear < thresh):
                    flag += 1
                    print("eyes closing time: ", flag)
                    if flag >= frame_check:
                        cv2.putText(frame,
                                    "****************ALERT!****************",
                                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2)
                        cv2.putText(frame,
                                    "****************ALERT!****************",
                                    (10, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                    (0, 0, 255), 2)
                        engine.say("open your eyes")
                        engine.runAndWait()
                        flag = 0
                else:
                    flag = 0
                if gaze.is_right():
                    print("Looking right")
                    text = "Looking right"
                elif gaze.is_left():
                    print("Looking left")
                    text = "Looking left"
                elif gaze.is_up():
                    text = "Looking up"
                else:
                    text = "Looking center"
                if text != "Looking center":
                    gaze_flag += 1
                    if gaze_flag >= 20:
                        engine.say("look forward")
                        engine.runAndWait()
                        gaze_flag = 0
                else:
                    gaze_flag = 0
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
            # get angles
            angles = pose_estimator.get_angles(pose[0], pose[1])
            if abs(angles[0]) > 8 or abs(angles[1]) > 8:
                head_flag += 1
                if head_flag >= 40:
                    print(angles[0])
                    engine.say("please look ahead")
                    engine.runAndWait()
                    head_flag = 0
            else:
                head_flag = 0
            # pose_estimator.draw_info(frame, angles)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               pose[0],
                                               pose[1],
                                               color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
            #pose_estimator.show_3d_model

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
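The get_face worker handed to Process above is defined elsewhere in the original project; a minimal sketch, modeled on the method-based version in the ROS example further below, could look like this:

def get_face(detector, img_queue, box_queue):
    """Worker process: take frames from img_queue, run the facebox detector,
    and push the resulting box (or None) to box_queue.

    Sketch of the helper assumed by the examples above; the real
    implementation may differ in details such as shutdown handling."""
    while True:
        image = img_queue.get()
        box = detector.extract_cnn_facebox(image)
        box_queue.put(box)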
Beispiel #10
0
class PoseHeadNode(object):
    def __init__(self):

        # subscribe
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)

        # publish
        self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo, \
                                queue_size=10)
        #data
        self.humans = None
        self.color_img = None

        self.init()

    def init(self):

        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(6)
        ]

        sample_img = None
        #if self.color_img is None:
        #    print("None")
        #    time.sleep(0.2)
        #
        #height, width, _ = self.color_img.shape

        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):

        if self.color_img is None:
            return

        h, w = self.color_img.shape[:2]

        # ros topic to Person instance
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x,
                                body_part.y, body_part.confidence)
                human.body_parts[body_part.part_id] = part

            humans.append(human)

        self.humans = humans

        #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)

    def draw_pose(self, img, humans, parts=[4, 7]):

        if img is None or humans is None:
            #print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None

        image_h, image_w, _ = img.shape
        if parts is None or len(parts) == 0:
            return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        else:

            for human in humans:
                for part in parts:

                    if part in human.body_parts.keys():
                        body_part = human.body_parts[part]
                        coord = (int(body_part.x * image_w + 0.5), \
                                int(body_part.y * image_h + 0.5))
                        img = cv2.circle(img, coord, 2, (0, 255, 0))
            return img

    def color_callback(
        self, data
    ):  # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function
        #print("Having color image")
        #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        #print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)

        img = np.reshape(decoded, (480, 640, 3))
        #print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def get_top_vertice_human(self, human, img_w, img_h):
        sorted_id = sorted(human["pose"].body_parts.keys())
        body_part = human["pose"].body_parts[sorted_id[0]]
        return (int(body_part.x * img_w + 0.5), \
                int(body_part.y * img_h + 0.5))

    def is_gazing(self, face_direction, human, img):
        def get_angle_direct_with_peakhand(face_direction, peak):
            d0, d1 = face_direction
            peak_direction = (peak[0] - d0[0], peak[1] - d0[1])
            face_direct = (d1[0] - d0[0], d1[1] - d0[1])

            d_peak_direction = np.sqrt(peak_direction[0]**2 +
                                       peak_direction[1]**2)
            d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2)
            return (peak_direction[0]*face_direct[0] + \
                    peak_direction[1]*face_direct[1])/(d_peak_direction*\
                    d_face_direct)

        thresh = 0.1  #0.5 #cos(PI/3)

        image_h, image_w, _ = img.shape
        if 4 in human.body_parts.keys():
            body_part = human.body_parts[4]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle left = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        if 7 in human.body_parts.keys():
            body_part = human.body_parts[7]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))
            print("angle right = ",
                  get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction,
                                                  coord) > thresh

        return False

    def get_gazing_status(self, face_directions, face_coords, humans, img):
        def get_dist_to_humans(human_coords, coord):
            result = []

            for _coord in human_coords:
                result.append(np.sqrt((coord[0] - _coord[0])**2 +
                                      (coord[1] - _coord[1])**2))
            return result

        image_h, image_w, _ = img.shape
        gazing_status = []

        human_coords = []

        for human in humans:

            sorted_id = sorted(human.body_parts.keys())
            body_part = human.body_parts[sorted_id[0]]
            coord = (int(body_part.x * image_w + 0.5), \
                    int(body_part.y * image_h + 0.5))

            human_coords.append(coord)

        # Keep a parallel copy of humans so both lists stay index-aligned while
        # matched entries are removed.
        remaining_humans = list(humans)
        for fdirection, fcoord in zip(face_directions, face_coords):
            dists = get_dist_to_humans(human_coords, fcoord)
            idx = np.argmin(dists)
            gazing_status.append(self.is_gazing(fdirection, remaining_humans[idx], img))
            del human_coords[idx]
            del remaining_humans[idx]

        return gazing_status

    def publish_headpose_info(self, viz=True):

        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None or self.humans is None:
            print("Color img None")
            return
        im_shape = self.color_img.shape
        #scales = [1024, 1980]
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)

        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)

        t1 = time.time()
        faceboxes = self.mark_detector.extract_cnn_facebox(frame)
        mess = "Not detect pose"
        face_directions = []
        face_coords = []

        if faceboxes is not None and len(faceboxes) > 0:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                #facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_coords.append((int((facebox[0]+facebox[2])/2), \
                                int((facebox[1]+facebox[3])/2)))
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                face_direction = self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
                face_directions.append(face_direction)
                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                #self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])
        gazing_status = self.get_gazing_status(face_directions, face_coords, \
                copy.deepcopy(self.humans), self.color_img)

        msg = GazingInfo()
        msg.is_gazing = []
        msg.coords = []

        for coord, gazestatus in zip(face_coords, gazing_status):
            _coord = Coord2D()
            _coord.x = coord[0]
            _coord.y = coord[1]
            msg.coords.append(_coord)
            msg.is_gazing.append(gazestatus)

        self.pub_headpose_info.publish(msg)

        print(gazing_status)
        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \
                cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2)
        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
def main(video_src):
    """MAIN
    
    :param video_src: Source of video to analyze
    :type video_src: str or int"""
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    face_detector = FaceDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,))
    #box_process = Process(target=get_faces, args=(face_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    #face expression recognizer initialization
    from keras.models import model_from_json
    model = model_from_json(open("./model/facial_expression_model_structure.json", "r").read())
    model.load_weights('./model/facial_expression_model_weights.h5') #load weights

    #-----------------------------

    emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

    while True:
        input()  # wait for Enter in the terminal before processing each frame (manual stepping)
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.

        #facebox = box_queue.get()

        faceboxes = dump_queue(box_queue)
        print("{} FACEBOXES".format(len(faceboxes)))
        for facebox in faceboxes:
            if min(facebox) < 0:
                continue
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]

            if not face_img.shape[0] or not face_img.shape[1]:
                continue

            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            detected_face = frame[facebox[1]: facebox[3],facebox[0]: facebox[2]] #crop detected face
            detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY) #transform to gray scale
            detected_face = cv2.resize(detected_face, (48, 48)) #resize to 48x48



            # emotion estimation
            img_pixels = image.img_to_array(detected_face)
            img_pixels = np.expand_dims(img_pixels, axis = 0)

            img_pixels /= 255 #pixels are in scale of [0, 255]. normalize all pixels in scale of [0, 1]

            predictions = model.predict(img_pixels) #store probabilities of 7 expressions

            #find max indexed array 0: angry, 1:disgust, 2:fear, 3:happy, 4:sad, 5:surprise, 6:neutral
            max_index = np.argmax(predictions[0])

            emotion = emotions[max_index]

            #write emotion text above rectangle

            #CV_FONT_HERSHEY_SIMPLEX normal size sans-serif font
            #CV_FONT_HERSHEY_PLAIN small size sans-serif font
            #CV_FONT_HERSHEY_DUPLEX normal size sans-serif font (more complex than CV_FONT_HERSHEY_SIMPLEX )
            #CV_FONT_HERSHEY_COMPLEX normal size serif font
            #CV_FONT_HERSHEY_TRIPLEX normal size serif font (more complex than CV_FONT_HERSHEY_COMPLEX )
            #CV_FONT_HERSHEY_COMPLEX_SMALL smaller version of CV_FONT_HERSHEY_COMPLEX
            #CV_FONT_HERSHEY_SCRIPT_SIMPLEX hand-writing style font
            #CV_FONT_HERSHEY_SCRIPT_COMPLEX more complex variant of CV_FONT_HERSHEY_SCRIPT_SIMPLEX

            image_text = ""
            for index in range(len(emotions)):
                if predictions[0][index]>0.3:
                    image_text += "{0} : {1} %\n".format(emotions[index], int(predictions[0][index]*100))

            space = 0
            for text in image_text.strip().split("\n"):
                cv2.putText(img=frame,
                            text=text,
                            org=(int(facebox[0]), int(facebox[1]) - space),
                            fontFace=cv2.FONT_HERSHEY_PLAIN,
                            fontScale=0.8,
                            color=(255, 255, 255),
                            thickness=1)
                space += int(0.25*48)



            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
            #pose = pose_estimator.solve_pose(marks)


            # Stabilize the pose.
            #stabile_pose = []
            #pose_np = np.array(pose).flatten()
            #for value, ps_stb in zip(pose_np, pose_stabilizers):
            #    ps_stb.update([value])
            #    stabile_pose.append(ps_stb.state[0])
            #stabile_pose = np.reshape(stabile_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            pose_estimator.draw_annotation_box(
                 frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stable pose annotation on frame.
            #pose_estimator.draw_annotation_box(
            #    frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128))


        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
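The dump_queue helper used above is not part of this listing; a plausible sketch that drains every facebox currently waiting in the multiprocessing queue (an assumption about its behavior) is:

from queue import Empty  # multiprocessing.Queue raises queue.Empty from get_nowait()

def dump_queue(q):
    """Drain all items currently waiting in the queue and return them as a list.
    Hypothetical sketch of the helper used above; the original may differ."""
    items = []
    while True:
        try:
            items.append(q.get_nowait())
        except Empty:
            return items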
Beispiel #12
0
class HeadPoseEstimator():
    def __init__(self):
        print "OpenCV version: {}".format(cv2.__version__)
        self.CNN_INPUT_SIZE = 128

        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))

        self.pose_stabilizers = [
            Stabilizer(state_num=2,
                       measure_num=1,
                       cov_process=0.1,
                       cov_measure=0.1) for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(
                                       self.mark_detector,
                                       self.img_queue,
                                       self.box_queue,
                                   ))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool,
            self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name",
                                            "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image,
                                               self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param(
            "show_annotation_box", True)
        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)

        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)

        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()

    def get_face(self, detector, img_queue, box_queue):
        """
        Pull frames from the image queue and extract faceboxes.
        The work runs in a separate process (multiprocessing).
        """
        while not rospy.is_shutdown():
            image = img_queue.get()
            box = detector.extract_cnn_facebox(image)
            box_queue.put(box)

    def process_ctrl_callback(self, msg):
        self.process_ctrl_flag = msg.data

    def euler_to_quaternion(self, euler):
        # Convert Euler angles to a quaternion.
        # Adjust the signs so the result is mirrored.
        q = tf.transformations.quaternion_from_euler(euler.x, euler.y, euler.z)
        return Quaternion(x=q[1], y=-q[2], z=-q[3], w=q[0])
        #return Quaternion(x=q[0], y=q[1], z=q[2], w=q[3])

    def img_callback(self, img_msg):
        if self.process_ctrl_flag == True:
            try:
                cv_img = CvBridge().imgmsg_to_cv2(img_msg, "bgr8")
                flip_frame = cv2.flip(cv_img, 2)  # mirror the image
                frame = cv_img

                img_height, img_width, _ = frame.shape[:3]

                self.img_queue.put(frame)

                facebox = self.box_queue.get()

                # Face detection succeeded.
                if facebox is not None:
                    # Crop the face from the image and find the landmarks with the CNN.
                    # img[top : bottom, left : right]
                    face_img = frame[facebox[1]:facebox[3],
                                     facebox[0]:facebox[2]]
                    face_img = cv2.resize(
                        face_img, (self.CNN_INPUT_SIZE, self.CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    face_rect = FaceRect()
                    face_rect.x = facebox[0]
                    face_rect.y = facebox[1]
                    face_rect.width = facebox[2] - facebox[0]
                    face_rect.height = facebox[3] - facebox[1]

                    #face_rect.x = img_width - facebox[0] - (facebox[2] - facebox[0])
                    #face_rect.y = facebox[1]
                    #face_rect.width = facebox[2] - facebox[0]
                    #face_rect.height = facebox[3] - facebox[1]

                    marks = self.mark_detector.detect_marks([face_img])

                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Uncomment following line to show raw marks.
                    #self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                    # Uncomment following line to show facebox.
                    #self.mark_detector.draw_box(frame, [facebox])

                    pose = self.pose_estimator.solve_pose_by_68_points(marks)
                    #rotation = pose[0]
                    #euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    #quaternion = self.euler_to_quaternion(euler)

                    # stabilize the pose
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    rotation = steady_pose[0]
                    euler = Vector3(x=rotation[0],
                                    y=rotation[1],
                                    z=rotation[2])
                    head_rotation = self.euler_to_quaternion(euler)
                    #print head_rotation
                    #print "---"

                    head_pose = HeadPose()
                    head_pose.face_rect = face_rect
                    head_pose.head_rotation = head_rotation
                    self.result_publisher.publish(head_pose)

                    # Uncomment following line to draw pose annotation on frame.
                    #self.pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128))

                    # Uncomment following line to draw stabile pose annotation on frame.
                    if self.show_annotation_box_flag == True:
                        self.pose_estimator.draw_annotation_box(frame,
                                                                steady_pose[0],
                                                                steady_pose[1],
                                                                color=(128,
                                                                       255,
                                                                       128))

                    # Uncomment following line to draw head axes on frame.
                    # Does not work in OpenCV 3.4.0.
                    #self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

                    if self.show_axis_flag == True:
                        self.pose_estimator.draw_axis(frame, steady_pose[0],
                                                      steady_pose[1])

                if self.show_result_img_flag == True:
                    cv2.imshow("result", frame)
                    cv2.waitKey(1)

                if self.pub_result_img_flag == True:
                    try:
                        pub_img = CvBridge().cv2_to_imgmsg(frame, "bgr8")
                        self.img_publisher.publish(pub_img)

                    except CvBridgeError as e:
                        rospy.logerr(str(e))

            except CvBridgeError as e:
                rospy.logerr(str(e))
def process(input_key):
    if exists('pretrain/' + input_key):
        return

    # input_key = data['keys']
    input_file = get_video_file(input_key)

    print('Input file', input_file)

    stream = cv2.VideoCapture(input_file)

    mark_detector = MarkDetector()
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    frame = None
    count = 0
    frame_got = True

    target_folder = 'pretrain/{key}'.format(key=input_key)

    if not exists(target_folder):
        makedirs(target_folder)

    while frame_got:
        frame_got, frame = stream.read()
        if frame_got:
            # print('Frame', frame)
            facebox = mark_detector.extract_cnn_facebox(frame)
            # print('Facebox', facebox)

            if facebox:

                height, width = frame.shape[:2]
                pose_estimator = PoseEstimator(img_size=(height, width))
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img, (input_size, input_size))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                marks = mark_detector.detect_marks(face_img)

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # print('Marks', marks)

                for mark in marks:
                    cv2.circle(frame, tuple(mark), 3, (255, 0, 0))

                # print('Marks Length', len(marks))

                pose = pose_estimator.solve_pose_by_68_points(marks)
                # print('Pose', pose)
                pose = [pose[0].tolist(), pose[1].tolist()]
                with open(
                        '{target_folder}/vecs.{count}.txt'.format(
                            count=count, target_folder=target_folder),
                        'w') as f:
                    f.write(json.dumps(pose))
                with open(
                        '{target_folder}/marks.{count}.txt'.format(
                            count=count, target_folder=target_folder),
                        'w') as f:
                    f.write(json.dumps(marks.tolist()))
                cv2.imwrite(
                    '{target_folder}/image.{count}.png'.format(
                        count=count, target_folder=target_folder), frame)
            count += 1
Beispiel #14
0
def main():
    #bagreader = BagFileReader(args.video, 640,480,848,480,30,30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()
    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)
    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps,
                          (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:

        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        # Check for end of stream before converting the color space.
        if frame is None or frame is False:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)

        mess = "Not detect pose"

        if faceboxes is not None:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img,
                                      (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(
                #     frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                pose_estimator.draw_annotation_box(frame,
                                                   pose[0],
                                                   pose[1],
                                                   color=(255, 128, 128))

                # Uncomment following line to draw stabile pose annotation on frame.
                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1])

        cv2.putText(frame,
                    "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0),
                    thickness=2)
        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()
    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
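The pose-stabilization loop above is repeated verbatim in nearly every example; as a sketch (not part of the original code), it could be factored into a small helper:

import numpy as np

def stabilize_pose(pose, pose_stabilizers):
    """Feed each scalar of the (rotation, translation) pose through its own
    Kalman-filter Stabilizer and return the smoothed pose as a (2, 3) array."""
    steady_pose = []
    for value, ps_stb in zip(np.array(pose).flatten(), pose_stabilizers):
        ps_stb.update([value])
        steady_pose.append(ps_stb.state[0])
    return np.reshape(steady_pose, (-1, 3))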
Beispiel #15
0
class face_detector():
	def __init__(self):
		# Load the parameters
		self.conf = config()
		# initialize dlib's face detector (HOG-based) and then create the
		# facial landmark predictor
		print("[INFO] loading facial landmark predictor...")
		self.detector = dlib.get_frontal_face_detector()
		self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path)
		
		# grab the indexes of the facial landmarks for the left and
		# right eye, respectively
		(self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
		(self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
		
		# initialize the video stream and sleep for a bit, allowing the
		# camera sensor to warm up
		self.cap = cv2.VideoCapture(0)
		if self.conf.vedio_path == 0:
			self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
		_, sample_frame = self.cap.read()
		
		# Introduce mark_detector to detect landmarks.
		self.mark_detector = MarkDetector()
		
		# Setup process and queues for multiprocessing.
		self.img_queue = Queue()
		self.box_queue = Queue()
		self.img_queue.put(sample_frame)
		self.box_process = Process(target=get_face, args=(
			self.mark_detector, self.img_queue, self.box_queue,))
		self.box_process.start()
		
		# Introduce pose estimator to solve pose. Get one frame to setup the
		# estimator according to the image size.
		self.height, self.width = sample_frame.shape[:2]
		self.pose_estimator = PoseEstimator(img_size=(self.height, self.width))
		
		# Introduce scalar stabilizers for pose.
		self.pose_stabilizers = [Stabilizer(
			state_num=2,
			measure_num=1,
			cov_process=0.1,
			cov_measure=0.1) for _ in range(6)]
		
		self.tm = cv2.TickMeter()
		# Gaze tracking
		self.gaze = GazeTracking()
	
	def detect(self):
		# loop over the frames from the video stream
		temp_steady_pose = 0
		# Initialize the blink frame counter and the total number of blinks once,
		# before the frame loop, so the blink count accumulates across frames.
		TOTAL = 0
		COUNTER = 0
		while True:
			# grab the frame from the threaded video stream, resize it to
			# have a maximum width of 400 pixels, and convert it to
			# grayscale
			frame_got, frame = self.cap.read()
			
			# Empty frame
			frame_empty = np.zeros(frame.shape)
			
			# frame = imutils.rotate(frame, 90)
			frame = imutils.resize(frame, width=400)
			gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
			
			# detect faces in the grayscale frame
			rects = self.detector(gray, 0)
			
			# loop over the face detections
			for (i, rect) in enumerate(rects):
				# determine the facial landmarks for the face region, then
				# convert the facial landmark (x, y)-coordinates to a NumPy
				# array
				self.shape = self.predictor(gray, rect)
				self.shape = face_utils.shape_to_np(self.shape)
				
				# ********************************
				# Blink detection
				# extract the left and right eye coordinates, then use the
				# coordinates to compute the eye aspect ratio for both eyes
				self.leftEye = self.shape[self.lStart:self.lEnd]
				self.rightEye = self.shape[self.rStart:self.rEnd]
				self.leftEAR = eye_aspect_ratio(self.leftEye)
				self.rightEAR = eye_aspect_ratio(self.rightEye)
				
				# average the eye aspect ratio together for both eyes
				ear = (self.leftEAR + self.rightEAR) / 2.0
				
				# check to see if the eye aspect ratio is below the blink
				# threshold, and if so, increment the blink frame counter
				if ear < self.conf.EYE_AR_THRESH:
					COUNTER += 1
				
				# otherwise, the eye aspect ratio is not below the blink
				# threshold
				else:
					# if the eyes were closed for a sufficient number of
					# then increment the total number of blinks
					if COUNTER >= self.conf.EYE_AR_CONSEC_FRAMES:
						TOTAL += 1
					
					# reset the eye frame counter
					COUNTER = 0
				
				# Frame empty
				cv2.putText(frame_empty, "Blinks: {}".format(TOTAL), (30, 60),
							cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
				cv2.putText(frame_empty, "EAR: {:.2f}".format(ear), (30, 90),
							cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
				
				# ********************************
				# convert dlib's rectangle to a OpenCV-style bounding box
				# [i.e., (x, y, w, h)], then draw the face bounding box
				(x, y, w, h) = face_utils.rect_to_bb(rect)
				self.bounding_box = (x, y, w, h)
				# cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
				
				# Frame empty
				cv2.rectangle(frame_empty, (x, y), (x + w, y + h), (0, 255, 0), 2)
				
				# show the face number
				cv2.putText(frame_empty, "Face #{}".format(i + 1), (30, 120),
							cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2)
				
				# loop over the (x, y)-coordinates for the facial landmarks
				# and draw them on the image
				for (x, y) in self.shape:
					# cv2.circle(frame, (x, y), 1, (0, 255, 255), -1)
					cv2.circle(frame_empty, (x, y), 1, (0, 255, 255), -1)
			
			# **********************************************************
			if frame_got is False:
				break
			
			# If frame comes from webcam, flip it so it looks like a mirror.
			if self.conf.vedio_path == 0:
				frame = cv2.flip(frame, 2)
			
			# Pose estimation by 3 steps:
			# 1. detect face;
			# 2. detect landmarks;
			# 3. estimate pose
			
			# Feed frame to image queue.
			self.img_queue.put(frame)
			
			# Get face from box queue.
			self.facebox = self.box_queue.get()
			
			if self.facebox is not None:
				# Detect landmarks from image of 128x128.
				face_img = frame[self.facebox[1]: self.facebox[3],
						   self.facebox[0]: self.facebox[2]]
				face_img = cv2.resize(face_img, (self.conf.CNN_INPUT_SIZE, self.conf.CNN_INPUT_SIZE))
				face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
				
				self.tm.start()
				# marks = self.mark_detector.detect_marks([face_img])
				self.tm.stop()
				
				# NOTE: the CNN marks call above is commented out, so this block scales
				# dlib's shape (already in full-frame coordinates); it is only correct
				# when the marks come from the 128x128 face crop.
				self.shape *= (self.facebox[2] - self.facebox[0])
				self.shape[:, 0] += self.facebox[0]
				self.shape[:, 1] += self.facebox[1]
				
				# Uncomment following line to show raw marks.
				# mark_detector.draw_marks(
				#     frame, marks, color=(0, 255, 0))
				
				# Uncomment following line to show facebox.
				# mark_detector.draw_box(frame, [facebox])
				
				# Try pose estimation with 68 points.
				self.pose = self.pose_estimator.solve_pose_by_68_points(self.shape)
				
				# Stabilize the pose.
				self.steady_pose = []
				pose_np = np.array(self.pose).flatten()
				for value, ps_stb in zip(pose_np, self.pose_stabilizers):
					ps_stb.update([value])
					self.steady_pose.append(ps_stb.state[0])
				self.steady_pose = np.reshape(self.steady_pose, (-1, 3))
				
				# Uncomment following line to draw pose annotation on frame.
				# pose_estimator.draw_annotation_box(
				# 	frame, pose[0], pose[1], color=(255, 128, 128))
				
				# Uncomment following line to draw stabile pose annotation on frame.
				# pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))
				
				# Uncomment following line to draw head axes on frame.
				# pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
				self.pose_estimator.draw_axes(frame_empty, self.steady_pose[0], self.steady_pose[1])
				print('steady pose vector: rotation {}, translation {}'.format(self.steady_pose[0], self.steady_pose[1]))
			else:
				# cv2.putText(frame, "Signal loss", (200, 200),
				# 			cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
				cv2.putText(frame_empty, "Signal loss", (200, 200),
							cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
			# ******************************************************************
			# We send this frame to GazeTracking to analyze it
			self.gaze.refresh(frame)
			
			frame = self.gaze.annotated_frame()
			text = ""
			
			if self.gaze.is_blinking():
				text = "Blinking"
			elif self.gaze.is_right():
				text = "Looking right"
			elif self.gaze.is_left():
				text = "Looking left"
			elif self.gaze.is_center():
				text = "Looking center"
			
			cv2.putText(frame_empty, text, (250, 250), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 2)
			
			left_pupil = self.gaze.pupil_left_coords()
			right_pupil = self.gaze.pupil_right_coords()
			cv2.putText(frame_empty, "Left pupil:  " + str(left_pupil), (250, 280), cv2.FONT_HERSHEY_DUPLEX, 0.5,
						(147, 58, 31), 1)
			cv2.putText(frame_empty, "Right pupil: " + str(right_pupil), (250, 310), cv2.FONT_HERSHEY_DUPLEX, 0.5,
						(147, 58, 31), 1)
			
			# ********************************************************************
			# show the frame
			# cv2.imshow("Frame", frame)
			cv2.imshow("Frame", frame_empty)
			key = cv2.waitKey(1) & 0xFF
			
			self.pass_variable = np.array(1)
			
			try:
				self._listener(self.pass_variable)
			except:
				pass
			
			# if the `q` key was pressed, break from the loop
			if key == ord("q"):
				break
		# do a bit of cleanup
		cv2.destroyAllWindows()
		# self.cap.stop()
	
	def set_listener(self, listener):
		self._listener = listener
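The eye_aspect_ratio and lip_distance helpers used for blink and yawn detection are defined elsewhere in the original scripts; sketches of the commonly used implementations (assumptions, since the originals are not shown here) are:

import numpy as np

def eye_aspect_ratio(eye):
    """EAR (Soukupova & Cech): mean vertical eye opening divided by the
    horizontal eye width, computed on the six landmarks of one eye."""
    A = np.linalg.norm(eye[1] - eye[5])
    B = np.linalg.norm(eye[2] - eye[4])
    C = np.linalg.norm(eye[0] - eye[3])
    return (A + B) / (2.0 * C)

def lip_distance(shape):
    """Vertical gap between the mean upper-lip and lower-lip landmarks of the
    68-point model; large values indicate an open mouth (yawn)."""
    top_lip = np.concatenate((shape[50:53], shape[61:64]))
    low_lip = np.concatenate((shape[56:59], shape[65:68]))
    return abs(np.mean(top_lip, axis=0)[1] - np.mean(low_lip, axis=0)[1])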
def Camera_Capture():
    mark_detector = MarkDetector()
    name = input("Enter student id:")
    directory = os.path.join(facepath, name)
    
    if not os.path.exists(facepath):
        os.makedirs(facepath, exist_ok=True)
    
    if not os.path.exists(directory):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError as e:
            if e.errno != errno.EEXIST:
                print('invalid student id or access denied')
                return

    poses=['frontal','right','left','up','down']
    file=0
    cap = cv2.VideoCapture(file)
    
    ret, sample_frame = cap.read()
    i = 0
    count = 0
    if ret==False:
        return    
        
    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))
    
    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]
    images_saved_per_pose=0
    number_of_images = 0
    
    shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH)
    while i<5:
        saveit = False
        # Read frame, crop it, flip it, suits your needs.
        ret, frame = cap.read()
        if ret is False:
            break
        count += 1
        if count % 5 != 0:  # process only every fifth frame
            continue
        if images_saved_per_pose==IMAGE_PER_POSE:
            i+=1
            images_saved_per_pose=0

        # If frame comes from webcam, flip it so it looks like a mirror.
        if file == 0:
            frame = cv2.flip(frame, 2)
        original_frame=frame.copy()
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        
        facebox = mark_detector.extract_cnn_facebox(frame)
    
        if facebox is not None:
            # Detect landmarks from image of 128x128.
            x1=max(facebox[0]-0,0)
            x2=min(facebox[2]+0,width)
            y1=max(facebox[1]-0,0)
            y2=min(facebox[3]+0,height)
            
            face = frame[y1: y2,x1:x2]
            face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
    
            marks = mark_detector.detect_marks([face_img])
    
            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]
        
            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)
    
            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))
    
            # print(steady_pose[0][0])
            # if steady_pose[0][0]>0.1:
            #     print('right')
            # else: 
            #     if steady_pose[0][0]<-0.1:
            #         print('left')
            # if steady_pose[0][1]>0.1:
            #     print('down')
            # else: 
            #     if steady_pose[0][1]<-0.1:
            #         print('up')
            # print(steady_pose[0])
            if i==0:
                if abs(steady_pose[0][0])<ANGLE_THRESHOLD and abs(steady_pose[0][1])<ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True                    
            if i==1:
                if steady_pose[0][0]>ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==2:
                if steady_pose[0][0]<-ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==3:
                if steady_pose[0][1]<-ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            if i==4:
                if steady_pose[0][1]>ANGLE_THRESHOLD:
                    images_saved_per_pose+=1
                    saveit = True
            # Show preview.
            if i>=5:
                print ('Thank you')
                break

            frame = cv2.putText(frame,
                                poses[i] + ' : ' + str(images_saved_per_pose) + '/' + str(IMAGE_PER_POSE),
                                (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 2,
                                (255, 255, 255), 1, cv2.LINE_AA)
            frame = cv2.rectangle(frame, (x1,y1), (x2,y2),(255,255,0),2)
            cv2.imshow("Preview", frame)

            if saveit:                        
                face = dlib.rectangle(x1, y1, x2, y2)
                face_aligned = face_aligner.align(original_frame, frame_gray, face)
                cv2.imwrite(os.path.join(directory, str(name)+'_'+str(number_of_images)+'.jpg'), face_aligned)
                print(images_saved_per_pose)
                number_of_images+=1

        if cv2.waitKey(10) == 27:
            break    
    cap.release()
    cv2.destroyAllWindows()
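Camera_Capture relies on several module-level constants defined elsewhere in the original script; representative values (illustrative assumptions, except CNN_INPUT_SIZE, which matches the 128x128 landmark CNN used throughout) might be:

# Assumed configuration for Camera_Capture -- illustrative values only.
facepath = "face_db"        # root folder where aligned face crops are stored
IMAGE_PER_POSE = 10         # images to capture for each of the five poses
ANGLE_THRESHOLD = 0.3       # rotation-vector threshold separating frontal/right/left/up/down
FACE_WIDTH = 160            # desiredFaceWidth passed to imutils' FaceAligner
CNN_INPUT_SIZE = 128        # input size of the landmark CNN (see the other examples)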
Beispiel #17
0
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print(
            "Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            mark_detector.draw_marks(frame, marks, color=(0, 255, 0))
            right_corner = tuple([int(i) for i in marks[36]])
            left_corner = tuple([int(i) for i in marks[45]])
            # print(marks[36], marks[45])
            cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2)

            pixel_distance = int(
                math.sqrt((right_corner[0] - left_corner[0])**2 +
                          (right_corner[1] - left_corner[1])**2))
            estimated_distance = (real_width * focal_length) / pixel_distance

            cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Uncomment following line to draw stabile pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            print(steady_pose[1])
            pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            angles = process_eyes(frame, marks)
            if bool(angles) is True:
                # print(angles)
                angles = cvt_to_radians(angles)
                rotated_vector = rotate_vector(steady_pose[0],
                                               angles['right'][0],
                                               angles['right'][1])
                shifted_translation_vector = np.copy(steady_pose[1])
                shifted_translation_vector[0] += 50
                shifted_translation_vector[1] += 50
                pose_estimator.draw_axes(frame, rotated_vector,
                                         shifted_translation_vector)

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
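The eye-corner distance estimate above follows the pinhole-camera relation distance = (real_width * focal_length) / pixel_width, with real_width and focal_length defined elsewhere in the original script; a sketch of how such a focal length is usually calibrated from one reference image (function names are illustrative, not from the source):

def calibrate_focal_length(pixel_width, known_distance, real_width):
    """Pinhole calibration: F = (P * D) / W, with P the measured pixel width of
    the object in a reference photo, D the known camera distance, and W the
    object's real width (same length unit as D)."""
    return (pixel_width * known_distance) / real_width

def estimate_distance(focal_length, real_width, pixel_width):
    """Invert the same relation, D = (W * F) / P, as used in the example above."""
    return (real_width * focal_length) / pixel_width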
def main():
    """MAIN"""
    # Video source from webcam or video file.
    video_src = args.cam if args.cam is not None else args.video
    if video_src is None:
        print("Warning: video source not assigned, default webcam will be used.")
        video_src = 0

    cap = cv2.VideoCapture(video_src)
    if video_src == 0:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(
        mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    if args.out is not None:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height))

    # Introduce scalar stabilizers for pose.
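    # Six scalar stabilizers, one per pose value: 3 for rotation, 3 for translation.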
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    tm = cv2.TickMeter()

    cnt = 0

    input_path = args.input_path
    listdir = os.listdir(input_path)
    for v_name in listdir:
        v_path = os.path.join(input_path, v_name)
        cap = cv2.VideoCapture(v_path)

        while True:
            # Read frame, crop it, flip it to suit your needs.
            frame_got, frame = cap.read()
            if frame_got is False:
                break

            # Crop it if frame is larger than expected.
            # frame = frame[0:480, 300:940]

            # If frame comes from webcam, flip it so it looks like a mirror.
            if video_src == 0:
                frame = cv2.flip(frame, 2)

            # Pose estimation by 3 steps:
            # 1. detect face;
            # 2. detect landmarks;
            # 3. estimate pose

            # Feed frame to image queue.
            img_queue.put(frame)

            # Get face from box queue.
            facebox = box_queue.get()

            if facebox is not None:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3],
                                 facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks(face_img)
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Uncomment following line to draw pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(255, 128, 128))

                # Draw stable pose annotation on frame.
                pose_estimator.draw_annotation_box(
                    frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            # Show preview.
            # cv2.imshow("Preview", frame)
            # if cv2.waitKey(10) == 27:
            #     break
            if args.out is not None:
                output_movie.write(frame)
            else:
                cv2.imshow("Preview", frame)

            cnt += 1
            if cnt % 100 == 0:
                print(str(cnt), flush=True)

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cv2.destroyAllWindows()
Beispiel #19
0
def main():
    """MAIN"""
    cv2.namedWindow("Test")  # Create a named window
    cv2.moveWindow("Test", 900, 600)  # Move it to (40,30)

    screenWidth, screenHeight = pyautogui.size()
    st = 'Last command'

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              mark_detector,
                              img_queue,
                              box_queue,
                          ))
    box_process.start()

    # Setting up process for listening to audio commands
    voice_command_queue = Q()
    stt_process = Thread(target=get_voice_command,
                         args=(voice_command_queue, ))
    stt_process.daemon = True
    stt_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.1,
                   cov_measure=0.1) for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        # Read frame, crop it, flip it to suit your needs.
        frame_got, frame = cap.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks([face_img])
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(
            #     frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following line to draw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw stable pose annotation on frame.
            pose_estimator.draw_annotation_box(frame,
                                               steady_pose[0],
                                               steady_pose[1],
                                               color=(255, 128, 128))

            # Get the endpoints of the head axes; they drive the mouse pointer below.
            endpoints = pose_estimator.getEndPoints(frame, steady_pose[0],
                                                    steady_pose[1])

            deltax = endpoints[1][0] - endpoints[0][0]
            deltay = endpoints[1][1] - endpoints[0][1]

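            # Map the axis-endpoint offsets to screen coordinates; the hard-coded constants
            # map deltax in roughly [-44, 44] and deltay in roughly [-14, 44] to the full screen.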
            xpos = math.floor((deltax + 44) * screenWidth / 88)
            ypos = math.floor((deltay + 14) * screenHeight / 58)

            # print(xpos, ypos)

            pyautogui.moveTo(xpos, ypos)

            if not voice_command_queue.empty():

                command = voice_command_queue.get_nowait()
                if 'click' in command or 'select' in command:
                    pyautogui.click()
                    st = 'Click'
                elif 'double' in command or 'in' in command:
                    pyautogui.doubleClick()
                    st = 'Double Click'
                elif 'right' in command or 'menu' in command or 'light' in command:
                    pyautogui.rightClick()
                    st = 'Right Click'

                print(command)

        cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255)
        scale_percent = 30
        # calculate 30 percent of the original dimensions
        width = int(frame.shape[1] * scale_percent / 100)
        height = int(frame.shape[0] * scale_percent / 100)

        # dsize
        dsize = (width, height)

        # resize image
        output = cv2.resize(frame, dsize)

        cv2.moveWindow("Test", screenWidth - width, screenHeight - height)

        # Show preview.
        cv2.imshow("Test", output)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main():
    # construct the argument parse and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    """MAIN"""
    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)

    if isWindows():
        thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it to suit your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)
            # unpack result
            facebox, confidence = result
            # fix facebox if needed
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3],
                             facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # segment the image based on markers and facebox
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(
                    frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
Beispiel #21
0
    def cameraCap(self):
        self.uId = self.userId.get()
        if self.uId != '':
            if self.outputLabel is not None:
                self.outputLabel.destroy()

            self.outputLabel = Label(self.framePic, text="Here We Start")
            self.outputLabel.config(font=("Courier", 44))
            self.outputLabel.place(x=400, y=100)

            mark_detector = MarkDetector()
            name = self.uId
            directory = os.path.join(facepath, name)

            if not os.path.exists(facepath):
                os.makedirs(facepath, exist_ok=True)

            if not os.path.exists(directory):
                try:
                    os.makedirs(directory, exist_ok=True)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        print('invalid student id or access denied')
                        return

            poses = ['frontal', 'right', 'left', 'up', 'down']
            file = 0

            ret, sample_frame = self.cap.read()
            i = 0
            count = 0
            if not ret:
                return

            # Introduce pose estimator to solve pose. Get one frame to setup the
            # estimator according to the image size.
            height, width = sample_frame.shape[:2]
            pose_estimator = PoseEstimator(img_size=(height, width))

            # Introduce scalar stabilizers for pose.
            pose_stabilizers = [
                Stabilizer(state_num=2,
                           measure_num=1,
                           cov_process=0.1,
                           cov_measure=0.1) for _ in range(6)
            ]
            images_saved_per_pose = 0
            number_of_images = 0

            shape_predictor = dlib.shape_predictor(
                "shape_predictor_68_face_landmarks.dat")
            face_aligner = FaceAligner(shape_predictor,
                                       desiredFaceWidth=FACE_WIDTH)
            while i < 5:
                saveit = False
                # Read frame, crop it, flip it to suit your needs.
                ret, frame = self.cap.read()
                if ret is False:
                    break
                count += 1
                if count % 5 != 0:  # process only one frame in every five
                    continue
                if images_saved_per_pose == IMAGE_PER_POSE:
                    i += 1
                    images_saved_per_pose = 0

                # If frame comes from webcam, flip it so it looks like a mirror.
                if file == 0:
                    frame = cv2.flip(frame, 2)
                original_frame = frame.copy()
                frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                facebox = mark_detector.extract_cnn_facebox(frame)

                if facebox is not None:
                    # Detect landmarks from image of 128x128.
                    x1 = max(facebox[0] - 0, 0)
                    x2 = min(facebox[2] + 0, width)
                    y1 = max(facebox[1] - 0, 0)
                    y2 = min(facebox[3] + 0, height)

                    face = frame[y1:y2, x1:x2]
                    face_img = cv2.resize(face,
                                          (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    marks = mark_detector.detect_marks([face_img])

                    # Convert the marks locations from local CNN to global image.
                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Try pose estimation with 68 points.
                    pose = pose_estimator.solve_pose_by_68_points(marks)

                    # Stabilize the pose.
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    # print(steady_pose[0][0])
                    # if steady_pose[0][0]>0.1:
                    #     print('right')
                    # else:
                    #     if steady_pose[0][0]<-0.1:
                    #         print('left')
                    # if steady_pose[0][1]>0.1:
                    #     print('down')
                    # else:
                    #     if steady_pose[0][1]<-0.1:
                    #         print('up')
                    # print(steady_pose[0])
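                    # Accept the frame only when the head pose matches the currently
                    # requested direction: frontal, right, left, up or down.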
                    if i == 0:
                        if abs(steady_pose[0][0]) < ANGLE_THRESHOLD and abs(
                                steady_pose[0][1]) < ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 1:
                        if steady_pose[0][0] > ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 2:
                        if steady_pose[0][0] < -ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 3:
                        if steady_pose[0][1] < -ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    if i == 4:
                        if steady_pose[0][1] > ANGLE_THRESHOLD:
                            images_saved_per_pose += 1
                            saveit = True
                    # Show preview.
                    if i >= 5:
                        print('Thank you')
                        if self.outputLabel is not None:
                            self.outputLabel.destroy()

                        self.outputLabel = Label(self.framePic, text="Thanks")
                        self.outputLabel.config(font=("Courier", 44))
                        self.outputLabel.place(x=400, y=100)
                        break

                    frame = cv2.putText(
                        frame, poses[i] + ' : ' + str(images_saved_per_pose) +
                        '/' + str(IMAGE_PER_POSE), (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 1,
                        cv2.LINE_AA)
                    frame = cv2.rectangle(frame, (x1, y1), (x2, y2),
                                          (255, 255, 0), 2)
                    frame = imutils.resize(frame, width=300, height=300)
                    # OpenCV represents images in BGR order; however PIL
                    # represents images in RGB order, so we need to swap
                    # the channels, then convert to PIL and ImageTk format
                    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    image = Image.fromarray(image)
                    image = ImageTk.PhotoImage(image)
                    # if the panel is None, we need to initialize it
                    if self.panel is None:
                        self.panel = Label(self.framePic, image=image)
                        self.panel.image = image
                        self.panel.pack(side=LEFT, padx=0, pady=0)
                        print("Done")
                    # otherwise, simply update the panel
                    else:
                        self.panel.configure(image=image)
                        self.panel.image = image

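                    # When the current pose matched, align the face with dlib's FaceAligner
                    # and save the crop under the user's directory.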
                    if saveit:
                        face = dlib.rectangle(x1, y1, x2, y2)
                        face_aligned = face_aligner.align(
                            original_frame, frame_gray, face)
                        cv2.imwrite(
                            os.path.join(
                                directory,
                                str(name) + '_' + str(number_of_images) +
                                '.jpg'), face_aligned)
                        print(images_saved_per_pose)
                        number_of_images += 1

            self.cap.release()
        else:
            if self.outputLabel is not None:
                self.outputLabel.destroy()

            self.outputLabel = Label(self.framePic,
                                     text="Please Enter a Valid Id")
            self.outputLabel.config(font=("Courier", 44))
            self.outputLabel.place(x=400, y=100)