Example #1
    def face_thread(self):
        face_nn = self.device.getOutputQueue("face_nn")
        landmark_in = self.device.getInputQueue("landmark_in")
        pose_in = self.device.getInputQueue("pose_in")

        while self.running:
            if self.frame is None:
                continue
            try:
                bboxes = np.array(face_nn.get().getFirstLayerFp16())
            except RuntimeError as ex:
                continue
            bboxes = bboxes.reshape((bboxes.size // 7, 7))
            self.bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]

            for raw_bbox in self.bboxes:
                bbox = frame_norm(self.frame, raw_bbox)
                det_frame = self.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                land_data = depthai.NNData()
                land_data.setLayer("0", to_planar(det_frame, (48, 48)))
                landmark_in.send(land_data)

                pose_data = depthai.NNData()
                pose_data.setLayer("data", to_planar(det_frame, (60, 60)))
                pose_in.send(pose_data)

                self.face_box_q.put(bbox)
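
These snippets share a `to_planar` helper that converts a BGR OpenCV frame into the flattened planar (CHW) buffer expected by `NNData.setLayer`. Its definition is not included here; a minimal sketch, assuming a BGR HWC numpy array as input, could look like this:

import cv2
import numpy as np

def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
    # Resize to the network input size, reorder HWC -> CHW, then flatten
    # so the buffer can be passed to NNData.setLayer().
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()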
Example #2
    def forward(self, data):
        results = []
        if data is not None:
            for sample in data[0]:
                nn_data = dai.NNData()
                nn_data.setLayer(self.input_name, sample)
                self.data_in.send(nn_data)
                assert wait_for_results(self.data_out)
                results.append(self.data_out.get())
            data[0] = results
        else:
            assert wait_for_results(self.data_out)
            results.append(self.data_out.get())
            data = [
                results,
                [
                    DataInfo(
                        scales=(1.0, 1.0),
                        pads=(0, 0),
                        original_width=self.input_width,
                        original_height=self.input_height,
                    ),
                ],
            ]

        return data
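
`DataInfo` is not defined in these snippets; from the way it is constructed above, it is a plain record of the preprocessing metadata used to map results back to the original frame. A hedged sketch as a dataclass (field types inferred from usage):

from dataclasses import dataclass
from typing import Tuple

@dataclass
class DataInfo:
    # Preprocessing metadata: resize scales, padding, and the original frame size.
    scales: Tuple[float, float]
    pads: Tuple[int, int]
    original_width: int
    original_height: int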
Example #3
 def forward(self, in_queue, out_queue, data):
     results = []
     if data is not None:
         for sample in data[0]:
             nn_data = dai.NNData()
             nn_data.setLayer("data", sample)
             in_queue.send(nn_data)
             assert wait_for_results(out_queue)
             results.append(out_queue.get())
         data[0] = results
     else:
         assert wait_for_results(out_queue)
         results.append(out_queue.get())
         data = [
             results,
             [
                 DataInfo(
                     scales=(
                         self.input_width / self.video_width,
                         self.input_height / self.video_height,
                     ),
                     pads=(0, 0),
                     original_width=self.video_width,
                     original_height=self.video_height,
                 ),
             ],
         ]
     return data
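
Several examples gate on `assert wait_for_results(queue)` before reading. That helper is not shown either; it is presumably a small polling loop with a timeout, roughly like the sketch below (the one-second timeout is an assumption):

import time

def wait_for_results(queue, timeout=1.0):
    # Poll the DepthAI output queue until a packet is available or the timeout expires.
    start = time.monotonic()
    while not queue.has():
        if time.monotonic() - start > timeout:
            return False
    return True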
Example #4
    def run(self):
        threading.Thread(target=self.det_thread, daemon=True).start()
        threading.Thread(target=self.reid_thread, daemon=True).start()

        while self.cap.isOpened():
            read_correctly, self.frame = self.cap.read()

            if not read_correctly:
                break

            self.fps.update()
            self.debug_frame = self.frame.copy()

            nn_data = depthai.NNData()
            nn_data.setLayer("input", to_planar(self.frame, (544, 320)))
            self.detection_in.send(nn_data)

            aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
            cv2.imshow(
                "Camera_view",
                cv2.resize(self.debug_frame,
                           (int(900), int(900 / aspect_ratio))))
            if cv2.waitKey(1) == ord('q'):
                cv2.destroyAllWindows()
                break

        self.fps.stop()
        print("FPS: {:.2f}".format(self.fps.fps()))
        self.cap.release()
Example #5
def run_nn(x_in, x_out, in_dict):
    nn_data = depthai.NNData()
    for key in in_dict:
        nn_data.setLayer(key, in_dict[key])
    x_in.send(nn_data)
    has_results = wait_for_results(x_out)
    if not has_results:
        raise RuntimeError("No data from nn!")
    return x_out.get()
Example #6
 def forward(in_queue, out_queue, data):
     results = []
     for sample in data[0]:
         nn_data = dai.NNData()
         nn_data.setLayer("data", sample)
         in_queue.send(nn_data)
         assert wait_for_results(out_queue)
         results.append(out_queue.get())
     data[0] = results
     return data
Example #7
 def forward(self, data):
     results = []
     for sample in data[0]:
         nn_data = dai.NNData()
         nn_data.setLayer("data", sample)
         self.data_in.send(nn_data)
         assert wait_for_results(self.data_out)
         results.append(self.data_out.get())
     data[0] = results
     return data
Example #8
 def forward(self, data, stage="age-gender"):
     results = []
     for sample in data[0]:
         sample_results = []
         for face in sample:
             nn_data = dai.NNData()
             nn_data.setLayer("data", face)
             self.age_gender_in.send(nn_data)
             assert wait_for_results(self.age_gender_out)
             sample_results.append(self.age_gender_out.get())
         results.append(sample_results)
     data[0] = results
     return data
Example #9
def run_nn(x_in, x_out, in_dict):
    """

    :param x_in: X_link_in
    :param x_out: X_link_out
    :param in_dict:
    :return:
    """
    nn_data = depthai.NNData()
    for key in in_dict:
        nn_data.setLayer(key, in_dict[key])
    x_in.send(nn_data)
    # has_results = wait_for_results(x_out)
    # if not has_results:
    #     raise RuntimeError("No data from nn!")
    return x_out.tryGet()
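
Unlike Example #5, this variant returns `x_out.tryGet()`, so it does not block and may return `None` when no result is ready yet. A hypothetical call (queue names, layer name, and input size are illustrative only) should therefore guard the result:

result = run_nn(landmark_in, landmark_nn, {"0": to_planar(face_frame, (48, 48))})
if result is not None:
    landmarks = np.array(result.getFirstLayerFp16())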
Example #10
    def det_thread(self):
        detection_nn = self.device.getOutputQueue("detection_nn")
        while True:
            bboxes = np.array(detection_nn.get().getFirstLayerFp16())
            bboxes = bboxes[:np.where(bboxes == -1)[0][0]]
            bboxes = bboxes.reshape((bboxes.size // 7, 7))
            bboxes = bboxes[bboxes[:, 2] > 0.5][:, 3:7]

            for raw_bbox in bboxes:
                bbox = frame_norm(self.frame, raw_bbox)
                det_frame = self.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                nn_data = depthai.NNData()
                nn_data.setLayer("data", to_planar(det_frame, (48, 96)))
                self.reid_in.send(nn_data)
                self.reid_bbox_q.put(bbox)
Example #11
    def run_face(self):
        face_nn = self.device.getOutputQueue("face_nn")
        land68_in = self.device.getInputQueue("land68_in", 4, False)
        while self.running:
            if self.frame is None:
                continue
            bboxes = np.array(face_nn.get().getFirstLayerFp16())
            bboxes = bboxes.reshape((bboxes.size // 7, 7))
            self.bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]
            for raw_bbox in self.bboxes:
                bbox = frame_norm(self.frame, raw_bbox)
                det_frame = self.frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                land68_data = depthai.NNData()
                land68_data.setLayer("data", to_planar(det_frame, (160, 160)))
                land68_in.send(land68_data)
                self.face_box_q.put(bbox)
Example #12
    def advanced_main(self):

        pipeline = create_advanced_pipeline()
        with dai.Device(pipeline) as device:
            # Create output queues
            vidQ = device.getOutputQueue(name="cam", maxSize=1, blocking=False)
            depthQ = device.getOutputQueue(name="depth", maxSize=1, blocking=False)
            palmQ = device.getOutputQueue(name="palm_nn", maxSize=1, blocking=False)
            faceQ = device.getOutputQueue("face_nn",maxSize=1, blocking=False)
            pose_inQ = device.getInputQueue("pose_in",maxSize=1, blocking=False)
            pose_outQ = device.getOutputQueue(name="pose_nn", maxSize=1, blocking=False)

            palmDetection = PalmDetection()

            depthFrame = None
            frame = None
            head_loc = None

            print("Main loop init")

            self.fps.start()

            while rclpy.ok():

                in_rgb = vidQ.tryGet()
                if in_rgb is not None:
                    frame = crop_to_rect(in_rgb.getCvFrame())
                    debug_frame = frame.copy()

                in_depth = depthQ.tryGet()
                if in_depth is not None:
                    depthFrame = crop_to_rect(in_depth.getFrame())


                in_face = faceQ.tryGet()
                head_bbox=None
                if in_face is not None and frame is not None and depthFrame is not None:
                    
                    bboxes = bbox_face_extraction(in_face)
                    color=(143, 184, 77)
                    for raw_bbox in bboxes:

                        bbox = frame_norm(frame, raw_bbox)
                        det_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                        pose_data = dai.NNData()
                        pose_data.setLayer("data", to_planar(det_frame, (60, 60)))
                        pose_inQ.send(pose_data)

                        draw_bbox(debug_frame,bbox,color)
                        head_bbox=bbox
                        head_loc = calc_spatials(bbox,depthFrame,RED_RATIO_FACE,filter="median")

                palm_in = palmQ.tryGet()

                if palm_in is not None and frame is not None and depthFrame is not None:

                    #perform computation and output drawing
                    palm_coords = palmDetection.run_palm(debug_frame, palm_in)
                    # Calculate and draw spatial coordinates of the palm
                    spatialCoords = draw_palm_detection(debug_frame, palm_coords, depthFrame)

                    #publish palm transform
                    if spatialCoords is not None:
                        self.publish_palm_transform(spatialCoords)

                    ###### IMSHOW FOR DEPTH AND FRAME
                    #cv2.imshow("debug", debug_frame)
                    #show_depth(depthFrame)

                head_or = pose_outQ.tryGet()

                if head_or is not None:
                    pose = [val[0][0] for val in to_tensor_result(head_or).values()]
                    if head_loc is not None and not np.isnan(head_loc[2]):
                        self.publish_head_transform(head_loc,pose)
                        #print("Loc:({0},{1},{2}) , Or: ({3},{4},{5})".format(head_loc[0],head_loc[1],head_loc[2],pose[0],pose[1],pose[2]))
                    #draw_3d_axis(debug_frame,pose,(int(head_bbox[0]),int(head_bbox[1])),100)
                    draw_pose_data(debug_frame,pose,head_loc, head_bbox,color=(143, 184, 77))

                    #publish detection image
                    cvb = CvBridge()
                    stamp = self.get_clock().now().to_msg()
                    image_msg = cvb.cv2_to_imgmsg(debug_frame, encoding='bgr8')
                    image_msg.header.stamp = stamp
                    image_msg.header.frame_id = 'oak-d_frame'
                    
                    self.pub_rectified_img.publish(image_msg)

                self.fps.update()

                if cv2.waitKey(1) == ord('q'):
                    cv2.destroyAllWindows()
                    self.fps.stop()
                    print("CAM FPS: {0}  P-FPS:{1}".format(CAM_FPS,self.fps.fps()))
                    self.destroy_node()
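
This example (like Examples #20 and #23 below) unpacks head-pose outputs with `to_tensor_result`, which is not defined in the snippet. A sketch of the usual helper, assuming the packet exposes tensor metadata via `getRaw().tensors`:

import numpy as np

def to_tensor_result(packet):
    # Map each output tensor name to its FP16 data as a numpy array.
    return {
        tensor.name: np.array(packet.getLayerFp16(tensor.name))
        for tensor in packet.getRaw().tensors
    }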
Example #13
            return True, np.ascontiguousarray(new_frame)
        else:
            return cap.read()


    class_names = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
    result = None

    while should_run():
        read_correctly, frame = get_frame()

        if not read_correctly:
            break

        if not camera:
            nn_data = dai.NNData()
            nn_data.setLayer("input", to_planar(frame, (180, 180)))
            detection_in.send(nn_data)

        in_nn = q_nn.tryGet()

        if in_nn is not None:
            data = softmax(in_nn.getFirstLayerFp16())
            result_conf = np.max(data)
            if result_conf > 0.5:
                result = {
                    "name": class_names[np.argmax(data)],
                    "conf": round(100 * result_conf, 2)
                }
            else:
                result = None
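
The classification output above is normalized with `softmax`, which is not defined in the snippet. A standard, numerically stable implementation is assumed:

import numpy as np

def softmax(x):
    # Subtract the maximum before exponentiating for numerical stability.
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()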
Example #14
    def process(self, context):

        video_frame = context["frame"][OAK_Stage.COLOR]

        if video_frame is None:
            if self.videoframe is None:
                return
            else:
                video_frame = self.videoframe
        else:
            self.videoframe = video_frame

        self.HandTracker.video_size = video_frame.shape[0]
        pd_inference = context["output_queues"]["palm_detector"].tryGet()

        if pd_inference is not None:

            self.HandTracker.pd_postprocess(pd_inference)

            results_palm = []

            for r in self.HandTracker.regions:
                box = (np.array(r.pd_box) *
                       self.HandTracker.video_size).astype(int)
                ponto = np.array([[box[0], box[1]],
                                  [box[0] + box[2], box[1] + box[3]]])
                result = ObjectDetection("hand", ponto, context["frame_id"])

                results_palm.append(result)

            self._setOutput(results_palm, 'palm_detection_list')

            bodyposes = []
            gestures = []

            for i, r in enumerate(self.HandTracker.regions):
                img_hand = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.HandTracker.lm_input_length,
                                             self.HandTracker.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer(
                    "input_1",
                    to_planar(img_hand, (self.HandTracker.lm_input_length,
                                         self.HandTracker.lm_input_length)))
                context["input_queues"]['hand_lm_in'].send(nn_data)

                inference = context["output_queues"]['hand_lm'].get()

                self.HandTracker.lm_postprocess(r, inference)

                if r.lm_score < self.HandTracker.lm_score_threshold:
                    continue

                src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32)
                dst = np.array(
                    [(x, y) for x, y in r.rect_points[1:]], dtype=np.float32
                )  # region.rect_points[0] is left bottom point !
                mat = cv2.getAffineTransform(src, dst)
                lm_xy = np.expand_dims(np.array([(l[0], l[1])
                                                 for l in r.landmarks]),
                                       axis=0)
                lm_xy = np.squeeze(cv2.transform(lm_xy, mat)).astype(int)

                bp = BodyPose(frame_id=context["frame_id"], pixel_space=True)
                for i in range(lm_xy.shape[0]):
                    name = OAK_Handpose.kp_name[i]
                    bp.add_keypoint(name, lm_xy[i][0], lm_xy[i][1])

                bodyposes.append(bp)

                gesture = Gesture()
                gesture._gesture = r.gesture
                gestures.append(gesture)

            self._setOutput(bodyposes, "hand_pose_list")
            self._setOutput(gestures, 'gesture_list')
Example #15
    def run(self):

        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        # Define data queues
        if self.input_type == "internal":
            q_video = device.getOutputQueue(name="cam_out",
                                            maxSize=1,
                                            blocking=False)
            q_pd_out = device.getOutputQueue(name="pd_out",
                                             maxSize=1,
                                             blocking=False)
            q_lm_out = device.getOutputQueue(name="lm_out",
                                             maxSize=2,
                                             blocking=False)
            q_lm_in = device.getInputQueue(name="lm_in")
        else:
            q_pd_in = device.getInputQueue(name="pd_in")
            q_pd_out = device.getOutputQueue(name="pd_out",
                                             maxSize=4,
                                             blocking=True)
            q_lm_out = device.getOutputQueue(name="lm_out",
                                             maxSize=4,
                                             blocking=True)
            q_lm_in = device.getInputQueue(name="lm_in")

        self.fps = FPS(mean_nb_frames=20)

        seq_num = 0
        nb_pd_inferences = 0
        nb_lm_inferences = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        while True:
            self.fps.update()

            if self.input_type == "internal":
                in_video = q_video.get()
                video_frame = in_video.getCvFrame()
                # The image is square cropped on the device
                self.frame_size = video_frame.shape[0]
                self.pad_w = self.pad_h = 0
            else:
                if self.input_type == "image":
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break

                h, w = vid_frame.shape[:2]
                if self.crop:
                    # Cropping the long side to get a square shape
                    self.frame_size = min(h, w)
                    dx = (w - self.frame_size) // 2
                    dy = (h - self.frame_size) // 2
                    video_frame = vid_frame[dy:dy + self.frame_size,
                                            dx:dx + self.frame_size]
                else:
                    # Padding on the small side to get a square shape
                    self.frame_size = max(h, w)
                    self.pad_h = int((self.frame_size - h) / 2)
                    self.pad_w = int((self.frame_size - w) / 2)
                    video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h,
                                                     self.pad_h, self.pad_w,
                                                     self.pad_w,
                                                     cv2.BORDER_CONSTANT)

                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(seq_num)
                frame_nn.setWidth(self.pd_input_length)
                frame_nn.setHeight(self.pd_input_length)
                frame_nn.setData(
                    to_planar(video_frame,
                              (self.pd_input_length, self.pd_input_length)))
                pd_rtrip_time = now()
                q_pd_in.send(frame_nn)

                seq_num += 1

            annotated_frame = video_frame.copy()

            # Get pose detection
            inference = q_pd_out.get()
            if self.input_type != "internal":
                pd_rtrip_time = now() - pd_rtrip_time
                glob_pd_rtrip_time += pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1

            # Landmarks
            self.nb_active_regions = 0
            if self.show_3d:
                self.vis3d.clear_geometries()
                self.vis3d.add_geometry(self.grid_floor,
                                        reset_bounding_box=False)
                self.vis3d.add_geometry(self.grid_wall,
                                        reset_bounding_box=False)
            for i, r in enumerate(self.regions):
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_input_length,
                                             self.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer(
                    "input_1",
                    to_planar(frame_nn,
                              (self.lm_input_length, self.lm_input_length)))
                if i == 0:
                    lm_rtrip_time = now()  # We measure only for the first region
                q_lm_in.send(nn_data)

                # Get landmarks
                inference = q_lm_out.get()
                if i == 0:
                    lm_rtrip_time = now() - lm_rtrip_time
                    glob_lm_rtrip_time += lm_rtrip_time
                    nb_lm_inferences += 1
                self.lm_postprocess(r, inference)
                self.lm_render(annotated_frame, r)
            if self.show_3d:
                self.vis3d.poll_events()
                self.vis3d.update_renderer()
            if self.smoothing and self.nb_active_regions == 0:
                self.filter.reset()

            if self.input_type != "internal" and not self.crop:
                annotated_frame = annotated_frame[self.pad_h:self.pad_h + h,
                                                  self.pad_w:self.pad_w + w]

            if self.show_fps:
                self.fps.display(annotated_frame,
                                 orig=(50, 50),
                                 size=1,
                                 color=(240, 180, 100))
            cv2.imshow("Blazepose", annotated_frame)

            if self.output:
                self.output.write(annotated_frame)

            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_scores = not self.show_scores
            elif key == ord('6'):
                self.show_gesture = not self.show_gesture
            elif key == ord('f'):
                self.show_fps = not self.show_fps

        # Print some stats
        print(f"# pose detection inferences : {nb_pd_inferences}")
        print(f"# landmark inferences       : {nb_lm_inferences}")
        if self.input_type != "internal" and nb_pd_inferences != 0:
            print(
                f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms"
            )
        if nb_lm_inferences != 0:
            print(
                f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms"
            )

        if self.output:
            self.output.release()
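
The round-trip timings in this example (and in Example #21 below) call a `now()` helper that is not shown; a monotonic, high-resolution clock alias is assumed:

import time

# Assumed timing helper used for the round-trip measurements above.
now = time.perf_counter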
Example #16
def run_nn(x_in, x_out, in_dict):
    nn_data = depthai.NNData()
    for key in in_dict:
        nn_data.setLayer(key, in_dict[key])
    x_in.send(nn_data)
    return x_out.tryGet()
Example #17
        break
    if sum(body.scores > body.score_thresh) > 8:
        keypoints = np.clip(body.keypoints, [0, 0],
                            [frame.shape[1], frame.shape[0]])
        x, y, w, h = cv2.boundingRect(keypoints)

        I = np.zeros_like(frame, dtype=np.uint8)
        I = renderer.draw(I, body)
        I = cv2.cvtColor(I, cv2.COLOR_BGR2GRAY)
        I = np.clip(I, 0, 1) * 255
        I = pose.crop_and_resize(I, pose.crop_region)

        # I = I[y : y + h, x : x + w]
        I = cv2.resize(I, (128, 128))

        frame_ac = dai.NNData()
        frame_ac.setLayer("input", I.ravel())
        pose.q_ac_in.send(frame_ac)
        crown_proportion = w / h
        # Get result from device
        predect = pose.q_ac_out.get()
        predect = np.array(predect.getLayerFp16("output")).reshape(-1, 2)
        action_id = int(np.argmax(predect))
        possible_rate = 0.6 * predect[:,
                                      action_id] + 0.4 * (crown_proportion - 1)

        if possible_rate > 0.55:
            pose_action = "fall"
            print(predect)
            if possible_rate > 0.7:
                cv2.putText(
Example #18
    def run(self):
        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        q_video = device.getOutputQueue(name="cam_out",
                                        maxSize=1,
                                        blocking=False)
        q_pd_in = device.getInputQueue(name="pd_in")
        q_pd_out = device.getOutputQueue(name="pd_out",
                                         maxSize=4,
                                         blocking=True)
        q_lm_out = device.getOutputQueue(name="lm_out",
                                         maxSize=4,
                                         blocking=True)
        q_lm_in = device.getInputQueue(name="lm_in")
        q_asl_out = device.getOutputQueue(name="asl_out",
                                          maxSize=4,
                                          blocking=True)
        q_asl_in = device.getInputQueue(name="asl_in")

        while True:
            in_video = q_video.get()
            video_frame = in_video.getCvFrame()

            h, w = video_frame.shape[:2]
            self.frame_size = max(h, w)
            self.pad_h = int((self.frame_size - h) / 2)
            self.pad_w = int((self.frame_size - w) / 2)

            video_frame = cv2.copyMakeBorder(video_frame, self.pad_h,
                                             self.pad_h, self.pad_w,
                                             self.pad_w, cv2.BORDER_CONSTANT)

            frame_nn = dai.ImgFrame()
            frame_nn.setWidth(self.pd_input_length)
            frame_nn.setHeight(self.pd_input_length)
            frame_nn.setData(
                to_planar(video_frame,
                          (self.pd_input_length, self.pd_input_length)))
            q_pd_in.send(frame_nn)

            annotated_frame = video_frame.copy()

            # Get palm detection
            inference = q_pd_out.get()
            self.pd_postprocess(inference)

            # Send data for hand landmarks
            for i, r in enumerate(self.regions):
                img_hand = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_input_length,
                                             self.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer(
                    "input_1",
                    to_planar(img_hand,
                              (self.lm_input_length, self.lm_input_length)))
                q_lm_in.send(nn_data)

            # Retrieve hand landmarks
            for i, r in enumerate(self.regions):
                inference = q_lm_out.get()
                self.lm_postprocess(r, inference)
                hand_frame, handedness, hand_bbox = self.lm_render(
                    video_frame, annotated_frame, r)
                # ASL recognition
                if hand_frame is not None and self.asl_recognition:
                    hand_frame = cv2.resize(
                        hand_frame,
                        (self.asl_input_length, self.asl_input_length),
                        interpolation=cv2.INTER_NEAREST)
                    hand_frame = hand_frame.transpose(2, 0, 1)
                    nn_data = dai.NNData()
                    nn_data.setLayer("input", hand_frame)
                    q_asl_in.send(nn_data)
                    asl_result = np.array(q_asl_out.get().getFirstLayerFp16())
                    asl_idx = np.argmax(asl_result)
                    # Recognized ASL character is associated with a probability
                    asl_char = [
                        characters[asl_idx],
                        round(asl_result[asl_idx] * 100, 1)
                    ]
                    selected_char = asl_char
                    current_char_queue = None
                    if handedness > 0.5:
                        current_char_queue = self.right_char_queue
                    else:
                        current_char_queue = self.left_char_queue
                    current_char_queue.append(selected_char)
                    # Perform filtering of recognition results using the previous 5 results.
                    # If there aren't enough results, take the first result as output.
                    if len(current_char_queue) < 5:
                        selected_char = current_char_queue[0]
                    else:
                        char_candidate = {}
                        for i in range(5):
                            if current_char_queue[i][0] not in char_candidate:
                                char_candidate[current_char_queue[i][0]] = [
                                    1, current_char_queue[i][1]
                                ]
                            else:
                                char_candidate[current_char_queue[i]
                                               [0]][0] += 1
                                char_candidate[current_char_queue[i][0]][
                                    1] += current_char_queue[i][1]
                        most_voted_char = ""
                        max_votes = 0
                        most_voted_char_prob = 0
                        for key in char_candidate:
                            if char_candidate[key][0] > max_votes:
                                max_votes = char_candidate[key][0]
                                most_voted_char = key
                                most_voted_char_prob = round(
                                    char_candidate[key][1] /
                                    char_candidate[key][0], 1)
                        selected_char = (most_voted_char, most_voted_char_prob)

                    if self.show_asl:
                        gesture_string = "Letter: " + selected_char[
                            0] + ", " + str(selected_char[1]) + "%"
                        textSize = self.ft.getTextSize(gesture_string,
                                                       fontHeight=14,
                                                       thickness=-1)[0]
                        cv2.rectangle(video_frame,
                                      (hand_bbox[0] - 5, hand_bbox[1]),
                                      (hand_bbox[0] + textSize[0] + 5,
                                       hand_bbox[1] - 18), (36, 152, 0), -1)
                        self.ft.putText(img=video_frame,
                                        text=gesture_string,
                                        org=(hand_bbox[0], hand_bbox[1] - 5),
                                        fontHeight=14,
                                        color=(255, 255, 255),
                                        thickness=-1,
                                        line_type=cv2.LINE_AA,
                                        bottomLeftOrigin=True)

            video_frame = video_frame[self.pad_h:self.pad_h + h,
                                      self.pad_w:self.pad_w + w]
            cv2.imshow("hand tracker", video_frame)
            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_hand_box = not self.show_hand_box
            elif key == ord('2'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('3'):
                self.show_asl = not self.show_asl
Example #19
def process_image(transform, processing_model, img):
    global useOAKDCam, bboxes, results, results_path, reid_bbox_q, next_id, device, face_bbox_q, age_gender_in, age_gender_nn, cap, cam_out, detection_in, detection_nn, reid_in, reid_nn
    tracks = []
    try:
        if useOAKDCam:
            #     ret, frame = cap.read()
            frame = np.array(cam_out.get().getData()).reshape(
                (3, 320, 544)).transpose(1, 2, 0).astype(np.uint8)
        else:
            frame = img

        if transform == 'ssd':

            if frame is not None:
                if not useOAKDCam:
                    nn_data = depthai.NNData()
                    nn_data.setLayer("input", to_planar(frame, (300, 300)))
                    detection_in.send(nn_data)

                in_nn = detection_nn.tryGet()

                if in_nn is not None:
                    # each detection is 7 values; the last detection is followed by a -1 sentinel, after which the buffer is zero-padded
                    bboxes = np.array(in_nn.getFirstLayerFp16())
                    # keep only the results before the -1 sentinel
                    bboxes = bboxes[:np.where(bboxes == -1)[0][0]]
                    # transform the 1D array into Nx7 matrix
                    bboxes = bboxes.reshape((bboxes.size // 7, 7))
                    # filter out results whose confidence is below the 0.5 threshold
                    bboxes = bboxes[bboxes[:, 2] > 0.5][:, 3:7]

                    # if the frame is available, draw bounding boxes on it and show the frame
                    for raw_bbox in bboxes:
                        bbox = frame_norm2(frame, raw_bbox)
                        cv2.rectangle(frame, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), (255, 255, 255), 3)
                    img = frame

        #pedestrian reidentification https://github.com/luxonis/depthai-experiments/tree/master/pedestrian-reidentification
        if transform == 'pre':

            if frame is not None:
                debug_frame = frame.copy()

            if not useOAKDCam:
                nn_data = depthai.NNData()
                nn_data.setLayer("input", to_planar(frame, (544, 320)))
                detection_in.send(nn_data)
            # else:
            #     return tracks, img

            while detection_nn.has():
                bboxes = np.array(detection_nn.get().getFirstLayerFp16())
                bboxes = bboxes[:np.where(bboxes == -1)[0][0]]
                bboxes = bboxes.reshape((bboxes.size // 7, 7))
                bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]

                for raw_bbox in bboxes:
                    bbox = frame_norm_1(frame, raw_bbox)
                    det_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                    nn_data = depthai.NNData()
                    nn_data.setLayer("data", to_planar(det_frame, (48, 96)))
                    reid_in.send(nn_data)
                    reid_bbox_q.put(bbox)

            while reid_nn.has():
                reid_result = reid_nn.get().getFirstLayerFp16()
                bbox = reid_bbox_q.get()

                for person_id in results:
                    dist = cos_dist(reid_result, results[person_id])
                    if dist > 0.7:
                        result_id = person_id
                        results[person_id] = reid_result
                        break
                else:
                    result_id = next_id
                    results[result_id] = reid_result
                    results_path[result_id] = []
                    next_id += 1

                # if debug:
                cv2.rectangle(debug_frame, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (10, 245, 10), 2)
                x = (bbox[0] + bbox[2]) // 2
                y = (bbox[1] + bbox[3]) // 2
                results_path[result_id].append([x, y])
                cv2.putText(debug_frame, str(result_id), (x, y),
                            cv2.FONT_HERSHEY_TRIPLEX, 1.0, (255, 255, 0))
                if len(results_path[result_id]) > 1:
                    cv2.polylines(
                        debug_frame,
                        [np.array(results_path[result_id], dtype=np.int32)],
                        False, (255, 0, 255), 2)
                # else:
                #     print(f"Saw id: {result_id}")

            img = debug_frame

        # gaze estimation https://github.com/luxonis/depthai-experiments/tree/master/gaze-estimation
        elif transform == 'gaze':
            model = processing_model
            model.frame = frame
            tracks, img = model.parse()

        # age gender recognition https://github.com/luxonis/depthai-experiments/tree/master/gen2-age-gender
        elif transform == 'age-gen':
            if frame is not None:
                debug_frame = frame.copy()

                if not useOAKDCam:
                    nn_data = depthai.NNData()
                    nn_data.setLayer("input", to_planar(frame, (300, 300)))
                    detection_in.send(nn_data)

            while detection_nn.has():
                bboxes = np.array(detection_nn.get().getFirstLayerFp16())
                bboxes = bboxes.reshape((bboxes.size // 7, 7))
                bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]

                for raw_bbox in bboxes:
                    bbox = frame_norm_1(frame, raw_bbox)
                    det_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                    nn_data = depthai.NNData()
                    nn_data.setLayer("data", to_planar(det_frame, (48, 96)))
                    age_gender_in.send(nn_data)
                    face_bbox_q.put(bbox)

            while age_gender_nn.has():
                det = age_gender_nn.get()
                age = int(
                    float(np.squeeze(np.array(det.getLayerFp16('age_conv3'))))
                    * 100)
                gender = np.squeeze(np.array(det.getLayerFp16('prob')))
                gender_str = "female" if gender[0] > gender[1] else "male"
                bbox = face_bbox_q.get()

                while not len(results) < len(bboxes) and len(results) > 0:
                    results.pop(0)
                results.append({
                    "bbox": bbox,
                    "gender": gender_str,
                    "age": age,
                    "ts": time.time()
                })

            results = list(
                filter(lambda result: time.time() - result["ts"] < 0.2,
                       results))

            if frame is not None:
                for result in results:
                    bbox = result["bbox"]
                    cv2.rectangle(debug_frame, (bbox[0], bbox[1]),
                                  (bbox[2], bbox[3]), (10, 245, 10), 2)
                    y = (bbox[1] + bbox[3]) // 2
                    cv2.putText(debug_frame, str(result["age"]), (bbox[0], y),
                                cv2.FONT_HERSHEY_TRIPLEX, 1.0, (255, 255, 255))
                    cv2.putText(debug_frame, result["gender"],
                                (bbox[0], y + 20), cv2.FONT_HERSHEY_TRIPLEX,
                                1.0, (255, 255, 255))

            img = debug_frame

    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("OAK-D Exception", e)
        pass

    return tracks, img
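
The re-identification branch above matches new embeddings against stored ones with `cos_dist`, reusing an ID when the score exceeds 0.7. The helper is not shown; a cosine-similarity sketch is assumed:

import numpy as np

def cos_dist(a, b):
    # Cosine similarity between two embedding vectors (higher means more similar).
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))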
Example #20
    def land_pose_thread(self):
        landmark_nn = self.device.getOutputQueue(name="landmark_nn",
                                                 maxSize=1,
                                                 blocking=False)
        pose_nn = self.device.getOutputQueue(name="pose_nn",
                                             maxSize=1,
                                             blocking=False)
        gaze_in = self.device.getInputQueue("gaze_in")

        while self.running:
            try:
                land_in = landmark_nn.get().getFirstLayerFp16()
            except RuntimeError as ex:
                continue

            try:
                face_bbox = self.face_box_q.get(block=True, timeout=100)
            except queue.Empty:
                continue

            self.face_box_q.task_done()
            left = face_bbox[0]
            top = face_bbox[1]
            face_frame = self.frame[face_bbox[1]:face_bbox[3],
                                    face_bbox[0]:face_bbox[2]]
            land_data = frame_norm(face_frame, land_in)
            land_data[::2] += left
            land_data[1::2] += top
            left_bbox = padded_point(land_data[:2],
                                     padding=30,
                                     frame_shape=self.frame.shape)
            if left_bbox is None:
                print("Point for left eye is corrupted, skipping nn result...")
                continue
            self.left_bbox = left_bbox
            right_bbox = padded_point(land_data[2:4],
                                      padding=30,
                                      frame_shape=self.frame.shape)
            if right_bbox is None:
                print(
                    "Point for right eye is corrupted, skipping nn result...")
                continue
            self.right_bbox = right_bbox
            self.nose = land_data[4:6]
            left_img = self.frame[self.left_bbox[1]:self.left_bbox[3],
                                  self.left_bbox[0]:self.left_bbox[2]]
            right_img = self.frame[self.right_bbox[1]:self.right_bbox[3],
                                   self.right_bbox[0]:self.right_bbox[2]]

            try:
                self.pose = [
                    val[0][0]
                    for val in to_tensor_result(pose_nn.get()).values()
                ]
            except RuntimeError as ex:
                continue

            gaze_data = depthai.NNData()
            gaze_data.setLayer("left_eye_image", to_planar(left_img, (60, 60)))
            gaze_data.setLayer("right_eye_image",
                               to_planar(right_img, (60, 60)))
            gaze_data.setLayer("head_pose_angles", self.pose)
            gaze_in.send(gaze_data)
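
`padded_point` turns a single landmark coordinate into a square crop box and apparently returns `None` when the padded box would fall outside the frame, which is why the thread above skips those results. A minimal sketch under those assumptions:

def padded_point(point, padding, frame_shape=None):
    # Build an (x_min, y_min, x_max, y_max) box of side 2*padding around the point.
    x, y = int(point[0]), int(point[1])
    box = [x - padding, y - padding, x + padding, y + padding]
    if frame_shape is not None:
        h, w = frame_shape[:2]
        if box[0] < 0 or box[1] < 0 or box[2] > w or box[3] > h:
            return None  # crop would leave the frame; the caller skips this result
    return box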
Example #21
    def run(self):

        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        # Define data queues 
        if self.camera:
            q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False)
                q_lm_in = device.getInputQueue(name="lm_in")
        else:
            q_pd_in = device.getInputQueue(name="pd_in")
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
                q_lm_in = device.getInputQueue(name="lm_in")

        self.fps = FPS(mean_nb_frames=20)

        seq_num = 0
        nb_pd_inferences = 0
        nb_lm_inferences = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        while True:
            self.fps.update()
            if self.camera:
                in_video = q_video.get()
                video_frame = in_video.getCvFrame()
            else:
                if self.image_mode:
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                dx = (w - self.video_size) // 2
                dy = (h - self.video_size) // 2
                video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size]
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(seq_num)
                frame_nn.setWidth(self.pd_input_length)
                frame_nn.setHeight(self.pd_input_length)
                frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
                q_pd_in.send(frame_nn)
                pd_rtrip_time = now()

                seq_num += 1

            annotated_frame = video_frame.copy()

            # Get palm detection
            inference = q_pd_out.get()
            if not self.camera: glob_pd_rtrip_time += now() - pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1

            # Hand landmarks
            if self.use_lm:
                for i,r in enumerate(self.regions):
                    img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
                    nn_data = dai.NNData()   
                    nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
                    q_lm_in.send(nn_data)
                    if i == 0: lm_rtrip_time = now() # We measure only for the first region
                
                # Retrieve hand landmarks
                for i,r in enumerate(self.regions):
                    inference = q_lm_out.get()
                    if i == 0: glob_lm_rtrip_time += now() - lm_rtrip_time
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
                    nb_lm_inferences += 1

                
            self.fps.display(annotated_frame, orig=(50,50),color=(240,180,100))
            cv2.imshow("video", annotated_frame)

            key = cv2.waitKey(1) 
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_handedness = not self.show_handedness
            elif key == ord('6'):
                self.show_scores = not self.show_scores
            elif key == ord('7'):
                self.show_gesture = not self.show_gesture

        # Print some stats
        if not self.camera:
            print(f"# video files frames                 : {seq_num}")
            print(f"# palm detection inferences received : {nb_pd_inferences}")
            print(f"# hand landmark inferences received  : {nb_lm_inferences}")
            print(f"Palm detection round trip            : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
            print(f"Hand landmark round trip             : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
Example #22
    def run(self):
        self.threads = [
            threading.Thread(target=self.face_thread),
            threading.Thread(target=self.land_pose_thread),
            threading.Thread(target=self.gaze_thread)
        ]
        for thread in self.threads:
            thread.start()

        while self.should_run():
            try:
                read_correctly, new_frame = self.get_frame()
            except RuntimeError:
                continue

            if not read_correctly:
                break

            self.fps.update()
            self.frame = new_frame
            self.debug_frame = self.frame.copy()

            if not camera:
                nn_data = depthai.NNData()
                nn_data.setLayer("data", to_planar(self.frame, (300, 300)))
                self.face_in.send(nn_data)

            if debug:  # face
                if self.gaze is not None and self.left_bbox is not None and self.right_bbox is not None:
                    re_x = (self.right_bbox[0] + self.right_bbox[2]) // 2
                    re_y = (self.right_bbox[1] + self.right_bbox[3]) // 2
                    le_x = (self.left_bbox[0] + self.left_bbox[2]) // 2
                    le_y = (self.left_bbox[1] + self.left_bbox[3]) // 2

                    x, y = (self.gaze * 100).astype(int)[:2]

                    if args.lazer:
                        beam_img = np.zeros(self.debug_frame.shape, np.uint8)
                        for t in range(10)[::-2]:
                            cv2.line(beam_img, (re_x, re_y),
                                     ((re_x + x * 100), (re_y - y * 100)),
                                     (0, 0, 255 - t * 10), t * 2)
                            cv2.line(beam_img, (le_x, le_y),
                                     ((le_x + x * 100), (le_y - y * 100)),
                                     (0, 0, 255 - t * 10), t * 2)
                        self.debug_frame |= beam_img

                    else:
                        cv2.arrowedLine(self.debug_frame, (le_x, le_y),
                                        (le_x + x, le_y - y), (255, 0, 255), 3)
                        cv2.arrowedLine(self.debug_frame, (re_x, re_y),
                                        (re_x + x, re_y - y), (255, 0, 255), 3)

                if not args.lazer:
                    for raw_bbox in self.bboxes:
                        bbox = frame_norm(self.frame, raw_bbox)
                        cv2.rectangle(self.debug_frame, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), (10, 245, 10), 2)
                    if self.nose is not None:
                        cv2.circle(self.debug_frame,
                                   (self.nose[0], self.nose[1]),
                                   2, (0, 255, 0),
                                   thickness=5,
                                   lineType=8,
                                   shift=0)
                    if self.left_bbox is not None:
                        cv2.rectangle(self.debug_frame,
                                      (self.left_bbox[0], self.left_bbox[1]),
                                      (self.left_bbox[2], self.left_bbox[3]),
                                      (245, 10, 10), 2)
                    if self.right_bbox is not None:
                        cv2.rectangle(self.debug_frame,
                                      (self.right_bbox[0], self.right_bbox[1]),
                                      (self.right_bbox[2], self.right_bbox[3]),
                                      (245, 10, 10), 2)
                    if self.pose is not None and self.nose is not None:
                        draw_3d_axis(self.debug_frame, self.pose, self.nose)

                if camera:
                    cv2.imshow("Camera view", self.debug_frame)
                else:
                    aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
                    cv2.imshow(
                        "Video view",
                        cv2.resize(self.debug_frame,
                                   (int(900), int(900 / aspect_ratio))))
                if cv2.waitKey(1) == ord('q'):
                    cv2.destroyAllWindows()
                    break

        self.fps.stop()
        print("FPS: {:.2f}".format(self.fps.fps()))
        if not camera:
            self.cap.release()
        cv2.destroyAllWindows()
        for i in range(1, 5):  # https://stackoverflow.com/a/25794701/5494277
            cv2.waitKey(1)
        self.running = False
Example #23
    def land_pose_thread(self):
        landmark_nn = self.device.getOutputQueue(name="landmark_nn",
                                                 maxSize=1,
                                                 blocking=False)
        pose_nn = self.device.getOutputQueue(name="pose_nn",
                                             maxSize=1,
                                             blocking=False)
        gaze_in = self.device.getInputQueue("gaze_in")

        while self.running:
            try:
                land_in = landmark_nn.get().getFirstLayerFp16()
            except RuntimeError as ex:
                continue

            try:
                face_bbox = self.face_box_q.get(block=True, timeout=100)
            except queue.Empty:
                continue

            self.face_box_q.task_done()
            left = face_bbox[0]
            top = face_bbox[1]
            face_frame = self.frame[face_bbox[1]:face_bbox[3],
                                    face_bbox[0]:face_bbox[2]]
            land_data = frame_norm(face_frame, land_in)
            land_data[::2] += left
            land_data[1::2] += top
            left_bbox = padded_point(land_data[:2],
                                     padding=30,
                                     frame_shape=self.frame.shape)
            if left_bbox is None:
                print("Point for left eye is corrupted, skipping nn result...")
                continue
            self.left_bbox = left_bbox
            right_bbox = padded_point(land_data[2:4],
                                      padding=30,
                                      frame_shape=self.frame.shape)
            if right_bbox is None:
                print(
                    "Point for right eye is corrupted, skipping nn result...")
                continue
            self.right_bbox = right_bbox
            self.nose = land_data[4:6]
            left_img = self.frame[self.left_bbox[1]:self.left_bbox[3],
                                  self.left_bbox[0]:self.left_bbox[2]]
            right_img = self.frame[self.right_bbox[1]:self.right_bbox[3],
                                   self.right_bbox[0]:self.right_bbox[2]]

            try:
                # The output of pose_nn is in YPR order (yaw, pitch, roll), which is the sequence the gaze network expects for head_pose_angles
                # https://docs.openvinotoolkit.org/2020.1/_models_intel_head_pose_estimation_adas_0001_description_head_pose_estimation_adas_0001.html
                # https://docs.openvinotoolkit.org/latest/omz_models_model_gaze_estimation_adas_0002.html
                # ... three head pose angles – (yaw, pitch, and roll) ...
                values = to_tensor_result(pose_nn.get())
                self.pose = [
                    values['angle_y_fc'][0][0], values['angle_p_fc'][0][0],
                    values['angle_r_fc'][0][0]
                ]
            except RuntimeError as ex:
                continue

            gaze_data = depthai.NNData()
            gaze_data.setLayer("left_eye_image", to_planar(left_img, (60, 60)))
            gaze_data.setLayer("right_eye_image",
                               to_planar(right_img, (60, 60)))
            gaze_data.setLayer("head_pose_angles", self.pose)
            gaze_in.send(gaze_data)
Example #24
    # nn data (bounding box locations) are in <0..1> range - they need to be normalized with frame width/height
    def frame_norm(frame, bbox):
        norm_vals = np.full(len(bbox), frame.shape[0])
        norm_vals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * norm_vals).astype(int)

    for nextfile in tqdm(glob.glob("unlabeld/*.jpg")):
        name = nextfile[9:-4]
        #print(name)
        # load image into frame
        frame = cv2.imread(nextfile, cv2.IMREAD_COLOR)
        original_frame = frame.copy()
        # resize frame to 300x300
        frame = cv2.resize(frame, (300, 300), interpolation=cv2.INTER_AREA)

        var_data = dai.NNData()
        var_data.setLayer("data", to_planar(frame, (300, 300)))
        q_img_in.send(var_data)

        in_nn = q_nn.get()
        detections = in_nn.detections

        annotation = ET.Element("annotation")
        folder = ET.SubElement(annotation, "folder").text = "allimages"
        filename = ET.SubElement(annotation, "filename").text = f"{name}.jpg"
        path = ET.SubElement(
            annotation, "path"
        ).text = f"D:\\Hobby\\tgmb\\to-bee-or-not-to-bee\\allimages\\{name}.jpg"

        source = ET.SubElement(annotation, "source")
        database = ET.SubElement(source, "database").text = "Unknown"
Example #25
    def process_image(self, img):
        annotated_frame = img
        if self.camera:
            in_video = self.q_video.get()
            # Convert NV12 to BGR
            yuv = in_video.getData().reshape(
                (in_video.getHeight() * 3 // 2, in_video.getWidth()))
            video_frame = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR_NV12)
        else:
            if self.image_mode is None:
                vid_frame = img
                height, width, _ = img.shape
                self.video_size = int(min(width, height))
            elif self.image_mode:
                vid_frame = self.img
            else:
                ok, vid_frame = self.cap.read()
                if not ok:
                    # print("not OK video frame")
                    return [], img  #break

            h, w = vid_frame.shape[:2]
            dx = (w - self.video_size) // 2
            dy = (h - self.video_size) // 2
            video_frame = vid_frame[dy:dy + self.video_size,
                                    dx:dx + self.video_size]
            frame_nn = dai.ImgFrame()
            frame_nn.setSequenceNum(self.seq_num)
            frame_nn.setWidth(self.pd_input_length)
            frame_nn.setHeight(self.pd_input_length)
            frame_nn.setData(
                to_planar(video_frame,
                          (self.pd_input_length, self.pd_input_length)))

            self.q_pd_in.send(frame_nn)

            self.seq_num += 1

        annotated_frame = video_frame.copy()

        inference = self.q_pd_out.get()
        self.pd_postprocess(inference)
        self.pd_render(annotated_frame)

        # Hand landmarks
        if self.use_lm:
            for i, r in enumerate(self.regions):
                img_hand = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_input_length,
                                             self.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer(
                    "input_1",
                    to_planar(img_hand,
                              (self.lm_input_length, self.lm_input_length)))
                self.q_lm_in.send(nn_data)

            # Retrieve hand landmarks
            for i, r in enumerate(self.regions):
                inference = self.q_lm_out.get()
                self.lm_postprocess(r, inference)
                self.lm_render(annotated_frame, r)

        return self.regions, annotated_frame
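Nearly every snippet here feeds frames to the device through a to_planar helper; a minimal sketch of what such a helper typically does, assuming BGR input and the planar (CHW) byte layout DepthAI NN inputs expect:

    import cv2
    import numpy as np

    def to_planar(arr, shape):
        # Resize the interleaved HxWxC frame to `shape` (width, height) and
        # flatten it in planar (CxHxW) order for NNData.setLayer / ImgFrame.setData.
        resized = cv2.resize(arr, shape)
        return resized.transpose(2, 0, 1).flatten()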
Example #26
0
    def inference_task(self):
        # Queues
        detection_passthrough = self.device.getOutputQueue(
            "detection_passthrough")
        detection_nn = self.device.getOutputQueue("detection_nn")

        bboxes = []
        results = {}
        results_path = {}
        next_id = 0

        # Match up frames and detections
        try:
            prev_passthrough = detection_passthrough.getAll()[0]
            prev_inference = detection_nn.getAll()[0]
        except RuntimeError:
            # Device closed before the first inference arrived; without these
            # packets the pairing loop below cannot run, so exit the thread.
            return

        fps = 0
        t_fps = time.time()
        while self.running:
            try:

                # Get current detection
                passthrough = detection_passthrough.getAll()[0]
                inference = detection_nn.getAll()[0]

                # Count NN fps
                fps = fps + 1

                # Combine all frames to current inference
                frames = []
                while True:

                    frm = self.frame_queue.get()
                    if camera and hq:
                        # Convert NV12 to BGR
                        yuv = frm.getData().reshape(
                            (frm.getHeight() * 3 // 2, frm.getWidth()))
                        cv_frame = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR_NV12)
                    else:
                        # get the frames corresponding to inference
                        cv_frame = np.ascontiguousarray(frm.getData().reshape(
                            3, frm.getHeight(),
                            frm.getWidth()).transpose(1, 2, 0))

                    frames.append(cv_frame)

                    # Break out once all frames received for the current inference
                    if frm.getSequenceNum() >= prev_passthrough.getSequenceNum() - 1:
                        break

                infered_frame = frames[0]

                # Send each detected bbox crop to the re-identification network
                for det in inference.detections:
                    raw_bbox = [det.xmin, det.ymin, det.xmax, det.ymax]
                    bbox = frame_norm(infered_frame, raw_bbox)
                    det_frame = infered_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                    nn_data = dai.NNData()
                    nn_data.setLayer("data", to_planar(det_frame, (48, 96)))
                    self.device.getInputQueue("reid_in").send(nn_data)

                # Retrieve the re-id embedding for each detection
                for det in inference.detections:

                    raw_bbox = [det.xmin, det.ymin, det.xmax, det.ymax]
                    bbox = frame_norm(infered_frame, raw_bbox)

                    reid_result = self.device.getOutputQueue(
                        "reid_nn").get().getFirstLayerFp16()

                    for person_id in results:
                        dist = cos_dist(reid_result, results[person_id])
                        if dist > 0.7:
                            result_id = person_id
                            results[person_id] = reid_result
                            break
                    else:
                        result_id = next_id
                        results[result_id] = reid_result
                        results_path[result_id] = []
                        next_id += 1

                    if debug:
                        for frame in frames:
                            cv2.rectangle(frame, (bbox[0], bbox[1]),
                                          (bbox[2], bbox[3]), (10, 245, 10), 2)
                            x = (bbox[0] + bbox[2]) // 2
                            y = (bbox[1] + bbox[3]) // 2
                            results_path[result_id].append([x, y])
                            cv2.putText(frame, str(result_id), (x, y),
                                        cv2.FONT_HERSHEY_TRIPLEX, 1.0,
                                        (255, 255, 255))
                            if len(results_path[result_id]) > 1:
                                cv2.polylines(frame, [
                                    np.array(results_path[result_id],
                                             dtype=np.int32)
                                ], False, (255, 0, 0), 2)
                    else:
                        print(f"Saw id: {result_id}")

                # Send off to the visualization thread
                for frame in frames:
                    # put nn_fps
                    if debug:
                        cv2.putText(frame, 'NN FPS: ' + str(self.nn_fps),
                                    (5, 40), cv2.FONT_HERSHEY_DUPLEX, 1.0,
                                    (255, 0, 0), 2)

                    if self.visualization_queue.full():
                        self.visualization_queue.get_nowait()
                    self.visualization_queue.put(frame)

                # Move current to prev
                prev_passthrough = passthrough
                prev_inference = inference

                if time.time() - t_fps >= 1.0:
                    self.nn_fps = round(fps / (time.time() - t_fps), 2)
                    fps = 0
                    t_fps = time.time()

            except RuntimeError:
                continue
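The re-identification matching above compares embeddings with cos_dist against a 0.7 threshold; a minimal cosine-similarity sketch, assuming both arguments are flat feature vectors:

    import numpy as np

    def cos_dist(a, b):
        # Cosine similarity of two re-id embeddings; values near 1 mean the
        # two crops most likely show the same person.
        a, b = np.asarray(a, dtype=np.float32), np.asarray(b, dtype=np.float32)
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))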
Example #27
0
    def run(self):
        self.threads = [
            threading.Thread(target=self.run_face, daemon=True),
            threading.Thread(target=self.run_land68, daemon=True)
        ]
        for thread in self.threads:
            thread.start()
        while self.should_run():
            read_correctly, new_frame = self.get_frame()
            if not read_correctly:
                break
            self.fps.update()
            self.frame = new_frame
            self.debug_frame = self.frame.copy()
            if not camera:
                nn_data = depthai.NNData()
                nn_data.setLayer("data", to_planar(self.frame, (300, 300)))
                self.face_in.send(nn_data)
            if debug:
                if self.results.qsize() > 0 and self.face_bboxs.qsize() > 0:
                    try:
                        for i in range(self.results.qsize()):
                            face_bbox = self.face_bboxs.get()
                            result = self.results.get()
                            bbox = frame_norm(self.frame, self.bboxes[i])
                            self.draw_bbox(bbox, (0, 255, 0))
                            self.hand_points = []
                            # Landmark indices used below (68-point layout): 17/21 left eyebrow ends, 22/26 right eyebrow ends,
                            # 36/39 left eye corners, 42/45 right eye corners, 31/35 left/right nose wings,
                            # 48/54 mouth corners, 57 lower lip center, 8 chin tip.
                            # Each landmark stores two values (x, y), so landmark k sits at result[2*k] and result[2*k + 1].
                            self.hand_points.append(
                                (result[34] + face_bbox[0],
                                 result[35] + face_bbox[1]))
                            self.hand_points.append(
                                (result[42] + face_bbox[0],
                                 result[43] + face_bbox[1]))
                            self.hand_points.append(
                                (result[44] + face_bbox[0],
                                 result[45] + face_bbox[1]))
                            self.hand_points.append(
                                (result[52] + face_bbox[0],
                                 result[53] + face_bbox[1]))
                            self.hand_points.append(
                                (result[72] + face_bbox[0],
                                 result[73] + face_bbox[1]))
                            self.hand_points.append(
                                (result[78] + face_bbox[0],
                                 result[79] + face_bbox[1]))
                            self.hand_points.append(
                                (result[84] + face_bbox[0],
                                 result[85] + face_bbox[1]))
                            self.hand_points.append(
                                (result[90] + face_bbox[0],
                                 result[91] + face_bbox[1]))
                            self.hand_points.append(
                                (result[62] + face_bbox[0],
                                 result[63] + face_bbox[1]))
                            self.hand_points.append(
                                (result[70] + face_bbox[0],
                                 result[71] + face_bbox[1]))
                            self.hand_points.append(
                                (result[96] + face_bbox[0],
                                 result[97] + face_bbox[1]))
                            self.hand_points.append(
                                (result[108] + face_bbox[0],
                                 result[109] + face_bbox[1]))
                            self.hand_points.append(
                                (result[114] + face_bbox[0],
                                 result[115] + face_bbox[1]))
                            self.hand_points.append(
                                (result[16] + face_bbox[0],
                                 result[17] + face_bbox[1]))
                            for i in self.hand_points:
                                cv2.circle(self.debug_frame, (i[0], i[1]),
                                           2, (255, 0, 0),
                                           thickness=1,
                                           lineType=8,
                                           shift=0)
                            reprojectdst, _, pitch, yaw, roll = get_head_pose(
                                np.array(self.hand_points))
                            """
                            pitch > 0 Head down, < 0 look up
                            yaw > 0 Turn right < 0 Turn left
                            roll > 0 Tilt right, < 0 Tilt left
                            """
                            cv2.putText(
                                self.debug_frame,
                                "pitch:{:.2f}, yaw:{:.2f}, roll:{:.2f}".format(
                                    pitch, yaw, roll),
                                (face_bbox[0] - 30, face_bbox[1] - 30),
                                cv2.FONT_HERSHEY_COMPLEX, 0.45, (255, 0, 0))

                            hand_attitude = np.array(
                                [abs(pitch), abs(yaw),
                                 abs(roll)])
                            max_index = np.argmax(hand_attitude)
                            if max_index == 0:
                                if pitch > 0:
                                    cv2.putText(
                                        self.debug_frame, "Head down",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                        (235, 10, 10))
                                else:
                                    cv2.putText(
                                        self.debug_frame, "look up",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                        (235, 10, 10))
                            elif max_index == 1:
                                if yaw > 0:
                                    cv2.putText(
                                        self.debug_frame, "Turn right",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                        (235, 10, 10))
                                else:
                                    cv2.putText(
                                        self.debug_frame, "Turn left",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                        (235, 10, 10))
                            elif max_index == 2:
                                if roll > 0:
                                    cv2.putText(
                                        self.debug_frame, "Tilt right",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                        (235, 10, 10))
                                else:
                                    cv2.putText(
                                        self.debug_frame, "Tilt left",
                                        (face_bbox[0], face_bbox[1] - 10),
                                        cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                        (235, 10, 10))
                            # Draw the head-pose cube (12 edges)
                            line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0],
                                          [4, 5], [5, 6], [6, 7], [7, 4],
                                          [0, 4], [1, 5], [2, 6], [3, 7]]
                            for start, end in line_pairs:
                                cv2.line(self.debug_frame, reprojectdst[start],
                                         reprojectdst[end], (0, 0, 255))
                    except Exception:
                        # Skip drawing for this frame if landmark decoding fails.
                        pass
                if camera:
                    cv2.imshow("Camera view", self.debug_frame)
                else:
                    aspect_ratio = self.frame.shape[1] / self.frame.shape[0]
                    cv2.imshow(
                        "Video view",
                        cv2.resize(self.debug_frame,
                                   (int(900), int(900 / aspect_ratio))))
                if cv2.waitKey(1) == ord('q'):
                    cv2.destroyAllWindows()
                    break

        self.fps.stop()
        print("FPS:{:.2f}".format(self.fps.fps()))
        if not camera:
            self.cap.release()
        cv2.destroyAllWindows()
        self.running = False
        for thread in self.threads:
            thread.join(2)
            if thread.is_alive():
                break
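The long chain of hand_points.append calls above reads landmark k of the 68-point output at indices 2*k and 2*k+1 of the flat result vector and offsets it by the face bbox origin; a small hypothetical helper that makes this indexing explicit:

    def landmark_xy(result, index, face_bbox):
        # Landmark `index` occupies two consecutive slots (x, y) in the flat
        # result vector; shift it by the face bbox origin to get frame coords.
        x = result[2 * index] + face_bbox[0]
        y = result[2 * index + 1] + face_bbox[1]
        return (x, y)

    # e.g. the 14 points collected above, in the same order:
    hand_points = [landmark_xy(result, k, face_bbox)
                   for k in (17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8)]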
Example #28
0
    def process(self, context):
        video_frame = context["frame"][OAK_Stage.COLOR]

        if video_frame is None:
            if self.videoframe is None:
                return
            else:
                video_frame = self.videoframe
        else:
            self.videoframe = video_frame

        self.blazepose.frame_size = video_frame.shape[0]
        pd_inference = context["output_queues"]["blazepose_pd"].tryGet()

        if pd_inference is not None:
            self.blazepose.pd_postprocess(pd_inference)
        else:
            return

        self.blazepose.nb_active_regions = 0

        bodyposes = []
        bodyposes_3d = []

        for i, r in enumerate(self.blazepose.regions):
            frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                         self.blazepose.lm_input_length,
                                         self.blazepose.lm_input_length)
            nn_data = dai.NNData()
            nn_data.setLayer(
                "input_1",
                to_planar(frame_nn, (self.blazepose.lm_input_length,
                                     self.blazepose.lm_input_length)))
            context["input_queues"]['blazepose_lm_in'].send(nn_data)

            lm_inference = context["output_queues"]['blazepose_lm'].get()

            self.blazepose.lm_postprocess(r, lm_inference)

            if r.lm_score < self.blazepose.lm_score_threshold:

                continue

            bp = BodyPose(frame_id=context["frame_id"], pixel_space=True)
            bp_3d = None

            points = r.landmarks_abs
            bp_3d = BodyPose(frame_id=context["frame_id"], pixel_space=False)

            for i, x_y in enumerate(r.landmarks_padded[:, :2]):

                name = OAK_Blazepose.kp_name[i]

                if name is None:
                    continue

                bp.add_keypoint(name, x_y[0], x_y[1])
                bp_3d.add_keypoint(name, points[i][0], points[i][1],
                                   points[i][2])

            bodyposes.append(bp)
            bodyposes_3d.append(bp_3d)

        self._setOutput(bodyposes, "bodypose_list")
        self._setOutput(bodyposes_3d, "bodypose3d_list")
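The BodyPose container used above is not shown; a minimal hypothetical sketch matching how the snippet uses it (constructed with a frame id and a pixel-space flag, then filled via add_keypoint with 2D or 3D coordinates):

    class BodyPose:
        # Hypothetical minimal keypoint container; the real class in the
        # surrounding project may carry more metadata.
        def __init__(self, frame_id, pixel_space):
            self.frame_id = frame_id
            self.pixel_space = pixel_space
            self.keypoints = {}

        def add_keypoint(self, name, x, y, z=None):
            self.keypoints[name] = (x, y) if z is None else (x, y, z)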
Example #29
0
            return True, np.array(cam_out.get().getData()).reshape(
                (3, 300, 300)).transpose(1, 2, 0).astype(np.uint8)

    try:
        while should_run():
            read_correctly, frame = get_frame()

            if not read_correctly:
                break

            if frame is not None:
                fps.update()
                debug_frame = frame.copy()

                if not args.camera:
                    nn_data = depthai.NNData()
                    nn_data.setLayer("input", to_planar(frame, (300, 300)))
                    detection_in.send(nn_data)

            while detection_nn.has():
                bboxes = np.array(detection_nn.get().getFirstLayerFp16())
                bboxes = bboxes.reshape((bboxes.size // 7, 7))
                bboxes = bboxes[bboxes[:, 2] > 0.7][:, 3:7]

                for raw_bbox in bboxes:
                    bbox = frame_norm(frame, raw_bbox)
                    det_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

                    nn_data = depthai.NNData()
                    nn_data.setLayer("data", to_planar(det_frame, (48, 96)))
                    age_gender_in.send(nn_data)
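The crops sent to age_gender_in above would normally be read back from the corresponding output queue; a hedged decoding sketch, assuming the standard age-gender-recognition-retail-0013 output layers ("age_conv3" holding age/100 and "prob" holding female/male confidences):

    import numpy as np

    def parse_age_gender(nn_packet):
        # Assumed decoding of the age/gender model's two output layers.
        age = int(float(np.squeeze(np.array(nn_packet.getLayerFp16("age_conv3")))) * 100)
        gender = ["female", "male"][int(np.argmax(nn_packet.getLayerFp16("prob")))]
        return age, gender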
Example #30
0
def process_image(transform, processing_model, img):
    global useOAKDCam, bboxes, results, pd_score_thresh, pd_nms_thresh, anchors, device, q_rgb, q_nn, fps, q_in
    tracks = []
    # (q_rgb,q_nn) = processing_model
    try:
        # if useOAKDCam:
        # #     ret, frame = cap.read()
        #     frame = np.array(cam_out.get().getData()).reshape((3, 300, 300)).transpose(1, 2, 0).astype(np.uint8)
        #     shape = (3, frame.getHeight(), frame.getWidth())
        #     frame = in_rgb.getData().reshape(shape).transpose(1, 2, 0).astype(np.uint8)
        #     frame = np.ascontiguousarray(frame)
        # else:
        frame = img

        #palm detection https://github.com/geaxgx/oakd_palm_detection
        if transform == 'oakd_palm':
            if device is None:
                # Start defining a pipeline
                pipeline = dai.Pipeline()

                if useOAKDCam:
                    # Define a source - color camera
                    cam_rgb = pipeline.createColorCamera()
                    cam_rgb.setPreviewSize(128, 128)
                    cam_rgb.setFps(90.0)
                    cam_rgb.setInterleaved(False)

                # Define a neural network that will make predictions based on the source frames
                detection_nn = pipeline.createNeuralNetwork()
                detection_nn.setBlobPath(
                    str(
                        Path(
                            "../oakd_palm_detection/models/palm_detection.blob"
                        ).resolve().absolute()))

                if useOAKDCam:
                    cam_rgb.preview.link(detection_nn.input)
                else:
                    detection_in = pipeline.createXLinkIn()
                    detection_in.setStreamName("detection_in")
                    detection_in.out.link(detection_nn.input)

                # Create outputs
                if useOAKDCam:
                    xout_rgb = pipeline.createXLinkOut()
                    xout_rgb.setStreamName("rgb")
                    cam_rgb.preview.link(xout_rgb.input)

                xout_nn = pipeline.createXLinkOut()
                xout_nn.setStreamName("nn")
                detection_nn.out.link(xout_nn.input)

                # Pipeline defined, now the device is assigned and pipeline is started
                device = dai.Device(pipeline)
                device.startPipeline()

                if useOAKDCam:
                    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
                    q_rgb = device.getOutputQueue(name="rgb",
                                                  maxSize=4,
                                                  blocking=False)
                else:
                    q_in = device.getInputQueue("detection_in")

                q_nn = device.getOutputQueue(name="nn",
                                             maxSize=4,
                                             blocking=False)

            # fps.update()
            # if frame is not None:

            if not useOAKDCam:
                nn_data = dai.NNData()
                nn_data.setLayer("input", to_planar(frame, (128, 128)))
                q_in.send(nn_data)

                # in_nn = q_nn.get()
                in_nn = q_nn.tryGet()
                # Two output layers (from print(in_nn.getAllLayerNames())):
                # - classificators: detection scores
                # - regressors: box and keypoint regressions

                if in_nn is not None:
                    scores = np.array(in_nn.getLayerFp16("classificators"))
                    bboxes = np.array(
                        in_nn.getLayerFp16("regressors")).reshape((896, 18))

                    # Decode bboxes
                    regions = decode_bboxes(pd_score_thresh, 128, 128, scores,
                                            bboxes, anchors)
                    # Non maximum suppression
                    regions = non_max_suppression(regions, pd_nms_thresh)
                    tracks = regions
                    for r in regions:
                        raw_bbox = (np.array(r.pd_box) * 128).astype(int)
                        # box = raw_bbox
                        # print("raw_bbox",raw_bbox)
                        # print("frame.shape",frame.shape)
                        box = frame_norm3(frame, raw_bbox)
                        # print("box3",box)
                        # cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (255, 255, 255), 2)
                        cv2.rectangle(frame, (box[0], box[1]),
                                      (box[0] + box[2], box[1] + box[3]),
                                      (255, 255, 0), 2)

                    # if frame is not None:
                    #     img = frame
                    if frame is not None:
                        # cv2.putText(frame, "FPS: {:.2f}".format(fps.get()), (10,10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0,0,255), 1)
                        # cv2.imshow("rgb", frame)
                        img = frame

            else:
                # in_rgb = q_rgb.tryGet()

                in_rgb = q_rgb.get()

                if in_rgb is not None:
                    # if the data from the rgb camera is available, transform the 1D data into a HxWxC frame
                    shape = (3, in_rgb.getHeight(), in_rgb.getWidth())

                    frame = in_rgb.getData().reshape(shape).transpose(
                        1, 2, 0).astype(np.uint8)
                    frame = np.ascontiguousarray(frame)
                    in_nn = q_nn.get()
                    # Two output layers (from print(in_nn.getAllLayerNames())):
                    # - classificators: detection scores
                    # - regressors: box and keypoint regressions

                    if in_nn is not None:
                        scores = np.array(in_nn.getLayerFp16("classificators"))
                        bboxes = np.array(
                            in_nn.getLayerFp16("regressors")).reshape(
                                (896, 18))

                        # Decode bboxes
                        regions = decode_bboxes(pd_score_thresh, 128, 128,
                                                scores, bboxes, anchors)
                        # Non maximum suppression
                        regions = non_max_suppression(regions, pd_nms_thresh)
                        tracks = regions
                        for r in regions:
                            box = (np.array(r.pd_box) * 128).astype(int)
                            cv2.rectangle(frame, (box[0], box[1]),
                                          (box[0] + box[2], box[1] + box[3]),
                                          (255, 255, 0), 2)

                    # if frame is not None:
                    #     img = frame
                    if frame is not None:
                        # cv2.putText(frame, "FPS: {:.2f}".format(fps.get()), (10,10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0,0,255), 1)
                        # cv2.imshow("rgb", frame)
                        img = frame

                    # if cv2.waitKey(1) == ord('q'):
                    #     pass

    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("OAK-D Exception", e)

    return tracks, img
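frame_norm3 above maps a palm box given as (x, y, w, h) in the 128x128 network input back to the display frame; a hypothetical sketch consistent with how the result is drawn (top-left corner plus width/height), though the project's own helper may differ:

    import numpy as np

    def frame_norm3(frame, bbox):
        # bbox is (x, y, w, h) in 128x128 input pixels; rescale it to the
        # display frame so it can be drawn as (x, y) .. (x + w, y + h).
        h, w = frame.shape[:2]
        scale = np.array([w / 128, h / 128, w / 128, h / 128])
        return (np.array(bbox, dtype=float) * scale).astype(int)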