Code example #1
File: main.py  Project: OAKChina/depthai-examples
    def run_face(self):
        # img, scale, top, left = resize_padding(self.frame, 300, 300)

        if not self.camera:
            nn_data = run_nn(
                self.face_in,
                self.face_nn,
                {"data": toPlanar(self.frame, (300, 300))},
            )
        else:
            nn_data = self.face_nn.tryGet()
        if nn_data is None:
            return False

        bboxes = nn_data.detections
        self.number_of_people = len(bboxes)
        for bbox in bboxes:
            face_coord = frameNorm(
                self.debug_frame, [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
            # face_coord = restore_point(face_coord, scale, top, left).astype(int)
            face_coord = scale_bbox(face_coord)
            self.face_frames.put(self.frame[face_coord[1]:face_coord[3],
                                            face_coord[0]:face_coord[2]])
            self.face_coords.put(face_coord)
            self.draw_bbox(face_coord, (10, 245, 10))

        return True
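The snippet above relies on two helpers, toPlanar and frameNorm, whose definitions are not part of the excerpt. A minimal sketch, assuming they follow the usual DepthAI example utilities (resize-and-transpose to planar CHW, and scaling of normalized bounding-box coordinates to pixels):

import cv2
import numpy as np

def toPlanar(arr, shape):
    # Resize to the NN input size and rearrange HWC -> CHW (planar),
    # which is the layout DepthAI ImgFrame inputs expect.
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()

def frameNorm(frame, bbox):
    # Map normalized [0, 1] bbox coordinates to pixel coordinates:
    # x values are scaled by the frame width, y values by the height.
    norm_vals = np.full(len(bbox), frame.shape[0])
    norm_vals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * norm_vals).astype(int)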
Code example #2
def show_boxes_and_regions(frame, boxes, masks):
    for i, box in enumerate(boxes):
        # an image id of -1 marks the end of valid detections in the padded output
        if box[0] == -1:
            break

        cls = int(box[1])
        prob = box[2]

        if prob < THRESHOLD:
            continue

        bbox = frameNorm(frame, box[-4:])
        cv2.rectangle(frame, (bbox[0], bbox[1] - 15), (bbox[2], bbox[1]),
                      COLORS[cls], -1)
        cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      COLORS[cls], 1)
        cv2.putText(frame, f"{LABEL_MAP[cls-1]}: {prob:.2f}",
                    (bbox[0] + 5, bbox[1] - 5), cv2.FONT_HERSHEY_DUPLEX, 0.3,
                    (0, 0, 0), 2)
        cv2.putText(frame, f"{LABEL_MAP[cls-1]}: {prob:.2f}",
                    (bbox[0] + 5, bbox[1] - 5), cv2.FONT_HERSHEY_DUPLEX, 0.3,
                    (255, 255, 255), 1)

        bbox_w = bbox[2] - bbox[0]
        bbox_h = bbox[3] - bbox[1]

        mask = cv2.resize(masks[i, cls], (bbox_w, bbox_h))
        mask = mask > REGION_THRESHOLD

        roi = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        roi[mask] = roi[mask] * 0.6 + COLORS[cls] * 0.4
        frame[bbox[1]:bbox[3], bbox[0]:bbox[2]] = roi
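show_boxes_and_regions also expects several module-level constants (THRESHOLD, REGION_THRESHOLD, LABEL_MAP, COLORS) defined elsewhere in the file. Purely hypothetical stand-ins, just to make the snippet self-contained; the real project uses its own label set and colors:

import numpy as np

THRESHOLD = 0.5          # minimum detection confidence before a box is drawn
REGION_THRESHOLD = 0.5   # minimum per-pixel mask score before a pixel is shaded
LABEL_MAP = ["person", "bicycle", "car"]   # class names, indexed by cls - 1
# one BGR color per class id (index 0 is a background slot, since cls starts at 1);
# kept as float so the roi * 0.6 + color * 0.4 blending stays in floating point
COLORS = np.random.RandomState(1).randint(
    0, 256, size=(len(LABEL_MAP) + 1, 3)).astype(np.float64)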
Code example #3
File: vehicle.py  Project: OAKChina/depthai-examples
def vehicle(source, video_path, output, fps, frame_size):
    """
    Vehicle attribute recognition and license plate recognition
    """
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            vehicle_in = device.getInputQueue("vehicle_in")
        else:
            cam_out = device.getOutputQueue("rgb")

        vehicle_nn = device.getOutputQueue("vehicle_nn")

        attr_in = device.getInputQueue("attr_in")
        attr_nn = device.getOutputQueue("attr_nn")
        license_in = device.getInputQueue("license_in")
        license_nn = device.getOutputQueue("license_nn")

        colors = ["white", "gray", "yellow", "red", "green", "blue", "black"]
        types = ["car", "bus", "truck", "van"]

        license_dict = [
            *map(chr, range(48, 58)),
            "<Anhui>",
            "<Beijing>",
            "<Chongqing>",
            "<Fujian>",
            "<Gansu>",
            "<Guangdong>",
            "<Guangxi>",
            "<Guizhou>",
            "<Hainan>",
            "<Hebei>",
            "<Heilongjiang>",
            "<Henan>",
            "<HongKong>",
            "<Hubei>",
            "<Hunan>",
            "<InnerMongolia>",
            "<Jiangsu>",
            "<Jiangxi>",
            "<Jilin>",
            "<Liaoning>",
            "<Macau>",
            "<Ningxia>",
            "<Qinghai>",
            "<Shaanxi>",
            "<Shandong>",
            "<Shanghai>",
            "<Shanxi>",
            "<Sichuan>",
            "<Tianjin>",
            "<Tibet>",
            "<Xinjiang>",
            "<Yunnan>",
            "<Zhejiang>",
            "<police>",
            *map(chr, range(65, 91)),
        ]

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, vehicle_in, 300, 300)
            vehicle_data = vehicle_nn.get().detections
            fps_handler.tick("vehicle")

            for bbox in vehicle_data:
                if bbox.label == 1:
                    vehicle_coord = frameNorm(
                        frame_debug,
                        [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])

                    cv2.rectangle(frame_debug, vehicle_coord[:2],
                                  vehicle_coord[2:], (128, 128, 0))

                    vehicle_frame = frame[vehicle_coord[1]:vehicle_coord[3],
                                          vehicle_coord[0]:vehicle_coord[2], ]
                    run_nn(vehicle_frame, attr_in, 72, 72)
                    attr_data = toTensorResult(attr_nn.get())
                    color_ = colors[attr_data.get("color").argmax()]
                    type_ = types[attr_data.get("type").argmax()]
                    drawText(
                        frame_debug,
                        color_,
                        (vehicle_coord[0] + 10, vehicle_coord[1] + 10),
                    )
                    drawText(
                        frame_debug,
                        type_,
                        (vehicle_coord[0] + 10, vehicle_coord[1] + 25),
                    )
                elif bbox.label == 2:
                    plate_coord = frameNorm(
                        frame_debug,
                        [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    cv2.rectangle(frame_debug, plate_coord[:2],
                                  plate_coord[2:], (128, 128, 0))

                    plate_frame = frame[plate_coord[1]:plate_coord[3],
                                        plate_coord[0]:plate_coord[2], ]
                    plate_frame = pad_resize(plate_frame, (24, 94))

                    # cv2.imshow("pl",plate_frame.astype(np.uint8))
                    run_nn(plate_frame, license_in, 94, 24)
                    license_data = (toTensorResult(
                        license_nn.get()).get("d_predictions.0").squeeze())
                    plate_str = ""
                    for j in license_data:
                        if j == -1:
                            break
                        plate_str += license_dict[j]
                    drawText(
                        frame_debug,
                        plate_str,
                        (plate_coord[0] - 10, plate_coord[1] - 10),
                    )
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
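When a video file is the source, run_nn pushes each host frame to the device through an XLinkIn input queue. Its definition is not shown in this excerpt; a minimal sketch, assuming the common DepthAI pattern of wrapping the resized frame in a planar BGR ImgFrame (argument order matches the calls above):

import cv2
import depthai as dai

def run_nn(frame, queue, width, height):
    # Resize to the network's input resolution, convert to planar BGR,
    # and send the buffer to the device-side input queue.
    img = dai.ImgFrame()
    img.setType(dai.RawImgFrame.Type.BGR888p)
    img.setWidth(width)
    img.setHeight(height)
    img.setData(cv2.resize(frame, (width, height)).transpose(2, 0, 1).flatten())
    queue.send(img)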
Code example #4
File: palm.py  Project: OAKChina/depthai-examples
def palm(source, video_path, output, fps, frame_size):
    """
    Palm detection, controlling the mouse
    """
    # click.echo(click.get_current_context().params)
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        fps_handler = FPSHandler()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            palm_in = device.getInputQueue("palm_in")
        else:
            cam_out = device.getOutputQueue("rgb")

        palm_nn = device.getOutputQueue("palm_nn")

        dots = []

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(frame_debug, palm_in, 128, 128)
            results = toTensorResult(palm_nn.get())
            fps_handler.tick("palm")

            num_keypoints = 7
            min_score_thresh = 0.7
            # SSD-style anchor boxes for the 128x128 palm detector
            # (reloaded here every frame; loading once before the loop would also work)
            anchors = np.load("anchors_palm.npy")

            raw_box_tensor = results.get("regressors")  # regress
            raw_score_tensor = results.get("classificators")  # classification
            detections = raw_to_detections(raw_box_tensor, raw_score_tensor,
                                           anchors, (128, 128), num_keypoints)

            palm_coords = [
                frameNorm(frame, obj[:4]) for det in detections for obj in det
                if obj[-1] > min_score_thresh
            ]

            palm_confs = [
                obj[-1] for det in detections for obj in det
                if obj[-1] > min_score_thresh
            ]

            if len(palm_coords) > 0:
                palm_coords = non_max_suppression(
                    boxes=np.concatenate(palm_coords).reshape(-1, 4),
                    probs=palm_confs,
                    overlapThresh=0.1,
                )

                for bbox in palm_coords:
                    cv2.rectangle(frame_debug, bbox[:2], bbox[2:],
                                  (10, 245, 10))
                    dot_x = (bbox[2] + bbox[0]) / 2
                    dot_y = (bbox[3] + bbox[1]) / 2
                    dots = move_mouse(dots, (dot_x, dot_y), frame.shape[:2])
            cv2.imshow("", frame_debug)
            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        fps_handler.printStatus()
        if source:
            cap.release()
        if output:
            writer.release()
        cv2.destroyAllWindows()
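move_mouse keeps a short history of palm centres and maps the smoothed point to the screen. The original helper is not included here; a rough sketch assuming a pyautogui-based implementation (pyautogui and the history parameter are assumptions, the project may use a different mouse library):

import pyautogui

def move_mouse(dots, point, frame_shape, history=5):
    # Smooth the palm centre over the last few frames to reduce jitter.
    dots.append(point)
    dots = dots[-history:]
    avg_x = sum(d[0] for d in dots) / len(dots)
    avg_y = sum(d[1] for d in dots) / len(dots)
    # Map frame coordinates onto the screen and move the cursor there.
    frame_h, frame_w = frame_shape
    screen_w, screen_h = pyautogui.size()
    pyautogui.moveTo(avg_x / frame_w * screen_w, avg_y / frame_h * screen_h)
    return dots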
Code example #5
def facial_info(source, video_path, output, fps, frame_size):
    """
    Facial information recognition
    """
    device_info = getDeviceInfo()  # type: dai.DeviceInfo
    with dai.Device(create_pipeline(source), device_info) as device:
        print("Starting pipeline...")
        # device.startPipeline()
        if source:
            cap = cv2.VideoCapture(video_path)
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_shape = [frame_height, frame_width]
            print("CAP_PROP_FRAME_SHAPE: %s" % frame_shape)
            cap_fps = int(cap.get(cv2.CAP_PROP_FPS))
            print("CAP_PROP_FPS: %d" % cap_fps)

            face_in = device.getInputQueue("face_in")
        else:
            cam_out = device.getOutputQueue("cam_out", 1, True)
        face_nn = device.getOutputQueue("face_nn")
        head_pose_in = device.getInputQueue("head_pose_in")
        head_pose_nn = device.getOutputQueue("head_pose_nn")
        age_in = device.getInputQueue("age_in")
        age_nn = device.getOutputQueue("age_nn")
        emo_in = device.getInputQueue("emo_in")
        emo_nn = device.getOutputQueue("emo_nn")

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(output), fourcc, fps, frame_size)

        def should_run():
            if source:
                return cap.isOpened()
            else:
                return True

        def get_frame():
            if source:
                return cap.read()
            else:
                return True, cam_out.get().getCvFrame()

        fps_handler = FPSHandler()

        while should_run():
            read_correctly, frame = get_frame()
            if not read_correctly:
                break
            frame_debug = frame.copy()
            if source:
                run_nn(face_in, frame, 300, 300)
            face_nn_data = face_nn.get()
            fps_handler.tick("All")
            if face_nn_data is not None:
                bboxes = face_nn_data.detections

                for bbox in bboxes:
                    face_coord = frameNorm(
                        frame_debug,
                        [bbox.xmin, bbox.ymin, bbox.xmax, bbox.ymax])
                    face_frame = frame[face_coord[1]:face_coord[3],
                                       face_coord[0]:face_coord[2], ]
                    cv2.rectangle(
                        frame_debug,
                        (face_coord[0], face_coord[1]),
                        (face_coord[2], face_coord[3]),
                        (0, 0, 0),
                    )

                    run_nn(head_pose_in, face_frame, 60, 60)
                    roll_degree = toTensorResult(
                        head_pose_nn.get()).get("angle_r_fc")[0][0]
                    center = (
                        (face_coord[2] + face_coord[0]) / 2,
                        (face_coord[3] + face_coord[1]) / 2,
                    )
                    size = (
                        (face_coord[2] - face_coord[0]),
                        (face_coord[3] - face_coord[1]),
                    )
                    face_frame_corr = rotate_frame(
                        frame,
                        center,
                        size,
                        roll_degree,
                    )
                    cv2.imshow("face_frame_corr", face_frame_corr)

                    run_nn(age_in, face_frame_corr, 62, 62)
                    age_gender = toTensorResult(age_nn.get())
                    # the network outputs age / 100, hence the scaling
                    age = age_gender.get("age_conv3").squeeze() * 100
                    # 0 - female, 1 - male
                    gender = "Male" if age_gender.get("prob").argmax() else "Female"
                    drawText(
                        frame_debug,
                        f"Age: {age:0.0f}",
                        (face_coord[0] + 10, face_coord[1] + 30),
                        color="greenyellow",
                    )
                    drawText(
                        frame_debug,
                        f"Gender: {gender}",
                        (face_coord[0] + 10, face_coord[1] + 50),
                        "greenyellow",
                    )

                    run_nn(emo_in, face_frame_corr, 64, 64)
                    # 0 - 'neutral', 1 - 'happy', 2 - 'sad', 3 - 'surprise', 4 - 'anger'
                    emo = ["neutral", "happy", "sad", "surprise", "anger"]
                    emo = emo[toTensorResult(
                        emo_nn.get()).get("prob_emotion").argmax()]
                    drawText(
                        frame_debug,
                        f"emo: {emo}",
                        (face_coord[0] + 10, face_coord[1] + 70),
                        "greenyellow",
                    )

            if output:
                writer.write(cv2.resize(frame_debug, frame_size))

            cv2.imshow("debug", frame_debug)
            key = cv2.waitKey(1)
            if key in [ord("q"), 27]:
                break
            elif key == ord("s"):
                cv2.imwrite(
                    "saved_%s.jpg" %
                    time.strftime("%Y%m%d_%H%M%S", time.localtime()),
                    frame_debug,
                )
        if source:
            cap.release()
        if output:
            writer.release()
        fps_handler.printStatus()
        cv2.destroyAllWindows()
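rotate_frame compensates for the head roll angle before the face crop is passed to the age/gender and emotion networks. Its definition is outside this excerpt; one plausible sketch, assuming it rotates the full frame around the face centre and then cuts out an axis-aligned patch of the requested size:

import cv2

def rotate_frame(frame, center, size, degree):
    # Rotate the whole frame around the face centre by the roll angle...
    h, w = frame.shape[:2]
    center = (float(center[0]), float(center[1]))
    matrix = cv2.getRotationMatrix2D(center, float(degree), 1.0)
    rotated = cv2.warpAffine(frame, matrix, (w, h))
    # ...then extract the face-sized patch around the same centre.
    return cv2.getRectSubPix(rotated, (int(size[0]), int(size[1])), center)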