import json
import os
import time

import cv2
import depthai as dai
import numpy as np
from tqdm import trange

# model_list, get_poseestimator, create_pipeline, SliderWindow, FPS, Timer
# and COCOData are helpers from this project's own modules (their exact
# import paths are not shown in these snippets).


def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : dict
        Command line arguments parsed by parse_arguments.
    """
    # Set up the PoseEstimator, pipeline and windows with sliders for the
    # PoseEstimator options, and load the video if running on a local file
    camera = args["video"] is None
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(
            create_pipeline(model_config, camera, passthrough=True,
                            **args)) as device:
        device.startPipeline()

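        # Host-side queues: camera mode reads frames from 'preview', while
        # video mode pushes frames to the NN via 'pose_in'. NN results arrive
        # on 'pose' and 'passthrough' returns the exact frames the NN saw.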
        if camera:
            preview_queue = device.getOutputQueue("preview",
                                                  maxSize=4,
                                                  blocking=False)
        else:
            pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")
        passthrough_queue = device.getOutputQueue("passthrough")

        # Load the video if given on the command line and set the variables
        # used below to control the FPS and looping of the video
        if not camera:
            if not os.path.exists(args["video"]):
                raise ValueError("Video '{}' does not exist.".format(
                    args["video"]))
            print("Loading video", args["video"])
            video = cv2.VideoCapture(args["video"])
            frame_interval = 1 / video.get(cv2.CAP_PROP_FPS)
            last_frame_time = 0
            frame_id = 0
        else:
            print("Running on OAK camera preview stream")

        # Create windows for the original video and the video of frames from
        # the NN passthrough. The window for the original video gets all the
        # option sliders to change pose estimator config
        video_window_name = "Original Video"
        passthrough_window_name = "Processed Video"
        video_window = SliderWindow(video_window_name)
        cv2.namedWindow(passthrough_window_name)
        video_window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        fps = FPS("Video", "NN", interval=0.1)
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes
            slider_changes = video_window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)

            fps.start_frame()
            # Get next video frame (and submit for processing if local video)
            if camera:
                frame = preview_queue.get().getCvFrame()
                fps.count("Video")
            else:
                frame_time = time.perf_counter()
                # Only grab next frame from file at certain intervals to
                # roughly preserve its original FPS
                if frame_time - last_frame_time > frame_interval:
                    if video.grab():
                        __, frame = video.retrieve()
                        fps.count("Video")
                        last_frame_time = frame_time
                        # Create a DepthAI ImgFrame object to send to the NN
                        # input queue
                        input_frame = pose_estimator.get_input_frame(frame)
                        frame_nn = dai.ImgFrame()
                        frame_nn.setSequenceNum(frame_id)
                        frame_nn.setWidth(input_frame.shape[2])
                        frame_nn.setHeight(input_frame.shape[1])
                        frame_nn.setType(dai.RawImgFrame.Type.BGR888p)
                        frame_nn.setFrame(input_frame)
                        pose_in_queue.send(frame_nn)
                        frame_id += 1
                    else:
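                        # End of file reached: rewind to the first frame so
                        # the video loops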
                        frame_id = 0
                        video.set(cv2.CAP_PROP_POS_FRAMES, frame_id)

            # Process pose data whenever a new packet arrives
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                fps.count("NN")
                # When keypoints are available we should also have a
                # passthrough frame to process and display. Make sure it is
                # available to avoid surprises.
                if passthrough_queue.has():
                    passthrough = passthrough_queue.get()
                    timer.frame_time("inference", passthrough)
                    passthrough_frame = passthrough.getCvFrame()
                    passthrough_frame = pose_estimator.get_original_frame(
                        passthrough_frame)
                    pose_estimator.draw_results(keypoints, passthrough_frame)
                    cv2.imshow(passthrough_window_name, passthrough_frame)

            # Annotate current video frame with keypoints and FPS
            if keypoints is not None:
                pose_estimator.draw_results(keypoints, frame)
            fps.update()
            fps.display(frame)

            cv2.imshow(video_window_name, frame)

            if cv2.waitKey(1) == ord("q"):
                break
        fps.print_totals()
        timer.print_times()
        cv2.destroyAllWindows()
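
A typical entry point for this loop might look as follows (a minimal sketch:
the snippet only shows that parse_arguments produces the args dict used
above, not its exact flags):

if __name__ == "__main__":
    main(parse_arguments())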
Example 2
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : dict
        Command line arguments parsed by parse_arguments.
    """
    # Set up the PoseEstimator and pipeline, and load the dataset
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(
            create_pipeline(model_config, camera=False, sync=True,
                            **args)) as device:
        device.startPipeline()

        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        # Load coco keypoint annotations
        coco_data = COCOData(**args)
        # The keypoint selector subsets and re-orders the predicted keypoints
        # to match the annotation format of the COCO dataset
        keypoint_selector = coco_data.get_keypoint_selector(
            pose_estimator.landmarks)
        results_filename = "results_{model}_{conf}.json".format(
            model=args["model"], conf=args["detection_threshold"])

        # Re-use saved results if available; otherwise iterate over the
        # dataset.
        if not os.path.exists(results_filename):
            timer = Timer("inference")
            results = []
            for img_id in trange(len(coco_data)):
                img = coco_data.get_image(img_id)

                input_frame = pose_estimator.get_input_frame(img)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(img_id)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                # Match the first example: declare the planar BGR layout of
                # the input buffer
                frame_nn.setType(dai.RawImgFrame.Type.BGR888p)
                frame_nn.setFrame(input_frame)
                timer.start_timer("inference")
                pose_in_queue.send(frame_nn)

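                # get() blocks until the NN result for this image is available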
                raw_output = pose_queue.get()
                timer.stop_timer("inference")
                pred_keypoints = pose_estimator.get_pose_data(raw_output)

                # Convert each detected person into the output format expected
                # by the COCO evaluation tools
                for i in range(pred_keypoints.shape[0]):
                    # Score each person by the mean confidence of its detected
                    # keypoints, then mark all keypoints as labelled
                    score = pred_keypoints[i, :, 2]
                    score = np.sum(score) / np.count_nonzero(score)
                    pred_keypoints[i, :, 2] = 1
                    keypoints = np.around(pred_keypoints[i])
                    keypoints = keypoints[keypoint_selector]
                    results.append({
                        "image_id": coco_data.get_coco_imageid(img_id),
                        "category_id": 1,
                        "keypoints": keypoints.flatten().tolist(),
                        # Cast to a plain float so json.dump can serialize it
                        "score": float(score),
                    })

            with open(results_filename, "w") as results_file:
                json.dump(results, results_file)
            timer.print_times()

        coco_data.evaluate_results(results_filename)
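
Each entry written to the results JSON above follows the standard COCO
keypoint results format: the person category id is 1 and the keypoints are
flattened as x, y, visibility triplets. An illustrative entry (values made
up):

{
    "image_id": 397133,
    "category_id": 1,
    "keypoints": [433.0, 94.0, 1.0, 434.0, 90.0, 1.0, ...],
    "score": 0.87
}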
Example 3
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : dict
        Command line arguments parsed by parse_arguments.
    """
    # Set up the PoseEstimator, pipeline and a window with sliders for the
    # PoseEstimator options, and load the image
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(create_pipeline(model_config, camera=False,
                                    **args)) as device:
        device.startPipeline()

        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        if not os.path.exists(args["image"]):
            raise ValueError("Image '{}' does not exist.".format(
                args["image"]))
        print("Loading image", args["image"])
        image = cv2.imread(args["image"])

        window = SliderWindow("preview")
        window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        raw_output = None
        redraw = True
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes, redraw if there was a change
            slider_changes = window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)
                redraw = True

            # On the first iteration pass the image to the NN for inference.
            # The raw results are kept afterwards so that changes to the
            # PoseEstimator options only require decoding the results again,
            # not another inference
            if frame is None:
                frame = image.copy()
                # Create a DepthAI ImgFrame object to send to the NN input
                # queue
                input_frame = pose_estimator.get_input_frame(frame)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(0)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                # Match the first example: declare the planar BGR layout of
                # the input buffer
                frame_nn.setType(dai.RawImgFrame.Type.BGR888p)
                frame_nn.setFrame(input_frame)
                timer.start_timer("inference")
                pose_in_queue.send(frame_nn)

            # Store the raw results once available
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.stop_timer("inference")

            # Once we have the raw output, and again whenever an option
            # changes, we need to decode and draw the results
            if redraw and raw_output is not None:
                # keep a clean copy of the image for redrawing
                frame = image.copy()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                pose_estimator.draw_results(keypoints, frame)
                redraw = False

            cv2.imshow("preview", frame)

            if cv2.waitKey(1) == ord("q"):
                break
        cv2.destroyAllWindows()
        timer.print_times()
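
All three snippets send the NN input as a channel-planar buffer: note that
input_frame.shape[1] is used as the height and input_frame.shape[2] as the
width, i.e. the array is laid out CHW as required by
dai.RawImgFrame.Type.BGR888p. A minimal sketch of such a conversion (the
real logic lives in the project's PoseEstimator.get_input_frame and may
differ, e.g. by padding or normalizing):

def to_planar_bgr(frame, width, height):
    # Resize to the NN input resolution, then reorder HWC -> CHW so the
    # buffer matches the planar BGR888p layout expected by the device.
    resized = cv2.resize(frame, (width, height))
    return resized.transpose(2, 0, 1)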