def main(args):
    """Main program loop.

    Runs pose estimation either on the OAK camera preview stream or on a
    local video file and displays both the annotated original frames and
    the NN passthrough frames side by side.

    Parameters
    ----------
    args : dict
        Command line arguments parsed by parse_arguments.
    """
    # Setup PoseEstimator, pipeline, windows with sliders for PoseEstimator
    # options and load video if running on local video file
    camera = args["video"] is None
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(
            create_pipeline(model_config, camera, passthrough=True,
                            **args)) as device:
        device.startPipeline()
        if camera:
            preview_queue = device.getOutputQueue("preview", maxSize=4,
                                                  blocking=False)
        else:
            pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")
        passthrough_queue = device.getOutputQueue("passthrough")

        # Load video if given in command line and set the variables used
        # below to control FPS and looping of the video
        video = None
        if not camera:
            if not os.path.exists(args["video"]):
                raise ValueError("Video '{}' does not exist.".format(
                    args["video"]))
            print("Loading video", args["video"])
            video = cv2.VideoCapture(args["video"])
            frame_interval = 1 / video.get(cv2.CAP_PROP_FPS)
            last_frame_time = 0
            frame_id = 0
        else:
            print("Running on OAK camera preview stream")

        # Create windows for the original video and the video of frames from
        # the NN passthrough. The window for the original video gets all the
        # option sliders to change pose estimator config
        video_window_name = "Original Video"
        passthrough_window_name = "Processed Video"
        video_window = SliderWindow(video_window_name)
        cv2.namedWindow(passthrough_window_name)
        video_window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        fps = FPS("Video", "NN", interval=0.1)
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes
            slider_changes = video_window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)

            fps.start_frame()
            # Get next video frame (and submit for processing if local video)
            if camera:
                frame = preview_queue.get().getCvFrame()
                fps.count("Video")
            else:
                frame_time = time.perf_counter()
                # Only grab next frame from file at certain intervals to
                # roughly preserve its original FPS
                if frame_time - last_frame_time > frame_interval:
                    if video.grab():
                        __, frame = video.retrieve()
                        fps.count("Video")
                        last_frame_time = frame_time
                        # Create DepthAI ImgFrame object to pass to the
                        # camera
                        input_frame = pose_estimator.get_input_frame(frame)
                        frame_nn = dai.ImgFrame()
                        frame_nn.setSequenceNum(frame_id)
                        frame_nn.setWidth(input_frame.shape[2])
                        frame_nn.setHeight(input_frame.shape[1])
                        frame_nn.setType(dai.RawImgFrame.Type.BGR888p)
                        frame_nn.setFrame(input_frame)
                        pose_in_queue.send(frame_nn)
                        frame_id += 1
                    else:
                        # End of file reached: rewind and loop the video
                        frame_id = 0
                        video.set(cv2.CAP_PROP_POS_FRAMES, frame_id)

            # Process pose data whenever a new packet arrives
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                fps.count("NN")
                # When keypoints are available we should also have a
                # passthrough frame to process and display. Make sure it is
                # available to avoid surprises.
                if passthrough_queue.has():
                    passthrough = passthrough_queue.get()
                    timer.frame_time("inference", passthrough)
                    passthrough_frame = passthrough.getCvFrame()
                    passthrough_frame = pose_estimator.get_original_frame(
                        passthrough_frame)
                    pose_estimator.draw_results(keypoints, passthrough_frame)
                    cv2.imshow(passthrough_window_name, passthrough_frame)

            # Annotate current video frame with keypoints and FPS.
            # Guard against frame being None: with a local video no frame
            # may have been decoded yet (e.g. unreadable file), and
            # cv2.imshow crashes on None input.
            if frame is not None:
                if keypoints is not None:
                    pose_estimator.draw_results(keypoints, frame)
                fps.update()
                fps.display(frame)
                cv2.imshow(video_window_name, frame)

            if cv2.waitKey(1) == ord("q"):
                break

        fps.print_totals()
        timer.print_times()
        # Release the capture explicitly — the original leaked the video
        # file handle by never calling release()
        if video is not None:
            video.release()
        cv2.destroyAllWindows()
def main(args):
    """Main program loop.

    Evaluates the selected pose estimation model on the COCO keypoint
    dataset, caching predictions to a JSON results file, and prints the
    COCO evaluation metrics.

    Parameters
    ----------
    args : dict
        Command line arguments parsed by parse_arguments.
    """
    # Set up PoseEstimator and pipeline and load the dataset
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(
            create_pipeline(model_config, camera=False, sync=True,
                            **args)) as device:
        device.startPipeline()
        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        # Load coco keypoint annotations
        coco_data = COCOData(**args)
        # The keypoint selector allows to subset and re-order the predicted
        # keypoints to align with the annotation format of the COCO dataset
        keypoint_selector = coco_data.get_keypoint_selector(
            pose_estimator.landmarks)

        results_filename = "results_{model}_{conf}.json".format(
            model=args["model"], conf=args["detection_threshold"])
        # Re-use saved results if available. Iterate over dataset if not.
        if not os.path.exists(results_filename):
            timer = Timer("inference")
            results = []
            for img_id in trange(len(coco_data)):
                img = coco_data.get_image(img_id)
                input_frame = pose_estimator.get_input_frame(img)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(img_id)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                frame_nn.setFrame(input_frame)
                timer.start_timer("inference")
                pose_in_queue.send(frame_nn)
                raw_output = pose_queue.get()
                timer.stop_timer("inference")
                pred_keypoints = pose_estimator.get_pose_data(raw_output)
                # Convert each individual person into output format expected
                # by COCO evaluation tools
                for i in range(pred_keypoints.shape[0]):
                    score = pred_keypoints[i, :, 2]
                    num_nonzero = np.count_nonzero(score)
                    # Average confidence over the detected keypoints only.
                    # Guard the division: a person whose keypoints all have
                    # zero confidence would otherwise produce a NaN score
                    # in the results JSON.
                    score = (np.sum(score) / num_nonzero
                             if num_nonzero else 0.0)
                    pred_keypoints[i, :, 2] = 1
                    keypoints = np.around(pred_keypoints[i])
                    keypoints = keypoints[keypoint_selector]
                    results.append({
                        "image_id": coco_data.get_coco_imageid(img_id),
                        "category_id": 1,
                        "keypoints": keypoints.flatten().tolist(),
                        "score": score
                    })
            with open(results_filename, "w") as results_file:
                json.dump(results, results_file)
            timer.print_times()

        coco_data.evaluate_results(results_filename)
def main(args):
    """Main program loop.

    Runs pose estimation once on a single still image, then interactively
    re-decodes the cached raw NN output whenever a PoseEstimator option
    slider changes — no repeated inference is needed.

    Parameters
    ----------
    args : dict
        Command line arguments parsed by parse_arguments.
    """
    # Set up PoseEstimator, pipeline, window with sliders for PoseEstimator
    # options and load image
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(create_pipeline(model_config, camera=False,
                                    **args)) as device:
        device.startPipeline()
        pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")

        if not os.path.exists(args["image"]):
            raise ValueError("Image '{}' does not exist.".format(
                args["image"]))
        print("Loading image", args["image"])
        image = cv2.imread(args["image"])
        # cv2.imread returns None (no exception) for files that exist but
        # cannot be decoded — fail early with a clear message instead of
        # crashing later on image.copy()
        if image is None:
            raise ValueError("Could not read image '{}'.".format(
                args["image"]))

        window = SliderWindow("preview")
        window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        raw_output = None
        redraw = True
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes, redraw if there was a
            # change
            slider_changes = window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)
                redraw = True

            # On the first iteration pass the image to the NN for inference.
            # Raw results are kept after so changes in PoseEstimator options
            # only require decoding the results again, not another inference
            if frame is None:
                frame = image.copy()
                # Create DepthAI ImgFrame object to pass to the camera
                input_frame = pose_estimator.get_input_frame(frame)
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(0)
                frame_nn.setWidth(input_frame.shape[2])
                frame_nn.setHeight(input_frame.shape[1])
                frame_nn.setFrame(input_frame)
                timer.start_timer("inference")
                pose_in_queue.send(frame_nn)

            # Store the raw results once available
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.stop_timer("inference")

            # Once we've got the raw output and again whenever an option
            # changes we need to decode and draw
            if redraw and raw_output is not None:
                # Keep a clean copy of the image for redrawing
                frame = image.copy()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                pose_estimator.draw_results(keypoints, frame)
                redraw = False

            cv2.imshow("preview", frame)
            if cv2.waitKey(1) == ord("q"):
                break

        cv2.destroyAllWindows()
        timer.print_times()