def video(
    input_file: Path = typer.Argument(
        ...,
        file_okay=True,
        dir_okay=False,
    ),
    output_file: Path = typer.Option(
        "./output/norfair-test.mp4",
        file_okay=True,
        dir_okay=False,
    ),
    max_distance: int = typer.Option(60),
    debug: bool = typer.Option(False),
):
    """
    Runs vehicle detection on frames of a video.

    Outputs a directory of images ready for processing with the
    ``images`` command.

    XXX not actually ready yet, I'm currently testing the `norfair` package
    which tracks detections through time so I can be smart about outputting
    only the largest and most clear frame of a vehicle rather than many
    similar frames of the same vehicle.
    """
    yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net()
    video = Video(input_path=str(input_file), output_path=str(output_file))
    # max_distance is the pixel threshold for matching a detection to an
    # existing tracked object between frames.
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance,
    )
    for frame in video:
        detections = detect_objects(yolo_net, yolo_labels, yolo_layers, yolo_colors, frame)
        # Keep only detections whose class label is a vehicle type.
        detections = [d for d in detections if d["label"] in VEHICLE_CLASSES]
        # norfair tracks points, so collapse each box to its centroid; the raw
        # detection dict rides along as `data` for later use.
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
        ]
        tracked_objects = tracker.update(detections=detections)
        # Fix: the breakpoint used to fire unconditionally on every frame even
        # though a --debug flag existed; only drop into pdb when requested.
        if debug:
            import pdb

            pdb.set_trace()
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
def tracker_update(frame, result):
    """Feed one frame's AlphaPose detections to the global tracker.

    Converts the raw poses in ``result["result"]`` to norfair detections,
    advances the tracker by one time step, draws both the detections and the
    tracker's predictions onto ``frame``, and returns the annotated frame.
    """
    poses = result["result"]  # AlphaPose output for this frame
    detections = convert_and_filter(poses)
    predictions = tracker.update(detections, dt=1)
    norfair.draw_points(frame, detections)
    norfair.draw_predictions(frame, predictions)
    return frame
def get_centroid(yolo_box, img_height, img_width):
    """Return the (x, y) centre of a normalized YOLO box as a numpy array.

    NOTE(review): only the tail of this function (``y2`` and the ``return``)
    survived a formatting mangle; the ``x1``/``y1``/``x2`` lines are
    reconstructed from that tail and from the call sites
    ``get_centroid(box, frame.shape[0], frame.shape[1])`` — confirm against
    the original source.
    """
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


def main():
    """CLI entry point: run YOLO + norfair tracking over each input video."""
    parser = argparse.ArgumentParser(description="Track human poses in a video.")
    parser.add_argument("files", type=str, nargs="+", help="Video files to process")
    args = parser.parse_args()

    model = YOLO("yolov4.pth")  # set use_cuda=False if using CPU

    for input_path in args.files:
        video = Video(input_path=input_path)
        # A fresh tracker per video so ids do not leak across files.
        tracker = Tracker(
            distance_function=euclidean_distance,
            distance_threshold=max_distance_between_points,
        )
        for frame in video:
            detections = model(frame)
            # assumes box[-1] == 2 selects the single class being tracked
            # (COCO-style id?) — TODO confirm.
            detections = [
                Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
                for box in detections
                if box[-1] == 2
            ]
            tracked_objects = tracker.update(detections=detections)
            norfair.draw_points(frame, detections)
            norfair.draw_tracked_objects(frame, tracked_objects)
            video.write(frame)


# Fix: the script body previously ran at import time (argparse + model load
# as module-level side effects); guard it so the module is importable.
if __name__ == "__main__":
    main()
# Per-frame tracking loop: detect, filter to the requested classes, update the
# tracker, and draw annotations back onto the frame.
for frame in video:
    # Detector __call__: confidence / IoU thresholds and the model-level class
    # filter all come from the CLI arguments.
    detections = model(
        frame,
        conf=args.conf,
        iou=args.iou_thres,
        classes_model=args.classes_model,
    )

    # Collapse each kept box to its centroid for norfair; when no
    # --classes-track list is given, every detection is tracked.
    detections = [
        Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
        for box in detections
        if not args.classes_track or box[-1] in args.classes_track
    ]

    tracked_objects = tracker.update(detections=detections)

    norfair.draw_points(frame, detections, radius=10, thickness=5, color=norfair.Color.green)
    norfair.draw_tracked_objects(frame, tracked_objects, id_size=5, id_thickness=10, color=norfair.Color.green)

    if args.debug:
        norfair.draw_debug_metrics(frame, tracked_objects)