def yolo_detections_to_norfair_detections(
        yolo_detections: torch.Tensor,
        track_points: str = 'centroid'  # bbox or centroid
) -> List[Detection]:
    """Convert YOLO detections (xywh or xyxy format) to norfair Detections."""
    norfair_detections: List[Detection] = []
    if track_points == 'centroid':
        detections_as_xywh = yolo_detections.xywh[0]
        for detection_as_xywh in detections_as_xywh:
            centroid = np.array(
                [detection_as_xywh[0].item(), detection_as_xywh[1].item()])
            scores = np.array([detection_as_xywh[4].item()])
            label = int(detection_as_xywh[5].item())
            norfair_detections.append(
                Detection(points=centroid, scores=scores, label=label))
    elif track_points == 'bbox':
        detections_as_xyxy = yolo_detections.xyxy[0]
        for detection_as_xyxy in detections_as_xyxy:
            bbox = np.array(
                [[detection_as_xyxy[0].item(), detection_as_xyxy[1].item()],
                 [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()]])
            scores = np.array(
                [detection_as_xyxy[4].item(), detection_as_xyxy[4].item()])
            label = int(detection_as_xyxy[5].item())
            norfair_detections.append(
                Detection(points=bbox, scores=scores, label=label))
    return norfair_detections
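A minimal usage sketch for the converter above, assuming a YOLOv5-style model loaded via torch.hub whose results object exposes the .xywh / .xyxy attributes the function indexes into; the weights name, video path, and threshold are illustrative:

import numpy as np
import torch
from norfair import Tracker, Video, draw_tracked_objects

model = torch.hub.load("ultralytics/yolov5", "yolov5s")  # hypothetical model choice
video = Video(input_path="input.mp4")
tracker = Tracker(
    distance_function=lambda det, obj: np.linalg.norm(det.points - obj.estimate),
    distance_threshold=30,
)
for frame in video:
    detections = yolo_detections_to_norfair_detections(
        model(frame), track_points='centroid')
    tracked_objects = tracker.update(detections=detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)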
def to_norfair_detections(self, track_points: str = "bbox"):
    """
    Args:
        track_points (str): 'centroid' or 'bbox'. Defaults to 'bbox'.
    """
    norfair_detections: List[Detection] = []
    # convert all detections to norfair detections
    for annotation in self.annotation_list:
        # calculate bbox points
        xmin = annotation.bbox[0]
        ymin = annotation.bbox[1]
        xmax = annotation.bbox[0] + annotation.bbox[2]
        ymax = annotation.bbox[1] + annotation.bbox[3]
        scores = None
        # calculate points as bbox or centroid
        if track_points == "bbox":
            points = np.array([[xmin, ymin], [xmax, ymax]])  # bbox
            if annotation.score is not None:
                scores = np.array([annotation.score, annotation.score])
        elif track_points == "centroid":
            points = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2])  # centroid
            if annotation.score is not None:
                scores = np.array([annotation.score])
        else:
            raise ValueError(
                "'track_points' should be one of ['centroid', 'bbox'].")
        # create norfair formatted detection
        norfair_detections.append(Detection(points=points, scores=scores))
    return norfair_detections
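Why the score is duplicated in the bbox branch: norfair expects one score per tracked point, and a bbox is represented here as two points (top-left and bottom-right). A quick check with hypothetical values:

import numpy as np
from norfair import Detection

det = Detection(points=np.array([[10, 20], [50, 80]]),
                scores=np.array([0.9, 0.9]))
assert det.points.shape[0] == det.scores.shape[0]  # one score per point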
def worker_tracking(detect_q, tracker_q):
    # input_q and tracker must exist at module level; they are shared with
    # the detection side of the pipeline.
    while True:
        box_detects = detect_q.get()
        frame = input_q.get()
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = tracker.update(detections=detections)
        # norfair.draw_boxes(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        tracker_q.put(frame)
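Hypothetical wiring for worker_tracking, run as a daemon thread so the module-level input_q and tracker globals it reads stay shared with the producer side; the queue sizes and names are illustrative:

from queue import Queue
from threading import Thread

detect_q = Queue(maxsize=4)   # filled by the detector with per-frame boxes
tracker_q = Queue(maxsize=4)  # consumed by the display/writer side

Thread(target=worker_tracking, args=(detect_q, tracker_q), daemon=True).start()
annotated_frame = tracker_q.get()  # blocks until the worker emits a frame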
def run(self):
    print("Thread tracking start")
    while self.stt_queue.empty():
        box_detects, frame = self.detect_queue.get()
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = self.tracker.update(detections=detections)
        for box in box_detects:
            draw_border(frame, box)
        norfair.draw_tracked_objects(frame, tracked_objects)
        self.track_queue.put(frame)
def recieve_object_detection_result(self, object_detection_result):
    detected_objects = object_detection_result.detected_objects
    # Convert DetectedObject to norfair.Detection.
    # Set DetectedObject in data field of norfair.Detection.
    detections = [
        Detection(self.get_center(obj), data=obj)
        for obj in detected_objects
    ]
    tracked_objects = self.tracker.update(detections=detections)
    # live_points is a boolean array; .any() also works for multi-point objects
    objs = [
        self.create_detected_object_with_id(obj)
        for obj in tracked_objects if obj.live_points.any()
    ]
    self.pub.publish(ObjectDetectionResult(detected_objects=objs))
def get_dets_from_frame(self, frame_number):
    """Return the list of norfair Detection objects for frame=frame_number."""
    indexes = np.argwhere(self.matrix_detections[:, 0] == frame_number)
    detections = []
    if len(indexes) > 0:
        actual_det = self.matrix_detections[indexes]
        actual_det.shape = [actual_det.shape[0], actual_det.shape[2]]
        for det in actual_det:
            points = np.array([[det[2], det[3]], [det[4], det[5]]])
            conf = det[6]
            new_detection = Detection(points, np.array([conf, conf]))
            detections.append(new_detection)
    self.actual_detections = detections
    return detections
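The indexing above implies a per-row layout for self.matrix_detections; a hypothetical example consistent with it (column 1 is unused by this method, e.g. a detection id):

import numpy as np

# columns: frame, id, x1, y1, x2, y2, confidence
matrix_detections = np.array([
    [0, 0, 100.0, 120.0, 180.0, 260.0, 0.91],
    [0, 1, 300.0, 90.0, 360.0, 210.0, 0.84],
    [1, 0, 104.0, 122.0, 184.0, 262.0, 0.90],
])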
def video(
    input_file: Path = typer.Argument(
        ...,
        file_okay=True,
        dir_okay=False,
    ),
    output_file: Path = typer.Option(
        "./output/norfair-test.mp4",
        file_okay=True,
        dir_okay=False,
    ),
    max_distance: int = typer.Option(60),
    debug: bool = typer.Option(False),
):
    """
    Runs vehicle detection on frames of a video. Outputs a directory of images
    ready for processing with the ``images`` command.

    XXX not actually ready yet; I'm currently testing the `norfair` package,
    which tracks detections through time, so I can be smart about outputting
    only the largest and clearest frame of a vehicle rather than many similar
    frames of the same vehicle.
    """
    yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net()
    video = Video(input_path=str(input_file), output_path=str(output_file))
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance,
    )
    for frame in video:
        detections = detect_objects(yolo_net, yolo_labels, yolo_layers,
                                    yolo_colors, frame)
        detections = list(
            filter(lambda d: d["label"] in VEHICLE_CLASSES, detections))
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]),
                      data=box) for box in detections
        ]
        tracked_objects = tracker.update(detections=detections)
        if debug:
            # drop into the debugger only when --debug is passed
            import pdb
            pdb.set_trace()
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
nargs="+", help="Video files to process") args = parser.parse_args() for input_path in args.files: video = Video(input_path=input_path) tracker = Tracker( distance_function=keypoints_distance, distance_threshold=distance_threshold, detection_threshold=detection_threshold, pointwise_hit_counter_max=2, ) keypoint_dist_threshold = video.input_height / 25 for i, frame in enumerate(video): if i % frame_skip_period == 0: detected_poses = pose_detector(frame) detections = ([] if not detected_poses.any() else [ Detection(p, scores=s) for (p, s) in zip(detected_poses[:, :, :2], detected_poses[:, :, 2]) ]) tracked_objects = tracker.update(detections=detections, period=frame_skip_period) norfair.draw_points(frame, detections) else: tracked_objects = tracker.update() norfair.draw_tracked_objects(frame, tracked_objects) video.write(frame)
if args.time:
    tic = time.time()
bbs = od.detect_get_box_in(
    frame,
    box_format="center_point",
    classes=od_target_classes,
    buffer_ratio=0.0,
)
if args.time:
    toc = time.time()
    print('OD infer duration: {:0.3f}'.format(toc - tic))

# MOTracking
norfair_dets = [
    Detection(np.array(center_point))  # norfair expects points as ndarrays
    for center_point, score, pred_class_name in bbs
]
tracks = tracker.update(detections=norfair_dets)
if args.time:
    toc2 = time.time()
    print('norfair infer duration: {:0.5f}'.format(toc2 - toc))
show_frame = frame.copy()
draw_tracked_objects(show_frame, tracks)
# drawer.draw_status(show_frame, status=True)
# if display and mouse_dict["click"]:
#     chosen_track = choose(
#         # mouse_dict["click"], det_thread_dict["tracks"]
#         mouse_dict["click"], tracks
#     )
#     if chosen_track:
    distance_threshold=DISTANCE_THRESHOLD,
    detection_threshold=DETECTION_THRESHOLD,
    hit_counter_max=HIT_COUNTER_MAX,
    initialization_delay=INITIALIZATION_DELAY,
    pointwise_hit_counter_max=POINTWISE_HIT_COUNTER_MAX,
)
KEYPOINT_DIST_THRESHOLD = video.input_height / 40

for frame in video:
    datum.cvInputData = frame
    detector(op.VectorDatum([datum]))
    detected_poses = datum.poseKeypoints
    if detected_poses is not None:
        openpose_detections = ([] if not detected_poses.any() else [
            Detection(p, scores=s, label=0)
            for (p, s) in zip(detected_poses[:, :, :2],
                              detected_poses[:, :, 2])
        ])
    else:
        openpose_detections = []
    yolo_out = model(frame,
                     conf_threshold=args.conf_thres,
                     iou_threshold=args.iou_thresh,
                     image_size=args.img_size,
                     classes=args.classes)
    yolo_detections = yolo_detections_to_norfair_detections(
        yolo_out, track_points=args.track_points)
    detections = openpose_detections + yolo_detections
    tracked_objects = tracker.update(detections=detections)
# %%
    # Identifying only a person
    boxes = detections['detection_boxes'][0].numpy()
    classes = detections['detection_classes'][0].numpy()
    classes_int = (classes + label_id_offset).astype(int)
    scores = detections['detection_scores'][0].numpy()
    boxes_valid = boxes[scores > 0.7]
    classes_int_valid = classes_int[scores > 0.7]
    scores_valid = scores[scores > 0.7]
    centroids_nor = []  # reset per frame so old centroids don't accumulate
    for box in boxes_valid:
        centroids_nor.append(get_centroid(box, H, W))
    detections_nor = [Detection(point) for point in centroids_nor]
    tracked_objects = tracker.update(detections=detections_nor,
                                     period=args["skip_frames"])
else:
    tracked_objects = tracker.update()

draw_tracked_objects(image_np, tracked_objects, radius=10, id_size=2)
for person in tracked_objects:
    # print(person.id)
    # print(person.estimate[0])
    to = trackableObjects.get(person.id, None)
    if to is None:
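The loop above relies on a get_centroid helper that is not shown. One plausible definition, assuming TensorFlow Object Detection API boxes, which are normalized [ymin, xmin, ymax, xmax] (consistent with passing H and W above):

import numpy as np

def get_centroid(box, H, W):
    ymin, xmin, ymax, xmax = box
    # scale the normalized box center back to pixel coordinates
    return np.array([(xmin + xmax) / 2 * W, (ymin + ymax) / 2 * H])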
def get_centroid(yolo_box, img_height, img_width):
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


# set use_cuda=False if using CPU
# for input_path in args.files:
video = Video(input_path='/home/sonnh/Downloads/town_cut.mp4',
              output_fps=30.0)
tracker = Tracker(
    distance_function=euclidean_distance,
    distance_threshold=max_distance_between_points,
)
frame_num = -1
for frame in video:
    frame_num += 1
    if frame_num % 2 == 0:  # run detection and tracking every other frame
        frame = np.array(frame)
        box_detects, _, _ = detector.detect(frame)
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
import cv2
import numpy as np
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from norfair import Detection, Tracker, Video, draw_tracked_objects

# Set up Detectron2 object detector
cfg = get_cfg()
cfg.merge_from_file("./detectron2_config.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
detector = DefaultPredictor(cfg)


# Distance function
def centroid_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)


# Norfair
video = Video(input_path="./video.mp4")
tracker = Tracker(distance_function=centroid_distance, distance_threshold=20)

for frame in video:
    detections = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # Wrap Detectron2 detections in Norfair's Detection objects
    detections = [
        Detection(p)
        for p, c in zip(
            detections["instances"].pred_boxes.get_centers().cpu().numpy(),
            detections["instances"].pred_classes) if c == 2
    ]
    tracked_objects = tracker.update(detections=detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)
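The c == 2 filter keeps only cars: in the COCO contiguous label map used by Detectron2, id 0 is person, 1 is bicycle, and 2 is car. Naming the id makes the intent explicit; a variant of the same filtering step:

CAR_CLASS_ID = 2  # COCO contiguous ids: 0 person, 1 bicycle, 2 car

instances = detections["instances"]
detections = [
    Detection(p)
    for p, c in zip(instances.pred_boxes.get_centers().cpu().numpy(),
                    instances.pred_classes)
    if int(c) == CAR_CLASS_ID
]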
# capture the next image
img = input.Capture()
np_source = jetson.utils.cudaToNumpy(img)
np_source = cv2.cvtColor(np_source, cv2.COLOR_RGBA2BGR)
detections = net.Detect(img, overlay='none')
chosen_track = None

# print the detections
print("detected {:d} objects in image".format(len(detections)))
raw_dets = []
for detection in detections:
    center_x = detection.Left + detection.Width / 2
    center_y = detection.Top + detection.Height / 2
    raw_dets.append((center_x, center_y))
# norfair expects detection points as numpy arrays, not tuples
norfair_dets = [Detection(np.array(center_point)) for center_point in raw_dets]
tracks = tracker.update(detections=norfair_dets)
show_frame = np_source.copy()
draw_tracked_objects(show_frame, tracks)
cv2.imshow("webcam", show_frame)
k = cv2.waitKey(30)
if k == ord('q'):
    break

# print out performance info
net.PrintProfilerTimes()

# exit on input/output EOS
if not input.IsStreaming():
def doTracking(data_dict, first_id):
    sortedKeys = natsorted(data_dict.keys())
    tracker = Tracker(distance_function=euclidean_distance,
                      distance_threshold=700,
                      point_transience=1,
                      hit_inertia_min=1,
                      hit_inertia_max=75,
                      init_delay=25)
    max_id = first_id
    first_frame = 0
    last_frame = 0
    if len(sortedKeys) > 0:
        first_frame = int(sortedKeys[0].split('.')[0])
        last_frame = int(sortedKeys[-1].split('.')[0])
    for ii in range(first_frame, last_frame + 1):
        curr_key = '{0:05d}.jpg'.format(ii)
        detections = []
        if curr_key in sortedKeys:
            im_dict = data_dict[curr_key]
            # img = cv2.imread(im_dict["full_im_path"])  # only needed for drawing below
            people = im_dict['people']
            for kk in range(len(people)):
                person = people[kk]
                if person['valid_sub_im']:
                    center = np.array(person['head_pos'])
                    detections.append(Detection(center))
            tracked_objects = tracker.update(detections=detections)
            # draw_tracked_objects(img, tracked_objects)
            for kk in range(len(people)):
                person = people[kk]
                person['ID'] = -1
            # greedy nearest-neighbour assignment between people and tracks
            sz = max(len(people), len(tracked_objects))
            all_dists = np.ones((sz, sz)) * math.inf
            for kk in range(len(people)):
                person = people[kk]
                c = np.array(person['head_pos'])
                if person['valid_sub_im']:
                    for tt in range(len(tracked_objects)):
                        tracked_object = tracked_objects[tt]
                        ct = tracked_object.estimate
                        distance = math.sqrt(((c[0] - ct[0][0])**2) +
                                             ((c[1] - ct[0][1])**2))
                        all_dists[kk, tt] = distance
            for kk in range(len(people)):
                min_overall = np.amin(all_dists)
                if min_overall == math.inf or min_overall > 75:
                    break
                min_idxs = np.where(all_dists == np.amin(all_dists))
                try:
                    min_person = int(min_idxs[0])
                    min_tracked_obj = int(min_idxs[1])
                    person = people[min_person]
                    all_dists[:, min_tracked_obj] = math.inf
                    all_dists[min_person, :] = math.inf
                    tracked_object = tracked_objects[min_tracked_obj]
                    person['ID'] = first_id + tracked_object.id - 1
                    if max_id < person['ID']:
                        max_id = person['ID']
                except (TypeError, ValueError):
                    # ties: int() fails on multi-element index arrays
                    print('No min dists? Skipping')
        else:
            tracker.update(detections=detections)
    return data_dict, max_id
def get_centroid(yolo_box, img_height, img_width):
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


parser = argparse.ArgumentParser(description="Track human poses in a video.")
parser.add_argument("files",
                    type=str,
                    nargs="+",
                    help="Video files to process")
args = parser.parse_args()

model = YOLO("yolov4.pth")  # set use_cuda=False if using CPU

for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )
    for frame in video:
        detections = model(frame)
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]),
                      data=box) for box in detections if box[-1] == 2
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
def to_norfair_trackedobjects(self, track_points: str = "bbox"):
    """
    Args:
        track_points (str): 'centroid' or 'bbox'. Defaults to 'bbox'.
    """
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=30,
        detection_threshold=0,
        hit_inertia_min=0,
        hit_inertia_max=12,
        point_transience=4,
    )
    tracked_object_list: List[TrackedObject] = []
    # convert all annotations to norfair tracked objects
    for annotation in self.annotation_list:
        # ensure annotation.track_id is set
        if annotation.track_id is None:
            raise TypeError(
                "to_norfair_trackedobjects() requires annotation.track_id to be set."
            )
        # calculate bbox points
        xmin = annotation.bbox[0]
        ymin = annotation.bbox[1]
        xmax = annotation.bbox[0] + annotation.bbox[2]
        ymax = annotation.bbox[1] + annotation.bbox[3]
        track_id = annotation.track_id
        scores = None
        # calculate points as bbox or centroid
        if track_points == "bbox":
            points = np.array([[xmin, ymin], [xmax, ymax]])  # bbox
            if annotation.score is not None:
                scores = np.array([annotation.score, annotation.score])
        elif track_points == "centroid":
            points = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2])  # centroid
            if annotation.score is not None:
                scores = np.array([annotation.score])
        else:
            raise ValueError(
                "'track_points' should be one of ['centroid', 'bbox'].")
        # create norfair formatted detection
        detection = Detection(points=points, scores=scores)
        # create trackedobject from norfair detection
        tracked_object = TrackedObject(
            detection,
            tracker.hit_inertia_min,
            tracker.hit_inertia_max,
            tracker.initialization_delay,
            tracker.detection_threshold,
            period=1,
            point_transience=tracker.point_transience,
            filter_setup=tracker.filter_setup,
        )
        tracked_object.id = track_id
        tracked_object.point_hit_counter = np.ones(tracked_object.num_points)
        # append to tracked_object_list
        tracked_object_list.append(tracked_object)
    return tracked_object_list
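The keyword names used here (hit_inertia_min, hit_inertia_max, point_transience, filter_setup) come from older norfair releases; other snippets in this section pass the renamed parameters instead. A rough Tracker with comparable settings under the newer names (the exact parameter mapping is an assumption):

import numpy as np
from norfair import Tracker

tracker = Tracker(
    distance_function=lambda det, obj: np.linalg.norm(det.points - obj.estimate),
    distance_threshold=30,
    detection_threshold=0,
    hit_counter_max=12,           # roughly replaces hit_inertia_max
    initialization_delay=0,       # roughly replaces the hit_inertia_min warm-up
    pointwise_hit_counter_max=4,  # roughly replaces point_transience
)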