def worker_tracking(detect_q, tracker_q):
    # Note: `input_q` and `tracker` are read here but not passed in; they are
    # assumed to be module-level globals in the original script.
    while True:
        box_detects = detect_q.get()
        frame = input_q.get()
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = tracker.update(detections=detections)
        # norfair.draw_boxes(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        tracker_q.put(frame)
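A minimal sketch of the `get_center` helper the worker assumes, under the assumption that each box arrives as an `[x1, y1, x2, y2]` array; the project's real helper may differ.

import numpy as np

def get_center(box):
    # Hypothetical helper: box assumed to be [x1, y1, x2, y2] in pixels.
    return np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])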
def run(self):
    print("Thread tracking start")
    # `stt_queue` appears to act as a stop signal: loop until it gets an item.
    while self.stt_queue.empty():
        box_detects, frame = self.detect_queue.get()
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = self.tracker.update(detections=detections)
        for box in box_detects:
            draw_border(frame, box)
        norfair.draw_tracked_objects(frame, tracked_objects)
        self.track_queue.put(frame)
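`draw_border` is not defined in this excerpt; a plausible sketch, assuming boxes are `[x1, y1, x2, y2]` and OpenCV is available:

import cv2

def draw_border(frame, box, color=(0, 255, 0), thickness=2):
    # Hypothetical helper: outline the detection box on the frame in place.
    x1, y1, x2, y2 = (int(v) for v in box[:4])
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)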
def video(
    input_file: Path = typer.Argument(
        ...,
        file_okay=True,
        dir_okay=False,
    ),
    output_file: Path = typer.Option(
        "./output/norfair-test.mp4",
        file_okay=True,
        dir_okay=False,
    ),
    max_distance: int = typer.Option(60),
    debug: bool = typer.Option(False),
):
    """
    Runs vehicle detection on frames of a video. Outputs a directory of images
    ready for processing with the ``images`` command.

    XXX not actually ready yet; I'm currently testing the `norfair` package,
    which tracks detections through time so I can be smart about outputting
    only the largest and clearest frame of a vehicle rather than many similar
    frames of the same vehicle.
    """
    yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net()
    video = Video(input_path=str(input_file), output_path=str(output_file))
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance,
    )
    for frame in video:
        detections = detect_objects(yolo_net, yolo_labels, yolo_layers,
                                    yolo_colors, frame)
        detections = list(
            filter(lambda d: d["label"] in VEHICLE_CLASSES, detections))
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
        ]
        tracked_objects = tracker.update(detections=detections)
        if debug:
            # Drop into the debugger only when --debug is passed.
            import pdb
            pdb.set_trace()
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
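The `euclidean_distance` passed to the Tracker is not shown; a sketch matching the point-vs-estimate form norfair's examples use, which is an assumption about this codebase:

import numpy as np

def euclidean_distance(detection, tracked_object):
    # Distance between the detection's point(s) and the tracked estimate.
    return np.linalg.norm(detection.points - tracked_object.estimate)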
def handle(self, rgb_depth, detections):
    """Run the tracker on the current rgb_depth frame for the detections found."""
    logging.info("In TrackingHandlerNorfair ... ")
    detections = self.to_norfair(detections)
    self.tracked_objects = self.tracker.update(detections, period=4)
    img = rgb_depth.rgb
    if not self.silent:
        print_objects_as_table(self.tracked_objects)
    if os.getenv("DEBUG_DRAW") == "True":
        draw_tracked_objects(img, self.tracked_objects)
        cv2.imshow("Norfair", img)
        # Note: str(datetime.now()) contains spaces and colons, which some
        # filesystems reject; a formatted timestamp would be safer here.
        tf = tempfile.NamedTemporaryFile(
            prefix="norfair_" + str(datetime.now()) + "_locobot_capture_" + "_",
            suffix=".jpg",
            dir=".",
            delete=False,
        )
        cv2.imwrite(tf.name, img)
        cv2.waitKey(3)
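`self.to_norfair` is not shown; a hypothetical sketch, assuming each incoming detection exposes a 2-D center point (the real schema may differ):

import numpy as np
from norfair import Detection

def to_norfair(self, detections):
    # Assumption: each detection carries an (x, y) center in `d.center`.
    return [Detection(np.array(d.center), data=d) for d in detections]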
import argparse

import numpy as np
import norfair
from norfair import Detection, Tracker, Video


def get_centroid(yolo_box, img_height, img_width):
    # YOLO boxes are normalized [x1, y1, x2, y2]; scale to pixels, take the center.
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


def euclidean_distance(detection, tracked_object):
    # Definition assumed; matches the distance used in norfair's examples.
    return np.linalg.norm(detection.points - tracked_object.estimate)


max_distance_between_points = 30  # value assumed; not shown in the excerpt

parser = argparse.ArgumentParser(description="Track objects in a video.")
parser.add_argument("files", type=str, nargs="+", help="Video files to process")
args = parser.parse_args()

model = YOLO("yolov4.pth")  # YOLO wrapper assumed imported elsewhere; set use_cuda=False if using CPU

for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )
    for frame in video:
        detections = model(frame)
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
            for box in detections
            if box[-1] == 2  # keep only class 2 (car in COCO)
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
    buffer_ratio=0.0,
)
if args.time:
    toc = time.time()
    print('OD infer duration: {:0.3f}'.format(toc - tic))

# MOT tracking with norfair
norfair_dets = [
    Detection(center_point)
    for center_point, score, pred_class_name in bbs
]
tracks = tracker.update(detections=norfair_dets)
if args.time:
    toc2 = time.time()
    print('norfair infer duration: {:0.5f}'.format(toc2 - toc))

show_frame = frame.copy()
draw_tracked_objects(show_frame, tracks)
# drawer.draw_status(show_frame, status=True)
# if display and mouse_dict["click"]:
#     chosen_track = choose(
#         # mouse_dict["click"], det_thread_dict["tracks"]
#         mouse_dict["click"], tracks
#     )
#     if chosen_track:
#         print(f"CHOSEN TRACK {chosen_track.track_id}")
#     mouse_dict["click"] = None
# if display or out_vid:
#     drawer.draw_tracks(
#         show_frame,
#         tracks,
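For context, a sketch of the tracker setup this loop assumes; the distance function, the threshold, and the shape of `bbs` are assumptions, not the original configuration:

import numpy as np
from norfair import Tracker

def euclidean_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)

# Assumed: bbs is an iterable of (center_point, score, class_name) tuples
# emitted by the object detector earlier in the loop.
tracker = Tracker(distance_function=euclidean_distance, distance_threshold=30)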
detector(op.VectorDatum([datum]))
detected_poses = datum.poseKeypoints

if detected_poses is not None:
    openpose_detections = ([] if not detected_poses.any() else [
        Detection(p, scores=s, label=0)
        for (p, s) in zip(detected_poses[:, :, :2], detected_poses[:, :, 2])
    ])
else:
    openpose_detections = []

yolo_out = model(frame,
                 conf_threshold=args.conf_thres,
                 iou_threshold=args.iou_thresh,
                 image_size=args.img_size,
                 classes=args.classes)
yolo_detections = yolo_detections_to_norfair_detections(
    yolo_out, track_points=args.track_points)

# Track poses (label 0) and YOLO objects with a single tracker.
detections = openpose_detections + yolo_detections
tracked_objects = tracker.update(detections=detections)

norfair.draw_tracked_objects(
    frame, [person for person in tracked_objects if person.label == 0],
    color_by_label=True)
norfair.draw_tracked_boxes(
    frame, [obj for obj in tracked_objects if obj.label != 0],
    color_by_label=True)
video.write(frame)
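`yolo_detections_to_norfair_detections` is defined elsewhere; a simplified sketch, assuming each YOLO output row is `[x1, y1, x2, y2, conf, class]`:

import numpy as np
from norfair import Detection

def yolo_detections_to_norfair_detections(yolo_out, track_points="centroid"):
    detections = []
    for x1, y1, x2, y2, conf, cls in yolo_out:
        if track_points == "bbox":
            points = np.array([[x1, y1], [x2, y2]])   # track both corners
            scores = np.array([conf, conf])
        else:
            points = np.array([(x1 + x2) / 2, (y1 + y2) / 2])  # centroid
            scores = np.array([conf])
        detections.append(Detection(points, scores=scores, label=int(cls)))
    return detections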
def update(self):
    # final_result = []
    norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
    hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
    if self.save_video:
        # initialize the file video stream, adapt output video resolution to original video
        stream = cv2.VideoWriter(*[
            self.video_save_opt[k]
            for k in ['savepath', 'fourcc', 'fps', 'frameSize']
        ])
        if not stream.isOpened():
            print("Try to use other video encoders...")
            ext = self.video_save_opt['savepath'].split('.')[-1]
            fourcc, _ext = self.recognize_video_ext(ext)
            self.video_save_opt['fourcc'] = fourcc
            self.video_save_opt[
                'savepath'] = self.video_save_opt['savepath'][:-4] + _ext
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
        assert stream.isOpened(), 'Cannot open video for writing'
    # keep looping infinitely
    while True:
        # ensure the queue is not empty and get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
         im_name) = self.wait_and_get(self.result_queue)
        if orig_img is None:
            # if the thread indicator variable is set (img is None), stop the thread
            if self.save_video:
                stream.release()
            # write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
            # print("Results have been written to json.")
            return
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        if boxes is None or len(boxes) == 0:
            if self.opt.save_img or self.save_video or self.opt.vis:
                self.write_image(
                    orig_img,
                    im_name,
                    stream=stream if self.save_video else None)
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            # pred = hm_data.cpu().data.numpy()
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            pose_coords = []
            pose_scores = []
            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                pose_coord, pose_score = self.heatmap_to_coord(
                    hm_data[i][self.eval_joints],
                    bbox,
                    hm_shape=hm_size,
                    norm_type=norm_type)
                pose_coords.append(
                    torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(
                    torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)
            if not self.opt.pose_track:
                boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                    pose_nms(boxes, scores, ids, preds_img, preds_scores,
                             self.opt.min_box_area)

            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints': preds_img[k],
                    'kp_score': preds_scores[k],
                    'proposal_score':
                        torch.mean(preds_scores[k]) + scores[k] +
                        1.25 * max(preds_scores[k]),
                    'idx': ids[k],
                    'box': [
                        boxes[k][0], boxes[k][1],
                        boxes[k][2] - boxes[k][0],
                        boxes[k][3] - boxes[k][1]
                    ]
                })

            result = {'imgname': im_name, 'result': _result}

            if self.opt.pose_flow:
                poseflow_result = self.pose_flow_wrapper.step(
                    orig_img, result)
                for i in range(len(poseflow_result)):
                    result['result'][i]['idx'] = poseflow_result[i]['idx']

            # Norfair tracking over the detected keypoints.
            global keypoint_dist_threshold
            keypoint_dist_threshold = orig_img.shape[0] / 30
            detections = [
                norfair.Detection(
                    p['keypoints'].numpy(),
                    scores=p['kp_score'].squeeze().numpy())
                for p in result['result']
            ]
            tracked_objects = self.tracker.update(detections=detections)
            # Note: drawing on orig_img.copy() discards the overlay; draw on
            # orig_img itself to keep the visualization.
            norfair.draw_tracked_objects(orig_img.copy(), tracked_objects)
            # self.cropped(orig_img.copy(), tracked_objects)
            face.export_face_img(tracked_objects,
                                 orig_img.copy(),
                                 os.path.join(self.opt.outputpath, 'vis'),
                                 vdo_fname='id')

            # final_result.append(result)
            if self.opt.save_img or self.save_video or self.opt.vis:
                if hm_data.size()[1] == 49:
                    from alphapose.utils.vis import vis_frame_dense as vis_frame
                elif self.opt.vis_fast:
                    from alphapose.utils.vis import vis_frame_fast as vis_frame
                else:
                    from alphapose.utils.vis import vis_frame
                img = vis_frame(orig_img, result, self.opt)
                self.write_image(
                    img,
                    im_name,
                    stream=stream if self.save_video else None)
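The `keypoint_dist_threshold` global set above implies a keypoint-based distance function for `self.tracker`; a sketch modeled on norfair's pose-tracking demos, where `detection_threshold` is an assumed constant:

import numpy as np

detection_threshold = 0.2  # assumed keypoint-confidence cutoff

def keypoint_distance(detected_pose, tracked_pose):
    # Count keypoints that are both confident and close to the tracked
    # estimate; more matching keypoints means a smaller distance.
    distances = np.linalg.norm(detected_pose.points - tracked_pose.estimate, axis=1)
    match_num = np.count_nonzero(
        (distances < keypoint_dist_threshold)
        * (detected_pose.scores > detection_threshold)
        * (tracked_pose.last_detection.scores > detection_threshold))
    return 1 / (1 + match_num)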
    boxes_valid = boxes[scores > 0.7]
    classes_int_valid = classes_int[scores > 0.7]
    scores_valid = scores[scores > 0.7]
    # `centroids_nor` is assumed to be reset to an empty list earlier in the loop.
    for box in boxes_valid:
        centroids_nor.append(get_centroid(box, H, W))
    detections_nor = [Detection(point) for point in centroids_nor]
    # On detection frames, feed norfair fresh detections; `period` tells the
    # tracker how many frames pass between detection runs.
    tracked_objects = tracker.update(detections=detections_nor,
                                     period=args["skip_frames"])
else:
    # On skipped frames, just advance the tracker's estimates.
    tracked_objects = tracker.update()

draw_tracked_objects(image_np, tracked_objects, radius=10, id_size=2)

for person in tracked_objects:
    # print(person.id)
    # print(person.estimate[0])
    to = trackableObjects.get(person.id, None)
    if to is None:
        to = TrackableObject(person.id, person.estimate[0])
    else:
        x = [c[0] for c in to.centroids]
        y = [c[1] for c in to.centroids]
        direction_x = person.estimate[0][0] - np.mean(x)
        direction_y = person.estimate[0][1] - np.mean(y)
        direction_pre_x = person.estimate[0][0] - to.centroids[-1][0]
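`TrackableObject` is not defined in this excerpt; a minimal hypothetical version consistent with how it is used above:

class TrackableObject:
    # Hypothetical: stores the track id plus the centroid history that the
    # direction calculations above read from.
    def __init__(self, object_id, centroid):
        self.objectID = object_id
        self.centroids = [centroid]
        self.counted = False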
    classes_model=args.classes_model)  # __call__ method
detections = [
    Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
    for box in detections
    # select only the classes you want to track
    if not args.classes_track or box[-1] in args.classes_track
]
tracked_objects = tracker.update(detections=detections)
norfair.draw_points(frame,
                    detections,
                    radius=10,
                    thickness=5,
                    color=norfair.Color.green)
norfair.draw_tracked_objects(frame,
                             tracked_objects,
                             id_size=5,
                             id_thickness=10,
                             color=norfair.Color.green)
if args.debug:
    norfair.draw_debug_metrics(frame, tracked_objects)  # debug (optional)

# Per-class variant kept for reference:
# detections_c1 = [Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box)
#                  for box in detections if box[-1] == 1]
# tracked_objects_c1 = tracker_c1.update(detections=detections_c1)
# norfair.draw_points(frame, detections_c1, color="green")
# norfair.draw_tracked_objects(frame, tracked_objects_c1, color="green")
# norfair.draw_debug_metrics(frame, tracked_objects_c1)  # debug

video.write(frame)
# video.show(frame)
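The commented-out block references a per-class `tracker_c1`; a sketch of how it could be instantiated (`euclidean_distance` and `max_distance` are assumed to exist in this script):

from norfair import Tracker

# A separate tracker per class keeps track ids from mixing across classes.
tracker_c1 = Tracker(distance_function=euclidean_distance,
                     distance_threshold=max_distance)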
import cv2
import numpy as np
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from norfair import Detection, Tracker, Video, draw_tracked_objects

# Set up Detectron2 object detector
cfg = get_cfg()
cfg.merge_from_file("./detectron2_config.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
detector = DefaultPredictor(cfg)

# Distance function
def centroid_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)

# Norfair
video = Video(input_path="./video.mp4")
tracker = Tracker(distance_function=centroid_distance, distance_threshold=20)

for frame in video:
    detections = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # Wrap Detectron2 detections in Norfair's Detection objects,
    # keeping only class 2 (car in COCO).
    detections = [
        Detection(p)
        for p, c in zip(
            detections["instances"].pred_boxes.get_centers().cpu().numpy(),
            detections["instances"].pred_classes)
        if c == 2
    ]
    tracked_objects = tracker.update(detections=detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)
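A variant sketch (not in the original): tracking both bounding-box corners instead of the centroid, so object size also informs matching; `to_corner_detections` is a hypothetical name.

import numpy as np
from norfair import Detection

def to_corner_detections(instances):
    # Detectron2's Boxes store [x1, y1, x2, y2]; use both corners as the
    # tracked points (two points per Detection).
    boxes = instances.pred_boxes.tensor.cpu().numpy()
    return [Detection(np.array([[x1, y1], [x2, y2]])) for x1, y1, x2, y2 in boxes]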