Example #1
def worker_tracking(detect_q, input_q, tracker_q):
    # detect_q yields detector boxes, input_q yields the matching frames,
    # tracker_q receives the annotated output frames
    while True:
        box_detects = detect_q.get()
        frame = input_q.get()
        detections = [
            Detection(get_center(box), data=box) for box in box_detects
        ]
        tracked_objects = tracker.update(detections=detections)
        # norfair.draw_boxes(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        tracker_q.put(frame)
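The worker above assumes a module-level norfair `tracker` and is meant to run alongside a detection stage. A minimal sketch of how it might be wired up; the queue objects and the thread setup are assumptions, not part of the original snippet:

import threading
from queue import Queue

# the worker expects a norfair Tracker built elsewhere, e.g. as in Example #5
detect_q, input_q, tracker_q = Queue(), Queue(), Queue()
threading.Thread(target=worker_tracking,
                 args=(detect_q, input_q, tracker_q),
                 daemon=True).start()
# a detector thread would feed detect_q and input_q in lockstep;
# a display or writer thread would consume annotated frames from tracker_q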
Example #2
    def run(self):
        print("Thread tracking start")

        # run until a stop signal is pushed onto stt_queue
        while self.stt_queue.empty():
            box_detects, frame = self.detect_queue.get()
            detections = [
                Detection(get_center(box), data=box) for box in box_detects
            ]
            tracked_objects = self.tracker.update(detections=detections)
            for box in box_detects:
                draw_border(frame, box)

            norfair.draw_tracked_objects(frame, tracked_objects)
            self.track_queue.put(frame)
Example #3
def video(
        input_file: Path = typer.Argument(
            ...,
            file_okay=True,
            dir_okay=False,
        ),
        output_file: Path = typer.Option(
            "./output/norfair-test.mp4",
            file_okay=True,
            dir_okay=False,
        ),
        max_distance: int = typer.Option(60),
        debug: bool = typer.Option(False),
):
    """
    Runs vehicle detection on frames of a video.
    Outputs a directory of images ready for processing with the ``images`` command.

    XXX not actually ready yet; I'm currently testing the `norfair` package, which tracks
    detections through time so I can be smart about outputting only the largest and
    clearest frame of a vehicle rather than many similar frames of the same vehicle.
    """
    yolo_net, yolo_labels, yolo_colors, yolo_layers = load_yolo_net()

    video = Video(input_path=str(input_file), output_path=str(output_file))
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance,
    )

    for frame in video:
        detections = detect_objects(yolo_net, yolo_labels, yolo_layers,
                                    yolo_colors, frame)
        detections = list(
            filter(lambda d: d["label"] in VEHICLE_CLASSES, detections))
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]),
                      data=box) for box in detections
        ]
        tracked_objects = tracker.update(detections=detections)
        if debug:  # drop into the debugger only when --debug is passed
            import pdb
            pdb.set_trace()
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
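The docstring above describes logic that is not implemented yet: keeping only the largest, clearest frame of each tracked vehicle. A rough sketch of that idea on top of norfair's TrackedObject API; the detection-dict keys and box area as the quality proxy are assumptions:

best_frames = {}  # hypothetical store: tracked object id -> (area, frame)


def keep_best_frame(frame, tracked_objects):
    # called once per frame, after tracker.update(...)
    for obj in tracked_objects:
        box = obj.last_detection.data  # the detector dict attached via Detection(data=box)
        area = box["w"] * box["h"]     # assumed keys; adapt to the real dict layout
        prev = best_frames.get(obj.id)
        if prev is None or area > prev[0]:
            best_frames[obj.id] = (area, frame.copy())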
Example #4
    def handle(self, rgb_depth, detections):
        """run tracker on the current rgb_depth frame for the detections found"""
        logging.info("In TrackingHandlerNorfair ... ")
        detections = self.to_norfair(detections)
        self.tracked_objects = self.tracker.update(detections, period=4)  # detections arrive every 4th frame
        img = rgb_depth.rgb

        if not self.silent:
            print_objects_as_table(self.tracked_objects)
        if os.getenv("DEBUG_DRAW") == "True":
            draw_tracked_objects(img, self.tracked_objects)
            cv2.imshow("Norfair", img)
            tf = tempfile.NamedTemporaryFile(
                prefix="norfair_" + str(datetime.now()) + "_locobot_capture_",
                suffix=".jpg",
                dir=".",
                delete=False,
            )
            cv2.imwrite(tf.name, img)
            cv2.waitKey(3)
Example #5
import argparse

import numpy as np
import norfair
from norfair import Detection, Tracker, Video

# NOTE: the YOLO wrapper class used below comes from the surrounding demo code
# (not shown in this fragment).


def get_centroid(yolo_box, img_height, img_width):
    # YOLO boxes are normalized (x1, y1, x2, y2); return the absolute centroid
    x1 = yolo_box[0] * img_width
    y1 = yolo_box[1] * img_height
    x2 = yolo_box[2] * img_width
    y2 = yolo_box[3] * img_height
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2])


parser = argparse.ArgumentParser(description="Track human poses in a video.")
parser.add_argument("files",
                    type=str,
                    nargs="+",
                    help="Video files to process")
args = parser.parse_args()

model = YOLO("yolov4.pth")  # set use_cuda=False if using CPU
max_distance_between_points = 30  # pixel threshold for matching; tune to resolution

for input_path in args.files:
    video = Video(input_path=input_path)
    tracker = Tracker(
        distance_function=euclidean_distance,
        distance_threshold=max_distance_between_points,
    )

    for frame in video:
        detections = model(frame)
        # keep only cars (COCO class index 2) and wrap them for norfair
        detections = [
            Detection(get_centroid(box, frame.shape[0], frame.shape[1]),
                      data=box) for box in detections if box[-1] == 2
        ]
        tracked_objects = tracker.update(detections=detections)
        norfair.draw_points(frame, detections)
        norfair.draw_tracked_objects(frame, tracked_objects)
        video.write(frame)
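Examples #3 and #5 pass a `euclidean_distance` function to the Tracker without defining it. A minimal definition consistent with the `centroid_distance` shown in Example #11:

import numpy as np


def euclidean_distance(detection, tracked_object):
    # norfair calls this with a Detection and a TrackedObject; for centroid
    # tracking both carry a single (x, y) point
    return np.linalg.norm(detection.points - tracked_object.estimate)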
Example #6
        buffer_ratio=0.0,
    )
    if args.time:
        toc = time.time()
        print('OD infer duration: {:0.3f}'.format(toc - tic))

    # MOTracking
    norfair_dets = [
        # only the center point is used; score and class name are ignored here
        Detection(center_point) for center_point, _score, _class_name in bbs
    ]
    tracks = tracker.update(detections=norfair_dets)
    if args.time:
        toc2 = time.time()
        print('norfair infer duration: {:0.5f}'.format(toc2 - toc))
    show_frame = frame.copy()
    draw_tracked_objects(show_frame, tracks)
    # drawer.draw_status(show_frame, status=True)

    # if display and mouse_dict["click"]:
    #     chosen_track = choose(
    #         # mouse_dict["click"], det_thread_dict["tracks"]
    #         mouse_dict["click"], tracks
    #     )
    #     if chosen_track:
    #         print(f"CHOSEN TRACK {chosen_track.track_id}")
    #     mouse_dict["click"] = None

    # if display or out_vid:
    #     drawer.draw_tracks(
    #         show_frame,
    #         tracks,
Example #7
            detector(op.VectorDatum([datum]))
            detected_poses = datum.poseKeypoints

            if detected_poses is not None:
                # poseKeypoints is (n_people, n_keypoints, 3): x, y, confidence
                openpose_detections = ([] if not detected_poses.any() else [
                    Detection(p, scores=s, label=0) for (p, s) in zip(
                        detected_poses[:, :, :2], detected_poses[:, :, 2])
                ])
            else:
                openpose_detections = []

            yolo_out = model(frame,
                             conf_threshold=args.conf_thres,
                             iou_threshold=args.iou_thresh,
                             image_size=args.img_size,
                             classes=args.classes)
            yolo_detections = yolo_detections_to_norfair_detections(
                yolo_out, track_points=args.track_points)
            detections = openpose_detections + yolo_detections

            tracked_objects = tracker.update(detections=detections)

            norfair.draw_tracked_objects(
                frame,
                [person for person in tracked_objects if person.label == 0],
                color_by_label=True)
            norfair.draw_tracked_boxes(
                frame, [obj for obj in tracked_objects if obj.label != 0],
                color_by_label=True)

            video.write(frame)
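Neither Example #7 nor Example #8 shows the distance function behind its tracker. For keypoint detections that carry per-point scores, a match-counting distance along these lines is commonly used with norfair (the threshold values here are placeholders; Example #8 derives `keypoint_dist_threshold` from the frame height):

import numpy as np

detection_threshold = 0.2      # assumed per-keypoint confidence cutoff
keypoint_dist_threshold = 15   # assumed pixel radius, cf. frame_height / 30 in Example #8


def keypoints_distance(detected_pose, tracked_pose):
    # count keypoints that are confident on both sides and spatially close,
    # then invert the count so more matches means a smaller distance
    distances = np.linalg.norm(detected_pose.points - tracked_pose.estimate, axis=1)
    match_num = np.count_nonzero(
        (distances < keypoint_dist_threshold)
        * (detected_pose.scores > detection_threshold)
        * (tracked_pose.last_detection.scores > detection_threshold)
    )
    return 1 / (1 + match_num)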
Example #8
    def update(self):
        # final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream; adapt the output video resolution to the original video
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt['savepath'] = (
                    self.video_save_opt['savepath'][:-4] + _ext)
                stream = cv2.VideoWriter(*[
                    self.video_save_opt[k]
                    for k in ['savepath', 'fourcc', 'fps', 'frameSize']
                ])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
             im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                # write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
                # print("Results have been written to json.")
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(
                        orig_img,
                        im_name,
                        stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4
                #pred = hm_data.cpu().data.numpy()

                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints],
                        bbox,
                        hm_shape=hm_size,
                        norm_type=norm_type)
                    pose_coords.append(
                        torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(
                        torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)

                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints': preds_img[k],
                        'kp_score': preds_scores[k],
                        'proposal_score': torch.mean(preds_scores[k]) +
                            scores[k] + 1.25 * max(preds_scores[k]),
                        'idx': ids[k],
                        # box converted from (x1, y1, x2, y2) to (x, y, w, h)
                        'box': [
                            boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0],
                            boxes[k][3] - boxes[k][1]
                        ]
                    })

                result = {'imgname': im_name, 'result': _result}

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(
                        orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                # the tracker's keypoint distance function reads this global;
                # scale the match radius with the frame height
                global keypoint_dist_threshold
                keypoint_dist_threshold = orig_img.shape[0] / 30
                detections = [
                    norfair.Detection(p['keypoints'].numpy(),
                                      scores=p['kp_score'].squeeze().numpy())
                    for p in result['result']
                ]
                tracked_objects = self.tracker.update(detections=detections)
                norfair.draw_tracked_objects(orig_img.copy(), tracked_objects)  # note: draws on a throwaway copy

                # self.cropped(orig_img.copy(), tracked_objects)
                face.export_face_img(tracked_objects,
                                     orig_img.copy(),
                                     os.path.join(self.opt.outputpath, 'vis'),
                                     vdo_fname='id')

                # final_result.append(result)
                if self.opt.save_img or self.save_video or self.opt.vis:

                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis import vis_frame_dense as vis_frame
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis import vis_frame
                    img = vis_frame(orig_img, result, self.opt)
                    self.write_image(
                        img,
                        im_name,
                        stream=stream if self.save_video else None)
Example #9
    valid = scores > 0.7  # keep detections above 0.7 confidence
    boxes_valid = boxes[valid]
    classes_int_valid = classes_int[valid]
    scores_valid = scores[valid]

        for box in boxes_valid:
            centroids_nor.append(get_centroid(box, H, W))

        detections_nor = [Detection(point) for point in centroids_nor]
        tracked_objects = tracker.update(detections=detections_nor,
                                         period=args["skip_frames"])

    else:
        tracked_objects = tracker.update()

    draw_tracked_objects(image_np, tracked_objects, radius=10, id_size=2)

    for person in tracked_objects:
        # print(person.id)
        # print(person.estimate[0])

        to = trackableObjects.get(person.id, None)

        if to is None:
            to = TrackableObject(person.id, person.estimate[0])
        else:
            x = [c[0] for c in to.centroids]
            y = [c[1] for c in to.centroids]
            direction_x = person.estimate[0][0] - np.mean(x)
            direction_y = person.estimate[0][1] - np.mean(y)
            direction_pre_x = person.estimate[0][0] - to.centroids[-1][0]
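The `TrackableObject` helper used in Example #9 is not shown in the fragment. A minimal reconstruction consistent with its usage (`person.id` as the key, `centroids` as the position history) would be:

class TrackableObject:
    # hypothetical reconstruction: keeps a tracked person's id and the history
    # of centroid estimates used for the direction calculation above
    def __init__(self, object_id, centroid):
        self.objectID = object_id
        self.centroids = [centroid]
        self.counted = False  # assumed flag for entry/exit counting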
Example #10
                classes_model=args.classes_model)  # __call__ method
            detections = [
                Detection(get_centroid(box, frame.shape[0], frame.shape[1]),
                          data=box) for box in detections
                if not args.classes_track or
                box[-1] in args.classes_track  # keep only the classes to track
            ]
            tracked_objects = tracker.update(detections=detections)
            norfair.draw_points(frame,
                                detections,
                                radius=10,
                                thickness=5,
                                color=norfair.Color.green)
            norfair.draw_tracked_objects(frame,
                                         tracked_objects,
                                         id_size=5,
                                         id_thickness=10,
                                         color=norfair.Color.green)
            if args.debug:
                norfair.draw_debug_metrics(frame,
                                           tracked_objects)  # optional debug overlay

            #detections_c1 = [Detection(get_centroid(box, frame.shape[0], frame.shape[1]), data=box) for box in detections if box[-1] == 1]
            #tracked_objects_c1 = tracker_c1.update(detections=detections_c1)
            #norfair.draw_points(frame, detections_c1, color="green")
            #norfair.draw_tracked_objects(frame, tracked_objects_c1, color="green")
            #norfair.draw_debug_metrics(frame, tracked_objects_c1) #debug

            video.write(frame)
            #video.show(frame)
Example #11
import cv2
import numpy as np
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from norfair import Detection, Tracker, Video, draw_tracked_objects

# Set up Detectron2 object detector
cfg = get_cfg()
cfg.merge_from_file("./detectron2_config.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
detector = DefaultPredictor(cfg)


# Distance function
def centroid_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)


# Norfair
video = Video(input_path="./video.mp4")
tracker = Tracker(distance_function=centroid_distance, distance_threshold=20)

for frame in video:
    detections = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # Wrap Detectron2 detections in Norfair's Detection objects
    detections = [
        Detection(p) for p, c in zip(
            detections["instances"].pred_boxes.get_centers().cpu().numpy(),
            detections["instances"].pred_classes) if c == 2
    ]
    tracked_objects = tracker.update(detections=detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)
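Example #11 tracks only box centers. If full boxes are wanted (so they can be drawn with `norfair.draw_tracked_boxes` as in Example #7), a hedged variant is to pass both box corners as the Detection points; `outputs` below is the raw Detectron2 result, and this is a sketch, not code from the original:

outputs = detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
boxes = outputs["instances"].pred_boxes.tensor.cpu().numpy()  # (n, 4): x1, y1, x2, y2
box_detections = [
    Detection(np.array([[x1, y1], [x2, y2]]))  # two corner points per object
    for (x1, y1, x2, y2), c in zip(boxes, outputs["instances"].pred_classes)
    if int(c) == 2  # cars only
]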