Example #1
class MultiObjectSORTTracker(MultiObjectTracker):
    def __init__(self, flags, logger):
        self._logger = logger
        self.tracker = Sort(max_age=flags.obstacle_track_max_age,
                            min_hits=1,
                            min_iou=flags.min_matching_iou)

    def reinitialize(self, frame, obstacles):
        """ Reinitializes a multiple obstacle tracker.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to reinitialize with.
            obstacles: List of perception.detection.obstacle.Obstacle.
        """
        detections, labels, ids = self.convert_detections_for_sort_alg(
            obstacles)
        self.tracker.update(detections, labels, ids)

    def track(self, frame):
        """ Tracks obstacles in a frame.

        Args:
            frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`):
                Frame to track in.
        """
        # each tracker in self.tracker.trackers predicts a bbox in [xmin, ymin, xmax, ymax] format
        obstacles = []
        for track in self.tracker.trackers:
            coords = track.predict()[0].tolist()
            # changing to xmin, xmax, ymin, ymax format
            xmin = int(coords[0])
            xmax = int(coords[2])
            ymin = int(coords[1])
            ymax = int(coords[3])
            if xmin < xmax and ymin < ymax:
                bbox = BoundingBox2D(xmin, xmax, ymin, ymax)
                obstacles.append(Obstacle(bbox, 0, track.label, track.id))
            else:
                self._logger.error(
                    "Tracker found invalid bounding box {} {} {} {}".format(
                        xmin, xmax, ymin, ymax))
        return True, obstacles

    def convert_detections_for_sort_alg(self, obstacles):
        converted_detections = []
        labels = []
        ids = []
        for obstacle in obstacles:
            bbox = [
                obstacle.bounding_box_2D.x_min, obstacle.bounding_box_2D.y_min,
                obstacle.bounding_box_2D.x_max, obstacle.bounding_box_2D.y_max,
                obstacle.confidence
            ]
            converted_detections.append(bbox)
            labels.append(obstacle.label)
            ids.append(obstacle.id)
        return (np.array(converted_detections), labels, ids)
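For reference, a minimal sketch of the detection layout that convert_detections_for_sort_alg builds above (the values are illustrative, not from the original):

import numpy as np

# Each row follows the [x_min, y_min, x_max, y_max, confidence] layout produced
# by convert_detections_for_sort_alg; labels and ids are kept alongside.
detections = np.array([
    [100.0, 120.0, 180.0, 260.0, 0.91],
    [300.0, 140.0, 360.0, 270.0, 0.78],
])
labels = ['person', 'vehicle']
ids = [17, 42]
# reinitialize() forwards these three values to self.tracker.update(...)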
Example #2
def assign_ids(detections):
    """
    :param detections:
    :return:
    """
    mot_tracker = Sort()

    tracked_detections = []
    for detections_frame_bboxes in detections:
        if len(detections_frame_bboxes) == 0:
            detections_frame_bboxes = np.zeros((0, 5))
        tracked_detections.append(
            mot_tracker.update(np.array(detections_frame_bboxes)))

    return tracked_detections
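A minimal usage sketch, assuming per-frame detections are lists of [x1, y1, x2, y2, score] rows (the format assign_ids forwards to Sort.update) and the reference SORT implementation, whose update() returns [x1, y1, x2, y2, track_id] rows:

frame_0 = [[100, 120, 180, 260, 0.90]]   # one detection in the first frame
frame_1 = []                             # an empty frame is padded to shape (0, 5) inside assign_ids
tracked = assign_ids([frame_0, frame_1])
# each entry of tracked is an array of [x1, y1, x2, y2, track_id] rows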
Example #3
def video_detect(model, path_to_video, threshold=0.6, track=True):
    mot_tracker = Sort()
    cap = cv2.VideoCapture(path_to_video)
    out = cv2.VideoWriter(path_to_video + '-detections.avi',
                          cv2.VideoWriter_fourcc(*'XVID'), 30.0, (640, 480))
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    with torch.no_grad():
        model.eval()
        model.to(device)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print('No more frames')
                break
            pil_img = Image.fromarray(frame)
            tensor_img = to_tensor(pil_img).unsqueeze_(0)
            dets = model(tensor_img.to(device))
            if track:
                tracked_dets = None
                for box, score in zip(dets[0]['boxes'], dets[0]['scores']):
                    if score.item() >= threshold:
                        tracked_det = np.array([
                            torch.cat(
                                (box,
                                 score.reshape(1))).detach().cpu().numpy()
                        ])
                        tracked_dets = np.concatenate(
                            (tracked_dets, tracked_det
                             )) if tracked_dets is not None else tracked_det
                tracked_dets = mot_tracker.update(
                    tracked_dets if tracked_dets is not None else np.empty((
                        0, 5)))
                out.write(np.array(draw_object_id(tracked_dets, pil_img)))
            else:
                out.write(
                    np.array(
                        draw_class_labels(dets,
                                          tensor_img,
                                          get_coco_classes(),
                                          threshold=threshold)[0]))
    cap.release()
    out.release()
    cv2.destroyAllWindows()
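A minimal call sketch (hedged: it assumes a torchvision detection model such as fasterrcnn_resnet50_fpn and a local video file, neither of which comes from the original):

import torchvision

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
video_detect(model, 'demo.mp4', threshold=0.6, track=True)
# writes 'demo.mp4-detections.avi' next to the input, as in the function above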
Example #4
def track(video_path, use_gpu=False):
    video = cv2.VideoCapture(video_path)
    ret, frame = video.read()
    if ret:
        frame = cv2.resize(frame, (input_width, input_height))

    if use_gpu:
        caffe.set_mode_gpu()

    tracker = Sort(max_age=10)
    detector = Detector()
    classes = detector.get_classes()

    while ret:
        frame_disp = np.copy(frame)
        bounding_boxes, counting = detector.infer(frame)
        class_counting = zip(classes, counting)

        for pair in class_counting:
            print('{:s} {:03d}'.format(*pair))
        print('')

        if len(bounding_boxes) > 0:
            bounding_boxes = np.array(bounding_boxes, np.int32)

            # convert (x, y, w, h) to (x1, y1, x2, y2)
            bounding_boxes[:, 2:4] += bounding_boxes[:, 0:2]
            bounding_boxes[:, 2:4] -= 1

        track_results = tracker.update(bounding_boxes)
        draw_tracking_results(track_results, frame_disp)

        cv2.imshow('tracking', frame_disp)

        key = cv2.waitKey(1)
        if key == 27:
            return

        ret, frame = video.read()
        if ret:
            frame = cv2.resize(frame, (input_width, input_height))
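For reference, the in-place (x, y, w, h) to (x1, y1, x2, y2) conversion used above behaves like this on a toy array (illustrative values only):

import numpy as np

boxes = np.array([[10, 20, 30, 40]], np.int32)   # one box as (x, y, w, h)
boxes[:, 2:4] += boxes[:, 0:2]                   # x2 = x + w, y2 = y + h
boxes[:, 2:4] -= 1                               # inclusive pixel coordinates
# boxes is now [[10, 20, 39, 59]]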
Example #5
def sort(yolo, args):

    images_input = os.path.isdir(args.input)
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input ==
                                         '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = args.output != ""
    if save_output:
        if images_input:
            raise ValueError("image folder input cannot be saved to a video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC    = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used for tracking, if provided; other
        # classes from the detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create instance of the SORT tracker
    tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    # alloc a set of queues to record motion trace
    # for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if not ret:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects
        boxes, class_names, scores = get_tracking_object(out_boxes,
                                                         out_classnames,
                                                         out_scores,
                                                         tracking_class_names,
                                                         convert_box=False)

        # form up detection records
        if len(boxes) != 0:
            detections = np.array([
                bbox + [score]
                for bbox, score, class_name in zip(boxes, scores, class_names)
            ])
        else:
            detections = np.empty((0, 5))

        # Call the tracker
        tracks = tracker.update(detections)

        # show all detection result as white box
        for j, bbox in enumerate(boxes):
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, class_names[j],
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracks:
            bbox = track[:4]
            track_id = int(track[4])

            # record tracking info and get bbox
            track_indexes.append(int(track_id))
            total_obj_counter.append(int(track_id))

            # show all tracking result as color box
            color = [int(c) for c in COLORS[track_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (color), 2)

            #if track.class_name:
            #cv2.putText(frame, str(track.class_name), (int(bbox[0]+30), int(bbox[1]-20)), 0, 5e-3*150, (color), 2)

            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            #draw motion trace
            motion_trace = motion_traces[track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]),
                         (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("SORT", 0)
        cv2.resizeWindow('SORT', 1024, 768)
        # cv2.imshow('SORT', frame)  # Xander commented out

        if save_output:
            #save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2
        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
class CaptureProcessor:
    def __init__(
        self,
        cap,
        mask_filename,
        warp_filename,
        threshold,
        prefix="",
        output_path="crop_images",
    ):
        """CaptureProcessor starts a thread for processing ROIs defined in a mask file.
        The processor does the following tasks:

        - Crops the images to match masks
        - Warps ROI images to remove perspective distortion (if necessary)
        - Saves ROI images to file system (encrypted if necessary)
        - Detects vehicles in ROIs using Yolo object detection
        - Tracks vehicles using SORT algorithm
        - Saves metadata to a JSON file


        Args:
            cap (cv2.VideoCapture): OpenCV's VideoCapture object for either camera or video stream
            mask_filename (str): Filename of mask file in PNG format
            warp_filename (str): Filename of warp file in JSON format
            threshold (int): Threshold for perceptual hash to detect motion in ROI
            prefix (str, optional): Prefix for image and metadata files. Defaults to "".
            output_path (str, optional): Folder to save images and metadata. Defaults to "crop_images".
        """
        self.keep_processing = False
        self.cap = cap
        self.threshold = threshold
        self.prefix = prefix
        self.output_path = output_path
        self.mask_filename = mask_filename
        self.warp_filename = warp_filename
        self.image_cache = []
        self.keep_sending_after_phash_diff = 2.5  # seconds
        self.yolo = Yolov5()
        self.tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    def start(self):
        """Start processing thread"""
        self.keep_processing = True
        self.mask = Mask(self.mask_filename)
        self.warp = Warp(self.warp_filename)

        self.yolo_thread = Thread(target=self._yolo_process, args=())
        self.yolo_thread.daemon = True
        self.yolo_thread.start()
        previous_roi_hash = [
            imagehash.phash(Image.fromarray(np.zeros((10, 10))))
        ] * self.mask.ROI_count()
        try:
            spf = 1 / float(self.cap.get(cv2.CAP_PROP_FPS))
        except Exception:
            # our camera does not provide FPS, low value to never wait
            spf = 0.01
        frame_no = -1

        keep_sending = 0
        frame_cache = []
        self.image_cache = []
        frame_date = datetime.now()
        while self.keep_processing:
            # prevent loop lock
            sleep(spf)
            if not self.cap.isOpened():
                sleep(0.5)
                continue
            ret, im = self.cap.read()

            if not ret:
                continue
            if im is None:
                continue
            try:
                if frame_no == self.cap.frame:
                    # we read the same frame twice.
                    continue
                frame_no = self.cap.frame
            except Exception:
                frame_no += 1
            try:
                frame_date = self.cap.frame_date
            except Exception:
                frame_date = datetime.now()

            if time() - keep_sending < self.keep_sending_after_phash_diff:
                # store frames for X seconds after movement
                frame_cache.append((frame_date, frame_no, im))
                im_last = im.copy()
                continue

            if len(frame_cache) > 0:
                # insert the whole block of frames at once
                # sanity check: the cache cannot grow unbounded;
                # RAM can hold roughly 300 blocks per recording period
                if len(self.image_cache
                       ) < 300 / self.keep_sending_after_phash_diff:
                    self.image_cache.append(frame_cache)
                frame_cache = []
                # set phash based on last image in the block
                for i, roi_im in enumerate(self.mask.apply_ROIs(im_last)):
                    roi_im = self.warp.apply(roi_im, i)
                    roi_hash = imagehash.phash(Image.fromarray(roi_im))
                    previous_roi_hash[i] = roi_hash

            for i, roi_im in enumerate(self.mask.apply_ROIs(im)):
                roi_im = self.warp.apply(roi_im, i)
                roi_hash = imagehash.phash(Image.fromarray(roi_im))

                if previous_roi_hash[i] - roi_hash > self.threshold:
                    # some ROI contains change, keep caching images!
                    keep_sending = time()
                    frame_cache.append((frame_date, frame_no, im))
                    # break from ROI loop
                    break

    def stop(self):
        """Stop processing thread"""
        self.keep_processing = False

    def _yolo_process(self):
        """Run YOLO object detection and update tracker"""
        while self.keep_processing:
            # prevent loop lock
            sleep(0.01)

            if len(self.image_cache) == 0:
                continue
            started = time()
            image_list = self.image_cache.pop(0)
            frames_count = len(image_list)
            # skip frames if we're much behind
            # it could be even more aggressive; we used to take only every 3rd frame before this
            # Heuristic to increase skipping: ramp towards a ~50% skip rate quickly and top out around a cache length of ~100
            try:
                skip_rate = int(-6 + 21 * np.log(len(self.image_cache) - 0.8))
            except ValueError:
                skip_rate = 0
            # Skip some frames anyway; we have enough FPS
            skip_rate = max(DEFAULT_SKIPRATE, skip_rate)
            frame_skip = self._discard_n(int(skip_rate), 100)
            timestamp = ""
            for list_index, (frame_date, frame_no,
                             im) in enumerate(image_list):
                if frame_skip[list_index % len(frame_skip)] == 1:
                    # skip frames if queue starts to get too long
                    continue
                if not self.keep_processing:
                    break
                detections = None
                for i, roi_im in enumerate(self.mask.apply_ROIs(im)):
                    roi_im = self.warp.apply(roi_im, i)
                    timestamp = frame_date.strftime(
                        "%Y_%m_%d_%H_%M_%S_%f")[:-3]
                    frame_name = (self.prefix +
                                  f"_ts_{timestamp}_roi_{i:02d}_f_{frame_no}")
                    metadata_name = frame_name + ".json"

                    if ENCRYPT:
                        frame_name += ".aes"
                        encrypt_image(
                            os.path.join(self.output_path, frame_name), roi_im)
                        if DEBUG:
                            cv2.imwrite(
                                os.path.join(self.output_path,
                                             frame_name + ".jpg"),
                                roi_im,
                            )
                    else:
                        frame_name += ".jpg"
                        cv2.imwrite(
                            os.path.join(self.output_path, frame_name),
                            roi_im,
                            [int(cv2.IMWRITE_JPEG_QUALITY), 97],
                        )

                    if not detections:
                        start_yolo = time()
                        all_detections = self.yolo.detect(im)
                        end_yolo = time()
                        detections = [
                            d for d in all_detections
                            if d["label"] in VALID_VEHICLE_CLASSES
                        ]

                        bboxes = np.array([det["bbox"] for det in detections])
                        confidences = np.array(
                            [det["confidence"] for det in detections])

                        start_tracker = time()
                        tracks = None
                        if bboxes.shape[0] == 0 or confidences.shape[0] == 0:
                            tracks = self.tracker.update()
                        else:
                            tracks = self.tracker.update(np.c_[bboxes,
                                                               confidences])

                    roi_detections, roi_iods = self.mask.get_roi_detections(
                        detections, i)

                    track_ids = []
                    if roi_detections:
                        track_ids = self._track_ids_for_detections(
                            im, roi_detections, tracks)
                    end_tracker = time()
                    roi_metadata = {}
                    roi_metadata["detections"] = roi_detections
                    roi_metadata["iods"] = roi_iods
                    roi_metadata["track_ids"] = track_ids
                    roi_metadata["roi_offset"] = self.mask.get_roi_offset(i)
                    roi_metadata["roi_dims"] = [
                        roi_im.shape[1], roi_im.shape[0]
                    ]

                    with open(
                            os.path.join(self.output_path, metadata_name),
                            "w",
                            encoding="utf-8",
                    ) as f:
                        json.dump(roi_metadata, f, ensure_ascii=False)
                    logging.info(
                        "TIMERS: YOLO: {}s, tracker: {}s,  skipper: {}%, cache: {}, tracks: {}"
                        .format(
                            round(end_yolo - start_yolo, 2),
                            round(end_tracker - start_tracker, 2),
                            sum(frame_skip),
                            len(self.image_cache),
                            str(track_ids),
                        ))

            logging.info(
                "YOLO block analysis time. {}s {}FPS, blocks {}, last ts {}".
                format(
                    int(time() - started),
                    round(frames_count / (time() - started), 2),
                    len(self.image_cache),
                    timestamp,
                ))

    def _track_ids_for_detections(self, im, detections, tracks):
        """This function maps bounding boxes received from SORT tracking back to
        original object detections. Matches are determined using a suitable distance threshold.

        Args:
            im (numpy.ndarray): Input image whose dimensions are used to determine suitable threshold
            detections (List): List of dictionaries containing object detection data
            tracks (numpy.ndarray): Bounding boxes and tracking identifiers from SORT algorithm

        Returns:
            List: Tracking identifiers matching object detections
        """
        track_ids = [-1] * len(detections)
        bboxes = np.array([det["bbox"] for det in detections])

        # SORT does not return an index for detection so set threshold based on image size
        sort_match_limit = np.square((im.shape[0] + im.shape[1]) * 0.5 * 0.02)

        for i in range(tracks.shape[0]):
            ss = np.sum(np.square(bboxes - tracks[i, :4]), axis=1)
            min_row = np.argmin(ss, axis=0)

            if ss[min_row] < sort_match_limit:
                track_ids[min_row] = int(tracks[i, 4])
            else:
                track_ids[min_row] = -1

        return track_ids

    def _discard_n(self, n, length=30):
        """from 30 FPS hypothesis, discard N frames.

        Args:
            n (int): Number frames to skip (number of 1's in output array)
            length (int, optional): Length of output array. Defaults to 30.

        Returns:
            List: Array of zeros and ones
        """

        if n <= 0:
            return [0] * length
        if n >= length:
            return [1] * length
        if n < length / 2:
            lin_num = n + 1
            values = (1, 0)
            start_value = 0
        else:
            lin_num = (length - n) + 1
            values = (0, 1)
            start_value = 1
        include = np.linspace(0, length - 1,
                              num=lin_num).astype("int").tolist()
        e = [
            values[0] if k in include else values[1]
            for k in reversed(range(length))
        ]
        e[0] = start_value
        return e
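A quick sketch of the frame-skip pattern _discard_n produces (the helper does not touch self, so it can be exercised directly; the numbers are illustrative):

# passing None for self is only for illustration; in _yolo_process the method
# is called on the running CaptureProcessor instance
pattern = CaptureProcessor._discard_n(None, 10, length=30)
assert len(pattern) == 30
assert sum(pattern) == 10      # ten evenly spread 1s mark frames to skip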
        raise argparse.ArgumentTypeError('Please specify the date and camera pose for video clips first!')
    else:
        date = args.date
        cam_pose = args.campose

    total_pcount_each_minute = np.zeros((12, 60), dtype=np.int32)       # 12 hours, from 10:00 to 22:00

    # prepare id tracker
    mot_tracker = Sort(max_age=10, min_hits=3)

    for hour in np.arange(10,22):
        for minute in np.arange(60):
            print("loading ../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4".format(cam_pose, date, hour, minute))
            cap = cv2.VideoCapture('../datasets/TongYing/{}/{}/{:02d}/{:02d}.mp4'.format(cam_pose, date, hour, minute))

            mot_tracker.update([])      # just in case the first file does not exist

            while (cap.isOpened()):
                ret, frame = cap.read()
                if ret:
                    # resize
                    img = cv2.resize(frame, net_shape[::-1], interpolation=cv2.INTER_CUBIC)
                    # start = time.time()
                    rclasses, rscores, rbboxes = process_image(img, net_shape=net_shape)
                    # end = time.time()
                    # # debug
                    # print('Time elapsed to process one {} img: {:.03f} sec'.format(net_shape, end-start))

                    person_select_indicator = (rclasses == 15)  # pedestrians only
                    rclasses = rclasses[person_select_indicator]
                    rscores = rscores[person_select_indicator]      # confidence
class interpreter(object):
    def __init__(self, args, data_set, ENABLE_TRACKING=None):
        self.tracker = Sort()
        self.args = args
        self.nms_thres = args.nms
        self.triplet_nms_thres = args.triplet_nms
        self.obj_thres = args.obj_thres
        self.triplet_thres = args.triplet_thres
        self.tobefiltered_objects = [
            26, 53, 134, 247, 179, 74, 226, 135, 145, 300, 253, 95, 11, 102, 87
        ]
        # 26: wheel, 53: backpack, 143:light, 247:camera, 179:board
        # 74:shoe, 226:chair, 135:shelf, 145:button, 300:cake, 253:knob, 95:wall, 11:door, 102:mirror,87:ceiling
        if ENABLE_TRACKING is None:
            self.ENABLE_TRACKING = False if self.args.dataset == 'visual_genome' else True
        else:
            self.ENABLE_TRACKING = ENABLE_TRACKING
        if self.ENABLE_TRACKING and self.args.path_opt.split(
                '/')[-1] == 'VG-DR-Net.yaml':
            self.tobefiltered_predicates = [0, 6, 10, 18, 19, 20, 22, 23, 24]
            # 0:backgrounds, 6:eat,10:wear, 18:ride, 19:watch, 20:play, 22:enjoy, 23:read, 24:cut

        elif self.ENABLE_TRACKING and self.args.path_opt.split(
                '/')[-1] == 'VG-MSDN.yaml':
            self.tobefiltered_predicates = [12, 18, 27, 28, 30, 31, 32, 35]
        else:
            self.tobefiltered_predicates = []

        # Params for Statistics Based Scene Graph Inference
        self.relation_statistics = prior.load_obj("relation_prior_prob")
        self.joint_probability = prior.load_obj("object_prior_prob")
        self.spurious_rel_thres = 0.07
        self.rel_infer_thres = 0.9
        self.obj_infer_thres = 0.001
        self.data_set = data_set
        self.detected_obj_set = set()
        self.fasttext = torchtext.vocab.FastText()
        self.word_vecs, self.word_itos, self.word_stoi = self.prepare_wordvecs(
            num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES)
        self.pred_stoi = {
            self.data_set.predicate_classes[i]: i
            for i in range(len(self.data_set.predicate_classes))
        }

    # p(x, y)
    def cal_p_xy_joint(self, x_ind, y_ind):
        p_xy = self.joint_probability[x_ind, y_ind] / np.sum(
            self.joint_probability)
        return p_xy

    # p(x|y)
    def cal_p_x_given_y(self, x_ind, y_ind):
        single_prob = np.sum(self.joint_probability, axis=1)
        p_y = single_prob[y_ind]
        p_xy = self.joint_probability[x_ind, y_ind]
        return p_xy / p_y

    # p(x|y,z) approximated
    def cal_p_x_given_yz(self, x_ind, y_ind, z_ind):
        p_x_given_y = self.cal_p_x_given_y(x_ind, y_ind)
        p_x_given_z = self.cal_p_x_given_y(x_ind, z_ind)
        return min(p_x_given_y, p_x_given_z)

    # True if p(x, z)^2 < p(x,y)*p(y,z)
    def check_prob_condition(self, x_ind, y_ind, z_ind):
        p_xz = self.cal_p_xy_joint(x_ind, z_ind)
        p_xy = self.cal_p_xy_joint(x_ind, y_ind)
        p_yz = self.cal_p_xy_joint(y_ind, z_ind)
        return p_xz**2 < p_xy * p_yz

    def prepare_wordvecs(self, num_vocabs=400, ignores=VG_DR_NET_OBJ_IGNORES):
        word_inds = range(num_vocabs)
        word_inds = [x for x in word_inds if x not in ignores]
        word_txts = [self.data_set.object_classes[x] for x in word_inds]
        self.word_ind2vec = {
            ind: self.fasttext.vectors[self.fasttext.stoi[x]]
            for ind, x in zip(word_inds, word_txts)
        }

        word_vecs = torch.stack([
            self.fasttext.vectors[self.fasttext.stoi[x]] for x in word_txts
        ]).cuda()
        word_itos = {
            i: self.data_set.object_classes[x]
            for i, x in enumerate(word_inds)
        }
        word_stoi = {
            self.data_set.object_classes[x]: i
            for i, x in enumerate(word_inds)
        }
        return word_vecs, word_itos, word_stoi

    def update_obj_set(self, obj_inds):
        for obj_ind in obj_inds[:, 0]:
            self.detected_obj_set.add(obj_ind)

    def find_disconnected_pairs(self, obj_inds, relationships):
        connected_pairs = set(
            tuple(x) for x in relationships[:, :2].astype(int).tolist())
        disconnected_pairs = set()
        for i in range(len(obj_inds)):
            for j in range(len(obj_inds)):
                if i == j: continue
                if (i, j) in connected_pairs or (j, i) in connected_pairs:
                    continue
                disconnected_pairs.add((i, j))
        return disconnected_pairs

    def missing_relation_inference(self, obj_inds, obj_boxes,
                                   disconnected_pairs):
        infered_relation = set()
        #print('discon:',disconnected_pairs)
        for i in range(len(disconnected_pairs)):
            pair = disconnected_pairs.pop()
            node1_box, node2_box = obj_boxes[pair[0]], obj_boxes[pair[1]]
            distance = self.distance_between_boxes(
                np.stack([node1_box, node2_box], axis=0))[0, 1]
            pair_txt = [
                self.data_set.object_classes[obj_inds[pair[0]][0]],
                self.data_set.object_classes[obj_inds[pair[1]][0]]
            ]
            candidate, prob, direction = prior.most_probable_relation_for_unpaired(
                pair_txt, self.relation_statistics, int(distance))
            if candidate is not None and prob > self.rel_infer_thres:
                if not direction: pair = (pair[1], pair[0])
                infered_relation.add(
                    (pair[0], pair[1], self.pred_stoi[candidate], prob))
                pair_txt = [
                    self.data_set.object_classes[obj_inds[pair[0]][0]],
                    self.data_set.object_classes[obj_inds[pair[1]][0]]
                ]
                #print('dsfsfd:',pair_txt[0],pair_txt[1],candidate,prob)
        infered_relation = np.array(list(infered_relation)).reshape(-1, 4)
        #print(infered_relation)
        return infered_relation

    def missing_object_inference(self, obj_inds, disconnected_pairs):
        detected_obj_list = np.array(list(self.detected_obj_set))
        candidate_searchspace = [
            self.word_ind2vec[x] for x in detected_obj_list
        ]
        candidate_searchspace = torch.stack(candidate_searchspace,
                                            dim=0).cuda()
        search_size = candidate_searchspace.shape[0]
        infered_obj_list = []

        for i in range(len(disconnected_pairs)):
            pair = disconnected_pairs.pop()
            ''' wordvec based candidate objects filtering '''
            #print(pair)
            sbj_vec = self.word_ind2vec[obj_inds[pair[0]][0]].cuda()
            obj_vec = self.word_ind2vec[obj_inds[pair[1]][0]].cuda()
            sim_sbj_obj = cosine_similarity(sbj_vec, obj_vec, dim=0)

            sbj_vec = sbj_vec.expand_as(candidate_searchspace)
            obj_vec = obj_vec.expand_as(candidate_searchspace)
            sim_cans_sbj = cosine_similarity(candidate_searchspace,
                                             sbj_vec,
                                             dim=1)
            sim_cans_obj = cosine_similarity(candidate_searchspace,
                                             obj_vec,
                                             dim=1)
            sim_sbj_obj = sim_sbj_obj.expand_as(sim_cans_obj)
            keep = (sim_cans_sbj + sim_cans_obj >
                    2 * sim_sbj_obj).nonzero().view(-1).cpu().numpy()
            #print(keep)
            #print(detected_obj_list)
            candidate_obj_list = detected_obj_list[keep]
            if len(candidate_obj_list) == 0: continue
            ''' statistics based candidate objects filtering '''
            keep = []
            for i, obj_ind in enumerate(candidate_obj_list):
                if self.check_prob_condition(obj_inds[pair[0]][0], obj_ind,
                                             obj_inds[pair[1]][0]):
                    keep.append(i)
            candidate_obj_list = candidate_obj_list[keep]
            if len(candidate_obj_list) == 0: continue
            ''' choose a candidate with best score above threshold'''
            probs = [
                self.cal_p_x_given_yz(candidate, obj_inds[pair[0]][0],
                                      obj_inds[pair[1]][0])
                for candidate in candidate_obj_list
            ]
            chosen_obj = candidate_obj_list[(np.array(probs)).argmax()]
            infered_obj_list.append(chosen_obj)
            #print(max(probs),self.data_set.object_classes[obj_inds[pair[0]][0]],
            #      self.data_set.object_classes[chosen_obj],
            #      self.data_set.object_classes[obj_inds[pair[1]][0]])

    def get_box_centers(self, boxes):
        # Define bounding box info
        center_x = (boxes[:, 0] + boxes[:, 2]) / 2
        center_y = (boxes[:, 1] + boxes[:, 3]) / 2
        centers = np.concatenate(
            [center_x.reshape(-1, 1),
             center_y.reshape(-1, 1)], axis=1)
        return centers

    def distance_between_boxes(self, boxes):
        '''
        Returns all pairwise distances between box centers.

        :param boxes: array of boxes in (x1, y1, x2, y2, ...) format
        :return: dist: dist[i, j] is the distance between the centers of boxes[i] and boxes[j]
        '''
        centers = self.get_box_centers(boxes)
        centers_axis1 = np.repeat(centers, centers.shape[0],
                                  axis=0).reshape(-1, 2)
        centers_axis2 = np.stack([centers for _ in range(centers.shape[0])
                                  ]).reshape(-1, 2)
        dist = np.linalg.norm(centers_axis1 - centers_axis2,
                              axis=1).reshape(-1, centers.shape[0])
        return dist

    def spurious_relation_rejection(self, obj_boxes, obj_cls, relationships):
        if self.args.disable_spurious: return range(len(relationships))
        subject_inds = obj_cls[relationships.astype(int)[:, 0]][:, 0]
        pred_inds = relationships.astype(int)[:, 2]
        object_inds = obj_cls[relationships.astype(int)[:, 1]][:, 0]

        subject_boxes = obj_boxes[relationships.astype(int)[:, 0]]
        object_boxes = obj_boxes[relationships.astype(int)[:, 1]]

        keep = []
        for i, (sbj_ind, pred_ind, obj_ind, sbj_box, obj_box) in enumerate(
                zip(subject_inds, pred_inds, object_inds, subject_boxes,
                    object_boxes)):
            relation_txt = [
                self.data_set.object_classes[sbj_ind],
                self.data_set.predicate_classes[pred_ind],
                self.data_set.object_classes[obj_ind]
            ]
            distance = self.distance_between_boxes(
                np.stack([sbj_box, obj_box], axis=0))[0, 1]
            prob = prior.triplet_prob_from_statistics(relation_txt,
                                                      self.relation_statistics,
                                                      int(distance))
            print('prob: {prob:3.2f}     {sbj:15}{rel:15}{obj:15}'.format(
                prob=prob,
                sbj=relation_txt[0],
                rel=relation_txt[1],
                obj=relation_txt[2]))

            if prob > self.spurious_rel_thres: keep.append(i)

        return keep

    def interpret_graph(self, object_result, predicate_result, im_info):
        cls_prob_object, bbox_object, object_rois, reranked_score = object_result[:4]
        cls_prob_predicate, mat_phrase = predicate_result[:2]
        region_rois_num = predicate_result[2]

        obj_boxes, obj_scores, obj_cls, \
        subject_inds, object_inds, \
        subject_boxes, object_boxes, \
        subject_IDs, object_IDs, \
        predicate_inds, triplet_scores, relationships = \
            self.interpret_graph_(cls_prob_object, bbox_object, object_rois,
                                    cls_prob_predicate, mat_phrase, im_info,
                                    reranked_score)
        ''' missing object inference '''
        # self.update_obj_set(obj_cls)
        # disconnected_pairs = self.find_disconnected_pairs(obj_cls, relationships)
        # self.missing_object_inference(obj_cls,disconnected_pairs)
        ''' missing object infernce (end) '''
        ''' missing relation inference '''
        # infered_relations = self.missing_relation_inference(obj_cls,obj_boxes,disconnected_pairs)
        # print('size:',relationships.shape,infered_relations.shape)
        #
        # relationships = np.concatenate([relationships,infered_relations],axis=0)
        #
        # predicate_inds = relationships[:, 2].astype(int)
        # subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
        # object_boxes = obj_boxes[relationships[:, 1].astype(int)]
        # subject_IDs = np.array([int(obj_boxes[int(relation[0])][4]) for relation in relationships])
        # object_IDs = np.array([int(obj_boxes[int(relation[1])][4]) for relation in relationships])
        # subject_inds = obj_cls[relationships[:, 0].astype(int)]
        # object_inds = obj_cls[relationships[:, 1].astype(int)]
        # subject_scores = [obj_scores[int(relation[0])] for relation in relationships]
        # pred_scores = [relation[3] / obj_scores[int(relation[0])] / obj_scores[int(relation[1])] for relation in
        #                relationships]
        # object_scores = [obj_scores[int(relation[1])] for relation in relationships]
        # triplet_scores = np.array(zip(subject_scores, pred_scores, object_scores))
        ''' missing relation inference (end) '''

        keep = self.spurious_relation_rejection(obj_boxes, obj_cls,
                                                relationships)

        return obj_boxes, obj_scores, obj_cls, \
               subject_inds[keep], object_inds[keep], \
               subject_boxes[keep], object_boxes[keep], \
               subject_IDs[keep], object_IDs[keep], \
               predicate_inds[keep], triplet_scores[keep], relationships[keep]

    def interpret_graph_(self,
                         cls_prob_object,
                         bbox_object,
                         object_rois,
                         cls_prob_predicate,
                         mat_phrase,
                         im_info,
                         reranked_score=None):

        obj_boxes, obj_scores, obj_cls, subject_inds, object_inds, \
        subject_boxes, object_boxes, predicate_inds, \
        sub_assignment, obj_assignment, total_score = \
            self.interpret_relationships(cls_prob_object, bbox_object, object_rois,
                                         cls_prob_predicate, mat_phrase, im_info,
                                         nms=self.nms_thres, topk_pred=2, topk_obj=3,
                                         use_gt_boxes=False,
                                         triplet_nms=self.triplet_nms_thres,
                                         reranked_score=reranked_score)

        obj_boxes, obj_scores, obj_cls, \
        subject_inds, object_inds, \
        subject_boxes, object_boxes, \
        subject_IDs, object_IDs, \
        predicate_inds, triplet_scores, relationships = self.filter_and_tracking(obj_boxes, obj_scores, obj_cls,
                                                                                     subject_inds, object_inds,
                                                                                     subject_boxes, object_boxes,
                                                                                     predicate_inds,
                                                                                     sub_assignment, obj_assignment,
                                                                                     total_score)

        return obj_boxes, obj_scores, obj_cls, \
        subject_inds, object_inds, \
        subject_boxes, object_boxes, \
        subject_IDs, object_IDs, \
        predicate_inds, triplet_scores, relationships

    def interpret_relationships(self,
                                cls_prob,
                                bbox_pred,
                                rois,
                                cls_prob_predicate,
                                mat_phrase,
                                im_info,
                                nms=-1.,
                                clip=True,
                                min_score=0.01,
                                top_N=100,
                                use_gt_boxes=False,
                                triplet_nms=-1.,
                                topk_pred=2,
                                topk_obj=3,
                                reranked_score=None):

        scores, inds = cls_prob[:, 1:].data.topk(k=topk_obj, dim=1)
        if reranked_score is not None:
            if isinstance(reranked_score, Variable):
                reranked_score = reranked_score.data
            scores *= reranked_score
        inds += 1
        scores, inds = scores.cpu().numpy(), inds.cpu().numpy()
        # filter out objects with wrong class
        for i, ind in enumerate(inds):
            if ind[0] in self.tobefiltered_objects:
                scores[i].fill(0)

        predicate_scores, predicate_inds = cls_prob_predicate[:, 1:].data.topk(
            dim=1, k=topk_pred)
        predicate_inds += 1
        predicate_scores, predicate_inds = predicate_scores.cpu().numpy(
        ).reshape(-1), predicate_inds.cpu().numpy().reshape(-1)

        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data.cpu().numpy()
        box_deltas = np.asarray([
            box_deltas[i, (inds[i][0] * 4):(inds[i][0] * 4 + 4)]
            for i in range(len(inds))
        ], dtype=np.float64)
        keep = range(scores.shape[0])
        if use_gt_boxes:
            triplet_nms = -1.
            pred_boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
        else:
            pred_boxes = bbox_transform_inv_hdn(
                rois.data.cpu().numpy()[:, 1:5], box_deltas) / im_info[0][2]
            pred_boxes = clip_boxes(pred_boxes, im_info[0][:2] / im_info[0][2])

            # nms
            if nms > 0. and pred_boxes.shape[0] > 0:
                assert nms < 1., 'Wrong nms parameters'
                pred_boxes, scores, inds, keep = nms_detections(pred_boxes,
                                                                scores,
                                                                nms,
                                                                inds=inds)

        sub_list = np.array([], dtype=int)
        obj_list = np.array([], dtype=int)
        pred_list = np.array([], dtype=int)

        # mapping the object id
        mapping = np.ones(cls_prob.size(0), dtype=np.int64) * -1
        mapping[keep] = range(len(keep))

        sub_list = mapping[mat_phrase[:, 0]]
        obj_list = mapping[mat_phrase[:, 1]]
        pred_remain = np.logical_and(sub_list >= 0, obj_list >= 0)
        pred_list = np.where(pred_remain)[0]
        sub_list = sub_list[pred_remain]
        obj_list = obj_list[pred_remain]

        # expand the sub/obj and pred list to k-column
        pred_list = np.vstack([
            pred_list * topk_pred + i for i in range(topk_pred)
        ]).transpose().reshape(-1)
        sub_list = np.vstack([sub_list for i in range(topk_pred)
                              ]).transpose().reshape(-1)
        obj_list = np.vstack([obj_list for i in range(topk_pred)
                              ]).transpose().reshape(-1)

        if use_gt_boxes:
            total_scores = predicate_scores[pred_list]
        else:
            total_scores = predicate_scores[pred_list] * scores[
                sub_list][:, 0] * scores[obj_list][:, 0]

        top_N_list = total_scores.argsort()[::-1][:10000]
        total_scores = total_scores[top_N_list]
        pred_ids = predicate_inds[
            pred_list[top_N_list]]  # category of predicates
        sub_assignment = sub_list[top_N_list]  # subjects assignments
        obj_assignment = obj_list[top_N_list]  # objects assignments
        sub_ids = inds[:, 0][sub_assignment]  # category of subjects
        obj_ids = inds[:, 0][obj_assignment]  # category of objects
        sub_boxes = pred_boxes[sub_assignment]  # boxes of subjects
        obj_boxes = pred_boxes[obj_assignment]  # boxes of objects

        if triplet_nms > 0.:
            sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, keep = triplet_nms_py(
                sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, triplet_nms)
            sub_assignment = sub_assignment[keep]
            obj_assignment = obj_assignment[keep]
            total_scores = total_scores[keep]
        if len(sub_list) == 0:
            print('No Relationship remains')
            # pdb.set_trace()

        return pred_boxes, scores, inds, sub_ids, obj_ids, sub_boxes, obj_boxes, pred_ids, sub_assignment, obj_assignment, total_scores

    def filter_and_tracking(self, obj_boxes, obj_scores, obj_cls, subject_inds,
                            object_inds, subject_boxes, object_boxes,
                            predicate_inds, sub_assignment, obj_assignment,
                            total_score):

        relationships = np.array(
            list(zip(sub_assignment, obj_assignment, predicate_inds, total_score)))

        # filter out bboxes who has low obj_score
        keep_obj = np.where(obj_scores[:, 0] >= self.obj_thres)[0]
        if keep_obj.size == 0:
            print("no object detected ...")
            keep_obj = [0]
        cutline_idx = max(keep_obj)
        obj_scores = obj_scores[:cutline_idx + 1]
        obj_boxes = obj_boxes[:cutline_idx + 1]
        obj_cls = obj_cls[:cutline_idx + 1]

        # filter out triplets whose obj/sbj have low obj_score
        if relationships.size > 0:
            keep_sub_assign = np.where(relationships[:, 0] <= cutline_idx)[0]
            relationships = relationships[keep_sub_assign]
        if relationships.size > 0:
            keep_obj_assign = np.where(relationships[:, 1] <= cutline_idx)[0]
            relationships = relationships[keep_obj_assign]

        # filter out triplets who have low total_score
        if relationships.size > 0:
            keep_rel = np.where(relationships[:, 3] >= self.triplet_thres)[
                0]  # MSDN:0.02, DR-NET:0.03
            # if keep_rel.size > 0:
            #     cutline_idx = max(keep_rel)
            #     relationships = relationships[:cutline_idx + 1]
            relationships = relationships[keep_rel]

        # filter out triplets whose sub equal obj
        if relationships.size > 0:

            #keep_rel = np.where(relationships[:, 0] != relationships[:, 1])[0]
            #relationships = relationships[keep_rel]
            keep_rel = []
            for i, relation in enumerate(relationships):
                if relation[0] != relation[1]:
                    keep_rel.append(i)
            keep_rel = np.array(keep_rel).astype(int)
            relationships = relationships[keep_rel]
            # print('filter1')
            # print(relationships.astype(int))

        # filter out triplets whose predicate is related to human behavior.
        if relationships.size > 0:
            keep_rel = []
            for i, relation in enumerate(relationships):
                if int(relation[2]) not in self.tobefiltered_predicates:
                    keep_rel.append(i)
            keep_rel = np.array(keep_rel).astype(int)
            #print('keep_rel:',keep_rel)
            relationships = relationships[keep_rel]
            # print('filter2')
            # print(relationships.astype(int))

        # Object tracking
        # Filter out all un-tracked objects and triplets
        if self.ENABLE_TRACKING:
            print(obj_boxes.shape)
            tracking_input = np.concatenate(
                (obj_boxes, obj_scores[:, 0].reshape(len(obj_scores), 1)),
                axis=1)
            bboxes_and_uniqueIDs = self.tracker.update(tracking_input)
            keep = filter_untracted(bboxes_and_uniqueIDs, obj_boxes)
            print(relationships.shape)

            # filter out triplets whose obj/sbj is untracked.
            if relationships.size > 0:
                keep_sub_assign = [
                    np.where(relationships[:, 0] == keep_idx)
                    for keep_idx in keep
                ]
                if len(keep_sub_assign) > 0:
                    keep_sub_assign = np.concatenate(keep_sub_assign,
                                                     axis=1).flatten()
                    relationships = relationships[keep_sub_assign]
                else:
                    relationships = relationships[np.array([]).astype(int)]
            if relationships.size > 0:
                keep_obj_assign = [
                    np.where(relationships[:, 1] == keep_idx)
                    for keep_idx in keep
                ]
                if len(keep_obj_assign) > 0:
                    keep_obj_assign = np.concatenate(keep_obj_assign,
                                                     axis=1).flatten()
                    relationships = relationships[keep_obj_assign]
                else:
                    relationships = relationships[np.array([]).astype(int)]
            #
            print('filter3')
            print(relationships.astype(int))
            print(keep)
            rel = relationships.copy()
            for i, k in enumerate(keep):
                relationships[:, :2][rel[:, :2] == k] = i

            order = relationships[:, 3].argsort()[::-1]
            relationships = relationships[order]
            #print('filter4')
            #print(relationships[:,3])

            subject_inds = obj_cls[relationships[:, 0].astype(int)]
            object_inds = obj_cls[relationships[:, 1].astype(int)]

            obj_boxes = np.concatenate(
                [obj_boxes, np.zeros([obj_boxes.shape[0], 1])], axis=1)
            for i, keep_idx in enumerate(keep):
                obj_boxes[keep_idx] = bboxes_and_uniqueIDs[i]
            obj_scores = obj_scores[keep]
            obj_cls = obj_cls[keep]
            obj_boxes = obj_boxes[keep]

            #obj_boxes = bboxes_and_uniqueIDs

            print(obj_scores.shape)
            print(obj_cls.shape)
            print(obj_boxes.shape)
            print(relationships.shape)

        else:
            obj_boxes = np.concatenate(
                [obj_boxes, np.zeros([obj_boxes.shape[0], 1])], axis=1)
            for i in range(len(obj_boxes)):
                obj_boxes[i][4] = i
            subject_inds = obj_cls[relationships[:, 0].astype(int)]
            object_inds = obj_cls[relationships[:, 1].astype(int)]
            #subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
            #object_boxes = obj_boxes[relationships[:, 1].astype(int)]
            #subject_IDs = subject_boxes[:, 4].astype(int)
            #object_IDs = object_boxes[:, 4].astype(int)

        predicate_inds = relationships[:, 2].astype(int)
        subject_boxes = obj_boxes[relationships[:, 0].astype(int)]
        object_boxes = obj_boxes[relationships[:, 1].astype(int)]
        subject_IDs = np.array([
            int(obj_boxes[int(relation[0])][4]) for relation in relationships
        ])
        object_IDs = np.array([
            int(obj_boxes[int(relation[1])][4]) for relation in relationships
        ])

        subject_scores = [
            obj_scores[int(relation[0])] for relation in relationships
        ]
        pred_scores = [
            relation[3] / obj_scores[int(relation[0])] /
            obj_scores[int(relation[1])] for relation in relationships
        ]
        object_scores = [
            obj_scores[int(relation[1])] for relation in relationships
        ]
        triplet_scores = np.array(
            list(zip(subject_scores, pred_scores, object_scores)))

        #print(relationships)


        return obj_boxes, obj_scores, obj_cls, \
               subject_inds, object_inds, \
               subject_boxes, object_boxes, \
               subject_IDs, object_IDs, \
               predicate_inds, triplet_scores, relationships
Example #9
def detect_and_track(file_path, save_path, detection_mode="SSD"):
    # define the output video size if the video is to be saved
    size = (640, 480)
    save_fps = 24
    # assume at most 300 targets in a frame and generate 300 random colours
    colours = np.random.rand(300, 3) * 255
    # set to True to save the annotated video
    write_video_flag = True
    video_capture = cv2.VideoCapture(file_path)
    mot_tracker = Sort()

    if write_video_flag:
        output_video = cv2.VideoWriter(
            save_path + 'output.mp4',
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), save_fps, size)
        object_list_file = open(save_path + 'detection.txt', 'w')
        frame_index = -1

    if detection_mode == "SSD":
        ssd = SSD()
    elif detection_mode == "YOLO3":
        yolo = YOLO()
    elif detection_mode == "CENTERNET":
        centernet = CenterNet()

    # appear records how many frames each target has been present in; number counts all distinct targets seen
    appear = {}
    number = 0

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        frame = cv2.resize(frame, size)
        # record the time at which processing of this frame started
        start_time = time.time()
        if detection_mode == "SSD":
            image = frame
            classes, scores, bboxes = ssd.process_image(image)
            # get the top-left and bottom-right coordinates of every detected target
            result = np.array(
                detect_and_visualization_image.plt_bboxes(
                    image, classes, scores, bboxes))
            rbboxes = []
            for object in result:
                rbboxes.append([object[0], object[1], object[2], object[3]])

        elif detection_mode == "YOLO3":
            image = Image.fromarray(frame[..., ::-1])
            # bboxes are [x, y, w, h] boxes, scores are detection scores, rbboxes use top-left + bottom-right coordinates
            bboxes, scores, rbboxes = yolo.detect_image(image)
            result = []
            for box, score in zip(rbboxes, scores):
                # use the target's top-left and bottom-right coordinates for tracking; the origin is the image's top-left corner, x grows rightwards and y grows downwards
                ymin, xmin, ymax, xmax = box
                xmin, ymin = max(0,
                                 np.floor(xmin + 0.5).astype('int32')), max(
                                     0,
                                     np.floor(ymin + 0.5).astype('int32'))
                xmax, ymax = min(image.size[0],
                                 np.floor(xmax + 0.5).astype('int32')), min(
                                     image.size[1],
                                     np.floor(ymax + 0.5).astype('int32'))
                result.append([xmin, ymin, xmax, ymax, score])
            result = np.array(result)
        elif detection_mode == "CENTERNET":
            image = frame
            # Here rbboxes holds top-left and bottom-right corner coordinates
            rbboxes, scores, classes = centernet.detect_image(image)
            result = []
            for i in range(len(rbboxes)):
                result.append([
                    rbboxes[i][0], rbboxes[i][1], rbboxes[i][2], rbboxes[i][3],
                    scores[i]
                ])
            result = np.array(result)

        if len(result) != 0:
            # Use the detection results: [xmin, ymin, xmax, ymax, score] per object
            det = result[:, 0:5]
        else:
            # No detections this frame; pass an empty (0, 5) array so that
            # Sort.update can still run its predict/age logic
            det = np.empty((0, 5))
        # Run SORT to associate detections with existing tracks
        trackers = mot_tracker.update(det)
        for obj in trackers:
            xmin, ymin, xmax, ymax, index = (int(obj[0]), int(obj[1]),
                                             int(obj[2]), int(obj[3]),
                                             int(obj[4]))
            color = (int(colours[index % 300, 0]), int(colours[index % 300,
                                                               1]),
                     int(colours[index % 300, 2]))
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(frame, str(index), (xmin, ymin), 0, 5e-3 * 200, color,
                        2)
            if index in appear.keys():
                appear[index] += 1
            else:
                number += 1
                appear[index] = 1

        show_fps = 1. / (time.time() - start_time)
        cv2.putText(frame,
                    text="FPS: " + str(int(show_fps)),
                    org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        cv2.putText(frame,
                    text="number: " + str(number),
                    org=(3, 30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(0, 255, 0),
                    thickness=2)
        cv2.imshow('result', frame)

        if write_video_flag:
            # Write this frame to the output video
            output_video.write(frame)
            # Update the frame index
            frame_index = frame_index + 1
            # Write the current frame index to detection.txt
            object_list_file.write(str(frame_index) + ' ')
            # Write the position of every object detected in this frame,
            # i.e. the top-left and bottom-right corners of each bounding box
            if len(rbboxes) != 0:
                for i in range(0, len(rbboxes)):
                    object_list_file.write(
                        str(rbboxes[i][0]) + ' ' + str(rbboxes[i][1]) + ' ' +
                        str(rbboxes[i][2]) + ' ' + str(rbboxes[i][3]) + ' ')
            object_list_file.write('\n')

        # Press q to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if write_video_flag:
        output_video.release()
        object_list_file.close()
    cv2.destroyAllWindows()
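A minimal sketch of how detect_and_track might be invoked; the video path and output directory below are hypothetical, and detection_mode must be one of the modes the function handles ("SSD", "YOLO3" or "CENTERNET"):

if __name__ == '__main__':
    # Hypothetical paths; save_path is used as a prefix for 'output.mp4'
    # and 'detection.txt', so a trailing separator is expected.
    detect_and_track(file_path='test.mp4',
                     save_path='./results/',
                     detection_mode="YOLO3")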
Exemple #10
0
class KalmanTracker(object):
    def __init__(self, classes, tracker='sort'):
        self.ttype = tracker
        self.classes = classes
        if tracker == 'deep_sort':
            from deep_sort import generate_detections
            from deep_sort.deep_sort import nn_matching
            from deep_sort.deep_sort.tracker import Tracker

            metric = nn_matching.NearestNeighborDistanceMetric(
                "cosine", 0.2, 100)  #param
            self.nms_max_overlap = 0.1  #param
            model_path = os.path.join(WORK_DIR, MODEL_DIR,
                                      "mars-small128.ckpt-68577")
            self.encoder = generate_detections.create_box_encoder(model_path)
            self.tracker = Tracker(metric)

            from deep_sort.application_util import preprocessing as prep
            from deep_sort.deep_sort.detection import Detection
            self.prep = prep
            self.Detection = Detection

        elif tracker == 'sort':
            from sort.sort import Sort
            self.tracker = Sort()

        self.trackers = {}

    def update(self, imgcv, detections):
        boxes = to_cvbox(detections, self.classes)
        detections, scores = [], []
        ids, bboxes = [], []

        for b in boxes:
            left, top, right, bot, confidence = b
            if self.ttype == 'deep_sort':
                detections.append(
                    np.array([left, top, right - left,
                              bot - top]).astype(np.float64))
                scores.append(confidence)
            elif self.ttype == 'sort':
                detections.append(
                    np.array([left, top, right, bot]).astype(np.float64))

        if self.ttype == "deep_sort":
            self.tracker.predict()

        detections = np.array(detections)
        if detections.shape[0] == 0:
            self.check_obsolete()
            return

        if self.ttype == "deep_sort":
            scores = np.array(scores)
            features = self.encoder(imgcv, detections.copy())
            detections = [
                self.Detection(bbox, score, feature)
                for bbox, score, feature in zip(detections, scores, features)
            ]
            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = self.prep.non_max_suppression(boxes,
                                                    self.nms_max_overlap,
                                                    scores)
            detections = [detections[i] for i in indices]
            self.tracker.update(detections)
            trackers = self.tracker.tracks

        elif self.ttype == "sort":
            trackers = self.tracker.update(detections)

        for track in trackers:
            if self.ttype == "deep_sort":
                if not track.is_confirmed() or track.time_since_update > 1:  # param
                    continue
                bbox = track.to_tlbr()
                bbox = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
                id_num = int(track.track_id)
                self.add_trackers(id_num, bbox)
            elif self.ttype == "sort":
                bbox = [
                    track[0], track[1], track[2] - track[0],
                    track[3] - track[1]
                ]
                id_num = int(track[4])
                self.add_trackers(id_num, bbox)

        self.check_obsolete()
        # print len(self.trackers)

    def add_trackers(self, id_num, bbox):
        # Tracker() here refers to a lightweight per-object record class defined
        # elsewhere in the source project, not the deep_sort Tracker imported above.
        tracker = self.trackers.get(id_num, Tracker())
        tracker.bbox = bbox
        tracker.consecutive_invisible_count = 0
        self.trackers[id_num] = tracker

    # @jit
    def check_obsolete(self):
        to_delete = []
        for id_num, tracker in self.trackers.items():
            tracker.consecutive_invisible_count += 1
            if tracker.consecutive_invisible_count > INVISIBLE_THRESH:
                to_delete.append(id_num)
        for id in to_delete:
            del self.trackers[id]
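A minimal, hypothetical driver for the KalmanTracker class above, assuming an OpenCV capture loop and a detect() placeholder whose output matches whatever format to_cvbox expects:

import cv2

tracker = KalmanTracker(classes=['person', 'car'], tracker='sort')
cap = cv2.VideoCapture('input.mp4')        # hypothetical input video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    detections = detect(frame)             # placeholder detector, not defined here
    tracker.update(frame, detections)      # refreshes tracker.trackers {id: per-object record}
cap.release()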
Exemple #11
0
def track(data_file, reverse=False, verbose=0):
    if (verbose == 1):
        print("Opening File...")

    f = h5py.File(data_file, "r+")
    mot_tracker = Sort()
    tracks_n = f["tracks_n"].value[0]

    start_count = find_start_count(list(f.keys()))

    if (not reverse):
        frame_indices = range(start_count, f['frame_number'][0])
    else:
        frame_indices = reversed(
            range(start_count, f['frame_number'][0]))

    if (verbose == 1):
        print("Starting loop...")
    for i in frame_indices:
        frame = "frame{}".format(i)

        bbox_handle = f[frame]['rois']
        detection = bbox_handle[()]

        scores = f[frame]['scores'][()]
        number_of_masks = scores.shape[0]

        detection_with_scores = np.hstack(
            (detection, np.reshape(scores, (-1, 1))))
        if (verbose == 1):
            print("detections with scores:")
            print(detection_with_scores)

        track_bbs_ids = mot_tracker.update(detection_with_scores)

        if (verbose == 1):
            print("tracked bbs:")
            print(track_bbs_ids)

        # Associate the track_BBs with the original bbs
        # for each of the track bbs
        # find the nearest neighbour in the original detections
        # associate the ID with the index of the original detection
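        # (a sketch of what such a find_nn helper might look like follows this example)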

        index_array = np.zeros(number_of_masks)

        if verbose == 1: print("number of masks {}".format(number_of_masks))

        for track in track_bbs_ids:
            nn_index = find_nn(track[:-1], detection)
            index_array[nn_index] = track[-1]

        if (verbose == 1):
            print("The index array is")
            print(index_array)

        max_idx = np.amax(index_array) if number_of_masks > 0 else 0
        if (max_idx > tracks_n):
            tracks_n = max_idx

        ID_dataset_key = "{}/IDs".format(frame)

        if (ID_dataset_key in f):
            # The dataset already exists (e.g. from a previous forward pass),
            # so the IDs from this pass go into the second column
            f[ID_dataset_key][:, 1] = index_array
        else:
            f.create_dataset(ID_dataset_key, (index_array.shape[0], 2))
            f[ID_dataset_key][:, 0] = index_array

    f["tracks_n"][0] = tracks_n

    KalmanBoxTracker.count = 0

    f.close()
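The example above relies on a find_nn helper (not shown) to match each tracked box back to the original detection by nearest neighbour, as its comments describe. A plausible sketch of such a helper over [x1, y1, x2, y2] boxes, offered as an assumption rather than the original implementation:

import numpy as np

def find_nn(track_box, detections):
    # track_box: [x1, y1, x2, y2]; detections: (N, 4) array of boxes.
    # Return the index of the detection whose corners are closest in L2 distance.
    diffs = np.asarray(detections, dtype=float) - np.asarray(track_box, dtype=float)
    return int(np.argmin(np.linalg.norm(diffs, axis=1)))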
Exemple #12
0
class MOT():
    def __init__(self, **kwargs):
        print(kwargs)
        self._state = {}
        self._statistics = []
        self._class_count = dict(
            zip(kwargs['class_ids'], np.zeros(len(kwargs['class_ids']))))
        # If the distance between the centers of two bboxes is less than
        # _max_distance, the object is considered to be staying in place
        self._max_distance = kwargs.get('max_distance',
                                        DEFAULT_MAX_DISTANCE_BETWEEN_POINTS)

        # After _warmup_frames frames we start comparing bbox centers for a tracked object
        self._warmup_frames = kwargs.get('warmup_frames', DEFAULT_WARMUP_FRAMES)

        self._line_y = kwargs.get('line_y', 0)

        min_hits = kwargs.get('min_hits', DEFAUTL_MIN_HITS)
        max_age = kwargs.get('max_age', DEFAULT_MAX_AGE)
        #self.display_config()
        self._mot_tracker = Sort(max_age, min_hits)

    def display_config(self):
        print('line_y')
        print(self._line_y)
        print('warmup_frames')
        print(self._warmup_frames)
        print('max_distance')
        print(self._max_distance)

    def update_state(self, boxes, scores, classes, timestamp):
        dets = np.array(boxes)
        dets = np.hstack((dets, scores.reshape(scores.shape[0], 1)))
        trackers, matched, unmatched_dets = self._mot_tracker.update(dets)
        boxes, scores, classes, ids = self.mot_output_postprocess(
            trackers, boxes, scores, classes, matched, unmatched_dets)
        filtered_inds, object_crossed = self.filter_moving_obj_ids(
            boxes, scores, classes, ids)

        if len(object_crossed) > 0:
            self._statistics.append({
                'timestamp': timestamp,
                'class_count': self._class_count.copy(),
                'objects': object_crossed
            })

        scores = scores.reshape((scores.shape[0], ))
        classes = classes.reshape((classes.shape[0], ))
        classes = classes.astype(int)

        return filtered_inds, boxes, scores, classes, ids

    def filter_moving_obj_ids(self, boxes, scores, classes, ids):
        filtered_inds = set()
        object_crossed = []
        for i, obj_id in enumerate(ids):
            top, left, bottom, right = boxes[i]
            w = right - left
            h = bottom - top
            x_c = left + w / 2
            y_c = top + h / 2
            if obj_id in self._state:
                state_obj = self._state[obj_id]
                if state_obj['frame_num'] < self._warmup_frames:
                    state_obj['frame_num'] += 1
                    self._state[obj_id] = state_obj
                else:
                    if not self.is_close([x_c, y_c], state_obj['origin_pos']) and \
                        state_obj['origin_pos'][1] < y_c:
                        filtered_inds.add(i)

                        if not state_obj['already_counted']:
                            origin_y = state_obj['origin_pos'][1]

                            if state_obj['origin_pos'][
                                    1] < self._line_y and y_c >= self._line_y:
                                self._class_count[classes[i]] += 1
                                state_obj['already_counted'] = True
                                self._state[obj_id] = state_obj
                                object_crossed.append([classes[i], scores[i]])

            else:
                new_obj = {
                    'frame_num': 1,
                    'origin_pos': [x_c, y_c],
                    'already_counted': False
                }
                self._state[obj_id] = new_obj

        return filtered_inds, object_crossed

    def mot_output_postprocess(self, trackers, boxes, scores, classes, matched,
                               unmatched_dets):
        trackers = trackers[::-1]

        matched = matched[matched[:, 1].argsort()]
        new_ind = matched[:, 0]

        boxes_unmatched = np.empty((0, 4))
        scores_unmatched = np.empty((0, 1))
        classes_unmatched = np.empty((0, 1))
        if len(unmatched_dets) > 0:
            boxes_unmatched = boxes.take(unmatched_dets, axis=0)
            scores_unmatched = scores.take(unmatched_dets, axis=0)
            classes_unmatched = classes.take(unmatched_dets, axis=0)

        boxes = trackers[:, 0:4]
        scores = scores.take(new_ind, axis=0)
        classes = classes.take(new_ind, axis=0)

        ids = trackers[:, 4]

        scores = scores.reshape(-1, 1)
        classes = classes.reshape(-1, 1)
        scores_unmatched = scores_unmatched.reshape(-1, 1)
        classes_unmatched = classes_unmatched.reshape(-1, 1)

        boxes = np.vstack((boxes, boxes_unmatched))
        scores = np.vstack((scores, scores_unmatched))
        classes = np.vstack((classes, classes_unmatched))

        scores = scores.reshape((-1, ))
        classes = classes.reshape((-1, ))

        return boxes, scores, classes, ids

    def get_class_count(self):
        return self._class_count

    def get_statistics(self):
        return self._statistics

    def is_close(self, point_1, point_2):
        dist = np.linalg.norm(np.array(point_1) - np.array(point_2))
        return dist < self._max_distance