Example #1
0
    def track_single_video(self, tracker, video, v_id=0):
        r"""
        Track all frames of a single video without re-initialisation and
        write the predicted boxes and confidence scores to disk.

        The tracker is initialised on the first frame from the ground truth
        and updated on every following frame. Two files are written under
        ``<save_root_dir>/<video name>/``: ``<name>_001.txt`` (one region per
        line, ``1`` for the init frame) and ``<name>_001_confidence.value``
        (one score per line, empty line for the init frame). When the
        ``save_video`` hyper-parameter is set, an MJPG ``.avi`` with the
        predicted box and score drawn on each frame is written to
        ``self.save_video_dir``.

        Arguments
        ---------
        tracker: PipelineBase
            pipeline; ``init(im, location)``, ``update(im)`` and
            ``_state["pscore"]`` are used here
        video: str
            video name, used as key into ``self.dataset``
        v_id: int
            video id (only used in the log message)

        Returns
        -------
        float
            index of the last frame divided by the accumulated tracking time
            in seconds; 0.0 when no time was accumulated (e.g. empty video)
        """
        # NOTE(review): resolved at call time instead of module import time,
        # presumably to avoid an import cycle -- confirm before hoisting.
        region_utils = importlib.import_module(
            "videoanalyst.evaluation.vot_benchmark.pysot.utils.region")
        vot_float2str = region_utils.vot_float2str

        video = self.dataset[video]
        image_files, gt = video['image_files'], video['gt']
        save_video = self._hyper_params["save_video"]
        start_frame = 0
        regions = []
        scores = []
        elapsed_ticks = 0
        vw = None

        try:
            for f, image_file in enumerate(tqdm(image_files)):
                im = vot_benchmark.get_img(image_file)
                # The drawable copy is only needed when dumping a video; it
                # is made outside of the timed region.
                im_show = im.copy().astype(np.uint8) if save_video else None
                if save_video and vw is None:
                    # Lazily open the writer: frame size is known only once
                    # the first image has been loaded.
                    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
                    video_path = os.path.join(self.save_video_dir,
                                              video['name'] + ".avi")
                    width, height = im.shape[1], im.shape[0]
                    vw = cv2.VideoWriter(video_path, fourcc, 25,
                                         (int(width), int(height)))

                tic = cv2.getTickCount()
                if f == start_frame:  # init
                    cx, cy, w, h = vot_benchmark.get_axis_aligned_bbox(gt[f])
                    location = vot_benchmark.cxy_wh_2_rect((cx, cy), (w, h))
                    tracker.init(im, location)
                    regions.append(1)
                    scores.append(None)
                else:  # tracking (every frame after the first)
                    location = tracker.update(im)
                    regions.append(location)
                    scores.append(tracker._state["pscore"])
                elapsed_ticks += cv2.getTickCount() - tic

                if save_video:
                    top_left = (int(location[0]), int(location[1]))
                    bottom_right = (int(location[0] + location[2]),
                                    int(location[1] + location[3]))
                    cv2.rectangle(im_show, top_left, bottom_right,
                                  (255, 0, 0), 2)
                    cv2.putText(im_show, str(scores[-1]), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    vw.write(im_show)
        finally:
            # Release the writer even if loading/tracking raised, so the
            # file handle is not leaked.
            if vw is not None:
                vw.release()

        toc = elapsed_ticks / cv2.getTickFrequency()
        # Same value as the loop variable ``f`` after a complete pass, but
        # also defined when the video has no frames.
        last_index = max(len(image_files) - 1, 0)
        speed = last_index / toc if toc > 0 else 0.0

        # save result
        result_dir = join(self.save_root_dir, video['name'])
        ensure_dir(result_dir)
        result_path = join(result_dir, '{:s}_001.txt'.format(video['name']))
        with open(result_path, "w") as fout:
            for region in regions:
                if isinstance(region, int):
                    fout.write("{:d}\n".format(region))
                else:
                    fout.write(','.join(
                        [vot_float2str("%.4f", i) for i in region]) + '\n')
        result_path = os.path.join(
            result_dir, '{}_001_confidence.value'.format(video['name']))
        with open(result_path, 'w') as fout:
            for score in scores:
                if score is None:
                    fout.write('\n')
                else:
                    fout.write("{:.6f}\n".format(score))
        logger.info(
            '({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}'.format(
                v_id, video['name'], toc, speed))

        return speed
Example #2
0
    def track_single_video(self, tracker, video, v_id=0):
        r"""
        track frames in single video with VOT rules

        The tracker is initialised from the ground truth on the start frame.
        On each following frame the predicted box is compared with the
        ground-truth polygon through ``vot_benchmark.vot_overlap``; when the
        result is falsy the frame is counted as lost and the tracker is
        re-initialised 5 frames later. One entry per frame is written to
        ``<save_root_dir>/<video name>/<name>_001.txt``: ``1`` for an init
        frame (the ground truth itself for non-VOT datasets), ``2`` for a
        lost frame, ``0`` for a skipped frame, otherwise the predicted box.

        Arguments
        ---------
        tracker: PipelineBase
            pipeline; ``init(im, location)`` and ``update(im)`` are used here
        video: str
            video name, used as key into ``self.dataset``
        v_id: int
            video id (only used in the log message)

        Returns
        -------
        (int, float)
            number of lost frames, and index of the last frame divided by
            the accumulated tracking time in seconds (0.0 when no time was
            accumulated, e.g. empty video)
        """
        video = self.dataset[video]
        image_files, gt = video['image_files'], video['gt']
        regions = []
        start_frame, lost_times, elapsed_ticks = 0, 0, 0

        for f, image_file in enumerate(tqdm(image_files)):
            im = vot_benchmark.get_img(image_file)

            tic = cv2.getTickCount()
            if f == start_frame:  # init
                cx, cy, w, h = vot_benchmark.get_axis_aligned_bbox(gt[f])
                location = vot_benchmark.cxy_wh_2_rect((cx, cy), (w, h))
                tracker.init(im, location)
                regions.append(1 if 'VOT' in self.dataset_name else gt[f])
            elif f > start_frame:  # tracking
                location = tracker.update(im)

                # Flattened corner lists: 8 ground-truth coordinates, and
                # the predicted box expanded to its four corners.
                gt_polygon = tuple(gt[f][i] for i in range(8))
                left, top = location[0], location[1]
                right, bottom = left + location[2], top + location[3]
                pred_polygon = (left, top, right, top,
                                right, bottom, left, bottom)
                b_overlap = vot_benchmark.vot_overlap(
                    gt_polygon, pred_polygon, (im.shape[1], im.shape[0]))

                if b_overlap:
                    regions.append(location)
                else:  # lost
                    regions.append(2)
                    lost_times += 1
                    start_frame = f + 5  # skip 5 frames
            else:  # skip
                regions.append(0)
            elapsed_ticks += cv2.getTickCount() - tic

        toc = elapsed_ticks / cv2.getTickFrequency()
        # Same value as the loop variable ``f`` after a complete pass, but
        # also defined when the video has no frames.
        last_index = max(len(image_files) - 1, 0)
        speed = last_index / toc if toc > 0 else 0.0

        # save result
        result_dir = join(self.save_root_dir, video['name'])
        ensure_dir(result_dir)
        result_path = join(result_dir, '{:s}_001.txt'.format(video['name']))
        with open(result_path, "w") as fout:
            for region in regions:
                if isinstance(region, int):
                    fout.write("{:d}\n".format(region))
                else:
                    fout.write(','.join(
                        [vot_benchmark.vot_float2str("%.4f", i)
                         for i in region]) + '\n')

        logger.info(
            '({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps Lost: {:d} '
            .format(v_id, video['name'], toc, speed, lost_times))

        return lost_times, speed
Example #3
0
    def track_single_video(self, tracker, video, v_id=0):
        r"""
        track frames in single video with VOT rules

        The tracker is initialised from the ground truth on the start frame.
        On each following frame the predicted box is compared with the
        ground-truth polygon through ``vot_overlap``; when the result is
        falsy the frame is counted as lost and the tracker is re-initialised
        5 frames later. One entry per frame is written to
        ``<save_root_dir>/<video name>/<name>_001.txt``: ``1`` for an init
        frame (the ground truth itself for non-VOT datasets), ``2`` for a
        lost frame, ``0`` for a skipped frame, otherwise the predicted box.

        When ``self.test_video`` is a non-empty string, every video whose
        name differs from it is skipped and ``(0, 0)`` is returned. When
        ``self.vis`` is set, ground truth and prediction are drawn and shown
        in an OpenCV window for each tracked frame.

        Arguments
        ---------
        tracker: PipelineBase
            pipeline; ``init(im, location, gt)`` and ``update(im)`` are used
            here
        video: str
            video name, used as key into ``self.dataset``
        v_id: int
            video id (only used in the log message)

        Returns
        -------
        (int, float)
            number of lost frames, and index of the last frame divided by
            the accumulated tracking time in seconds (0.0 when no time was
            accumulated, e.g. empty video)
        """
        # NOTE(review): resolved at call time instead of module import time,
        # presumably to avoid an import cycle -- confirm before hoisting.
        region_utils = importlib.import_module(
            "videoanalyst.evaluation.vot_benchmark.pysot.utils.region")
        vot_overlap = region_utils.vot_overlap
        vot_float2str = region_utils.vot_float2str

        video = self.dataset[video]
        # test one special video
        if self.test_video != '' and video['name'] != self.test_video:
            return 0, 0

        image_files, gt = video['image_files'], video['gt']
        regions = []
        start_frame, lost_times, elapsed_ticks = 0, 0, 0

        for f, image_file in enumerate(tqdm(image_files)):
            im = vot_benchmark.get_img(image_file)
            # The drawable copy is only needed for visualisation; it is made
            # outside of the timed region.
            im_show = im.copy().astype(np.uint8) if self.vis else None

            tic = cv2.getTickCount()
            if f == start_frame:  # init
                cx, cy, w, h = vot_benchmark.get_axis_aligned_bbox(gt[f])
                location = vot_benchmark.cxy_wh_2_rect((cx, cy), (w, h))
                tracker.init(im, location, gt[f])
                regions.append(1 if 'VOT' in self.dataset_name else gt[f])
                if self.vis:
                    cv2.destroyAllWindows()
            elif f > start_frame:  # tracking
                location = tracker.update(im)

                # Flattened corner lists: 8 ground-truth coordinates, and
                # the predicted box expanded to its four corners.
                gt_polygon = tuple(gt[f][i] for i in range(8))
                left, top = location[0], location[1]
                right, bottom = left + location[2], top + location[3]
                pred_polygon = (left, top, right, top,
                                right, bottom, left, bottom)
                b_overlap = vot_overlap(gt_polygon, pred_polygon,
                                        (im.shape[1], im.shape[0]))

                # visualization
                if self.vis:
                    # np.int was removed in NumPy 1.24; int32 is the point
                    # dtype cv2.polylines operates on.
                    gt_pts = np.array(gt_polygon, np.int32).reshape(
                        (-1, 1, 2))
                    pred_pts = np.array(pred_polygon, np.int32).reshape(
                        (-1, 1, 2))
                    cv2.polylines(im_show, [gt_pts], True, (0, 255, 0), 3)
                    cv2.polylines(im_show, [pred_pts], True, (0, 255, 255), 3)
                    cv2.putText(im_show, str(f), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(im_show, str(lost_times), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video['name'], im_show)
                    cv2.waitKey(10)

                if b_overlap:
                    regions.append(location)
                else:  # lost
                    regions.append(2)
                    lost_times += 1
                    start_frame = f + 5  # skip 5 frames
            else:  # skip
                regions.append(0)
            elapsed_ticks += cv2.getTickCount() - tic

        toc = elapsed_ticks / cv2.getTickFrequency()
        # Same value as the loop variable ``f`` after a complete pass, but
        # also defined when the video has no frames.
        last_index = max(len(image_files) - 1, 0)
        speed = last_index / toc if toc > 0 else 0.0

        # save result
        result_dir = join(self.save_root_dir, video['name'])
        ensure_dir(result_dir)
        result_path = join(result_dir, '{:s}_001.txt'.format(video['name']))
        with open(result_path, "w") as fout:
            for region in regions:
                if isinstance(region, int):
                    fout.write("{:d}\n".format(region))
                else:
                    fout.write(','.join(
                        [vot_float2str("%.4f", i) for i in region]) + '\n')

        logger.info(
            '({:d}) Video: {:12s} Time: {:02.1f}s Speed: {:3.1f}fps Lost: {:d} '
            .format(v_id, video['name'], toc, speed, lost_times))

        return lost_times, speed