Esempio n. 1
0
    def __init__(self, rf_model_code, enable_rf=True):
        """Create the base tracker and, optionally, the refinement module.

        Args:
            rf_model_code: Value substituted into the module-level ``RF_type``
                and ``refine_path`` format templates.
            enable_rf: When true, a ``RefineModule`` is built and stored as
                ``self.RF_module``. When false, the refinement tag is removed
                from the tracker name and no refinement module is created.
        """
        rf_tag = RF_type.format(rf_model_code)
        full_name = 'RTMDNet' + '{}-{}'.format(rf_tag, selector_path)
        # Without refinement the tag is stripped from the reported name.
        model_name = full_name if enable_rf else full_name.replace(rf_tag, '')
        super(RTMDNet_RF, self).__init__(name=model_name)

        self.enable_rf = enable_rf
        self.tracker = RT_MDNet()
        if not self.enable_rf:
            return
        self.RF_module = RefineModule(
            refine_path.format(rf_model_code),
            selector_path,
            search_factor=sr,
            input_sz=input_sz,
        )
Esempio n. 2
0
 def __init__(self, threshold=0.65):
     """Create the base tracker and the Alpha-Refine module.

     Args:
         threshold: Value stored on the instance as ``self.THRES``.
     """
     self.THRES = threshold
     # Base tracker: per the original note, a tracker object is more than
     # just a network (it also carries some post-processing).
     self.base_tracker = RT_MDNet()
     # Alpha-Refine module.
     self.alpha = RefineModule(refine_path, sr, input_sz=input_sz)
Esempio n. 3
0
class RTMDNet_RF(GOT10kTracker):
    """RT_MDNet tracker with an optional refinement stage.

    Exposes ``init`` / ``update`` methods that drive an ``RT_MDNet`` instance
    and, when enabled, pass each prediction through a ``RefineModule``.
    """

    def __init__(self, rf_model_code, enable_rf=True):
        """Create the base tracker and, optionally, the refinement module.

        Args:
            rf_model_code: Value substituted into the module-level ``RF_type``
                and ``refine_path`` format templates.
            enable_rf: When true, a ``RefineModule`` is built and stored as
                ``self.RF_module``. When false, the refinement tag is removed
                from the tracker name and no refinement module is created.
        """
        rf_tag = RF_type.format(rf_model_code)
        full_name = 'RTMDNet' + '{}-{}'.format(rf_tag, selector_path)
        # Without refinement the tag is stripped from the reported name.
        model_name = full_name if enable_rf else full_name.replace(rf_tag, '')
        super(RTMDNet_RF, self).__init__(name=model_name)

        self.enable_rf = enable_rf
        self.tracker = RT_MDNet()
        if not self.enable_rf:
            return
        self.RF_module = RefineModule(
            refine_path.format(rf_model_code),
            selector_path,
            search_factor=sr,
            input_sz=input_sz,
        )

    def init(self, image, box):
        """Initialise the tracker (and refinement module) on the first frame.

        Args:
            image: Frame, converted with ``np.array``; its shape must unpack
                into exactly three values (height, width, third axis).
            box: Initial target annotation, passed through
                ``get_axis_aligned_bbox``; also stored as ``self.box``.
        """
        frame = np.array(image)
        height, width, _ = frame.shape
        self.im_H = height
        self.im_W = width

        cx, cy, w, h = get_axis_aligned_bbox(np.array(box))
        start_box = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]

        # Each consumer receives its own array built from the same values.
        self.tracker.initialize_seq(frame, np.array(start_box))
        if self.enable_rf:
            self.RF_module.initialize(frame, np.array(start_box))
        self.box = box

    def update(self, image):
        """Track the target in a new frame and return the predicted box.

        With refinement disabled the raw output of ``self.tracker.track`` is
        returned. Otherwise the prediction is refined, clipped with
        ``bbox_clip`` against the image size recorded in ``init``, and a copy
        is written back to ``self.tracker.target_bbox``.
        """
        frame = np.array(image)
        tracked = self.tracker.track(frame)
        if not self.enable_rf:
            return tracked

        # Refine the tracking result, then apply the boundary and size limit.
        refined = self.RF_module.refine(frame, np.array(tracked))
        refined = bbox_clip(refined, (self.im_H, self.im_W))
        # Feed the refined box back as the tracker state.
        self.tracker.target_bbox = refined.copy()
        return refined
Esempio n. 4
0
def main(model_code):
    """Evaluate RT_MDNet plus the refinement module with VOT-style restarts.

    Only the datasets 'VOT2016', 'VOT2018' and 'VOT2019' are processed; for
    any other ``args.dataset`` nothing happens after the refinement module is
    built. For each video the tracker and refinement module are initialised
    from the ground truth. When the refined prediction no longer overlaps the
    ground truth, the frame is recorded as lost and tracking is re-initialised
    five frames later. Results are written to
    ``<save_dir>/<dataset>/<model_name>/baseline/<video>/<video>_001.txt``.

    Args:
        model_code: Value substituted into the module-level ``refine_path``
            and ``RF_type`` format templates.
    """
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_, load_img=False)

    # Build the refinement module once; it is re-initialised for every video.
    RF_module = RefineModule(refine_path.format(model_code), selector_path, search_factor=sr,
                             input_sz=input_sz)
    model_name = 'RT_MDNet' + '_{}-{}'.format(RF_type.format(model_code), selector_path) + '_%d' % (args.run_id)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '' and video.name != args.video:
                # test one special video
                continue
            tracker = RT_MDNet()
            frame_counter = 0  # index of the next (re-)initialisation frame
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB format
                if len(gt_bbox) == 4:
                    # Expand a 4-value box into four corner points.
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                               gt_bbox[0], gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1] + gt_bbox[3] - 1,
                               gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    # initialize tracker
                    tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                    # initialize refine module for this specific video
                    RF_module.initialize(img_RGB, np.array(gt_bbox_))
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)  # marker written for an init frame
                elif idx > frame_counter:
                    # track
                    ori_bbox = tracker.track(img_RGB)
                    # refine tracking result
                    pred_bbox = RF_module.refine(img_RGB, np.array(ori_bbox))
                    pred_bbox = bbox_clip(pred_bbox, (H, W))
                    tracker.target_bbox = pred_bbox.copy()
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)  # marker written for a lost frame
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    # Frame skipped while waiting for the restart.
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int was removed in NumPy 1.24; use an explicit
                    # 32-bit integer dtype for the polygon points.
                    cv2.polylines(img, [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()

            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'baseline', video.name)
            # exist_ok avoids the check-then-create race of isdir + makedirs.
            os.makedirs(video_path, exist_ok=True)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        # Integer markers (0 / 1 / 2) are written on their own line.
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                    v_idx+1, video.name, toc, idx / toc, lost_number))
Esempio n. 5
0
def main():
    """Run RT_MDNet with a ground-truth-centred ("oracle") state update.

    For every video in ``args.dataset`` the tracker is initialised on frame 0.
    On each later frame the clipped tracker output is saved as the result,
    while the tracker state is overwritten with a box of the same size whose
    position is recomputed from the ground-truth centre. Videos that already
    have a result file are skipped. Results are written to
    ``<save_dir>/<dataset>/RTMDNet-oracle/<video>.txt``.
    """
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)
    model_name = 'RTMDNet-oracle'
    model_path = os.path.join(save_dir, args.dataset, model_name)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        if os.path.exists(result_path):
            continue
        if args.video != '' and video.name != args.video:
            # test one special video
            continue
        # build tracker
        tracker = RT_MDNet()
        toc = 0
        pred_bboxes = []
        for idx, (img, gt_bbox) in enumerate(video):
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB format
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))

                pred_bbox = gt_bbox_
                pred_bboxes.append(pred_bbox)
            else:
                ori_bbox = tracker.track(img_RGB)
                pred_bbox = bbox_clip(ori_bbox, (H, W))
                # Oracle state: keep the predicted size, but place the box so
                # that it is centred on the ground-truth centre.
                oracle_box = pred_bbox.copy()
                cx, cy, _, _ = get_axis_aligned_bbox(np.array(gt_bbox))
                oracle_box[:2] = np.array([cx, cy]) - oracle_box[2:] / 2
                tracker.target_bbox = oracle_box
                # The saved result is the tracker's own (clipped) prediction.
                pred_bboxes.append(pred_bbox)

            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                # Drawing calls need integer pixel coordinates; the oracle
                # box was previously passed as floats, unlike the others.
                gt_draw = list(map(int, gt_bbox))
                oracle_draw = list(map(int, oracle_box))
                pred_draw = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_draw[0], gt_draw[1]),
                    (gt_draw[0] + gt_draw[2], gt_draw[1] + gt_draw[3]),
                    (0, 0, 255), 3)
                cv2.rectangle(
                    img, (oracle_draw[0], oracle_draw[1]),
                    (oracle_draw[0] + oracle_draw[2],
                     oracle_draw[1] + oracle_draw[3]), (255, 0, 0), 3)
                cv2.rectangle(
                    img, (pred_draw[0], pred_draw[1]),
                    (pred_draw[0] + pred_draw[2], pred_draw[1] + pred_draw[3]),
                    (0, 255, 0), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        # exist_ok avoids the check-then-create race of isdir + makedirs.
        os.makedirs(model_path, exist_ok=True)
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
Esempio n. 6
0
def main():
    """Run RT_MDNet with the refinement module over every video (OPE).

    For every video in ``args.dataset`` the tracker and the refinement module
    are initialised on frame 0. Each later prediction is refined, clipped
    with ``bbox_clip``, and fed back as the tracker state. Videos that already
    have a result file are skipped. Results are written to
    ``<save_dir>/<dataset>/<model_name>/<video>.txt``; for the 'VOT2018-LT'
    dataset the first line is ``1`` instead of the initial box.
    """
    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)
    # Build the refinement module once; it is re-initialised for every video.
    RF_module = RefineModule(refine_path,
                             selector_path,
                             search_factor=sr,
                             input_sz=input_sz)
    model_name = 'RTMDNet' + '{}-{}'.format(RF_type, selector_path)
    model_path = os.path.join(save_dir, args.dataset, model_name)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        if os.path.exists(result_path):
            continue
        if args.video != '' and video.name != args.video:
            # test one special video
            continue
        # build tracker
        tracker = RT_MDNet()
        toc = 0
        pred_bboxes = []
        for idx, (img, gt_bbox) in enumerate(video):
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB format
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                # initialize refine module for this specific video
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                if 'VOT2018-LT' == args.dataset:
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                ori_bbox = tracker.track(img_RGB)
                # refine tracking results
                pred_bbox = RF_module.refine(img_RGB, np.array(ori_bbox))
                # boundary and size limit
                pred_bbox = bbox_clip(pred_bbox, (H, W))
                # update state
                tracker.target_bbox = pred_bbox.copy()
                pred_bboxes.append(pred_bbox)
            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                # Drawing calls need integer pixel coordinates.
                gt_draw = list(map(int, gt_bbox))
                ori_draw = list(map(int, ori_bbox))
                pred_draw = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_draw[0], gt_draw[1]),
                    (gt_draw[0] + gt_draw[2], gt_draw[1] + gt_draw[3]),
                    (0, 0, 255), 3)
                cv2.rectangle(
                    img, (ori_draw[0], ori_draw[1]),
                    (ori_draw[0] + ori_draw[2], ori_draw[1] + ori_draw[3]),
                    (255, 0, 0), 3)
                cv2.rectangle(
                    img, (pred_draw[0], pred_draw[1]),
                    (pred_draw[0] + pred_draw[2], pred_draw[1] + pred_draw[3]),
                    (0, 255, 0), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        # exist_ok avoids the check-then-create race of isdir + makedirs.
        os.makedirs(model_path, exist_ok=True)
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
Esempio n. 7
0
 def __init__(self, threshold=0.65):
     """Create the base tracker.

     Args:
         threshold: Value stored on the instance as ``self.THRES``.
     """
     self.THRES = threshold
     # Base tracker: per the original note, a tracker object is more than
     # just a network (it also carries some post-processing).
     self.base_tracker = RT_MDNet()
Esempio n. 8
0
def main():
    """Run plain RT_MDNet over every sequence under ``<dataset_root_>/frames``.

    Each sequence directory is expected to hold frames named ``0.jpg``,
    ``1.jpg``, ... and to have an annotation file at
    ``<dataset_root_>/anno/<sequence>.txt``, which is used to initialise the
    tracker on frame 0. Results are written to
    ``<save_dir>/trackingnet/RTMDNet/<sequence>.txt``.

    Raises:
        IOError: If a frame image cannot be read.
    """
    # create dataset
    dataset_root = dataset_root_
    frames_dir = os.path.join(dataset_root, 'frames')
    seq_list = sorted(os.listdir(frames_dir))

    model_name = 'RTMDNet'
    model_path = os.path.join(save_dir, 'trackingnet', model_name)

    # OPE tracking
    for v_idx, seq_name in enumerate(seq_list):
        if args.video != '' and seq_name != args.video:
            # test one special video
            continue
        # build tracker
        toc = 0
        pred_bboxes = []
        tracker = RT_MDNet()
        seq_frame_dir = os.path.join(frames_dir, seq_name)
        num_frames = len(os.listdir(seq_frame_dir))
        gt_file = os.path.join(dataset_root, 'anno', '%s.txt' % seq_name)
        gt_bbox = np.loadtxt(gt_file, dtype=np.float32,
                             delimiter=',').squeeze()
        for idx in range(num_frames):
            frame_path = os.path.join(seq_frame_dir, '%d.jpg' % idx)
            img = cv2.imread(frame_path)
            if img is None:
                # cv2.imread returns None instead of raising; fail with the
                # offending path rather than an opaque TypeError on slicing.
                raise IOError('Failed to read frame: {}'.format(frame_path))
            # get RGB format image
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                # initialize tracker
                tracker.initialize_seq(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                pred_bboxes.append(pred_bbox)
            else:
                pred_bbox = tracker.track(img_RGB)
                pred_bboxes.append(pred_bbox)

            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                # Drawing calls need integer pixel coordinates.
                pred_draw = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (pred_draw[0], pred_draw[1]),
                    (pred_draw[0] + pred_draw[2], pred_draw[1] + pred_draw[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(seq_name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        # exist_ok avoids the check-then-create race of isdir + makedirs.
        os.makedirs(model_path, exist_ok=True)
        result_path = os.path.join(model_path, '{}.txt'.format(seq_name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, seq_name, toc, idx / toc))