Example 1
    def __init__(self, rf_model_code, enable_rf=True):
        _tracker_name, _tracker_param, model_name = self._get_setting()
        model_name = model_name.format(rf_model_code)
        if not enable_rf:
            model_name = model_name.replace(RF_type.format(rf_model_code), '')
        super(Pytracking_RF, self).__init__(name=model_name)
        self.enable_rf = enable_rf
        # create tracker
        tracker_info = Tracker(_tracker_name, _tracker_param, None)
        params = tracker_info.get_parameters()
        params.visualization = False
        params.debug = False
        params.visdom_info = {
            'use_visdom': False,
            'server': '127.0.0.1',
            'port': 8097
        }
        self.tracker = tracker_info.tracker_class(params)

        # create Refinement module
        if self.enable_rf:
            self.RF_module = RefineModule(refine_path.format(rf_model_code),
                                          selector_path,
                                          search_factor=sr,
                                          input_sz=input_sz)
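Every example in this section repeats the same tracker-construction boilerplate (visualization off, debug off, visdom disabled). A small factory could fold that into one call; this is a sketch that assumes the same pytracking `Tracker` wrapper the snippets import:

def build_base_tracker(tracker_name, param_name, visualization=False, debug=False):
    """Construct a pytracking tracker with visualization and visdom disabled.

    A sketch of the construction pattern shared by these examples; assumes
    the `Tracker` wrapper they import.
    """
    tracker_info = Tracker(tracker_name, param_name, None)
    params = tracker_info.get_parameters()
    params.visualization = visualization
    params.debug = debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    return tracker_info.tracker_class(params)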
Example 2
 def __init__(self,
              tracker_name='dimp',
              para_name='dimp50_vot19',
              refine_model_name='ARcm_coco_seg',
              threshold=0.15):
     self.THRES = threshold
     '''create tracker'''
     '''DIMP'''
     tracker_info = Tracker(tracker_name, para_name, None)
     params = tracker_info.get_parameters()
     params.visualization = False
     params.debug = False
     params.visdom_info = {
         'use_visdom': False,
         'server': '127.0.0.1',
         'port': 8097
     }
     self.dimp = tracker_info.tracker_class(params)
     '''Alpha-Refine'''
     project_path = os.path.join(os.path.dirname(__file__), '..', '..')
     refine_root = os.path.join(project_path,
                                'ltr/checkpoints/ltr/ARcm_seg/')
     refine_path = os.path.join(refine_root, refine_model_name)
     '''2020.4.25 input size: 384x384'''
     self.alpha = ARcm_seg(refine_path, input_sz=384)
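Because the checkpoint path above is assembled relative to `__file__`, it silently breaks if the module moves. A quick sanity check before constructing the refiner (a sketch mirroring the snippet's layout; `refine_model_name` is the same argument as above):

import os

refine_path = os.path.join(os.path.dirname(__file__), '..', '..',
                           'ltr/checkpoints/ltr/ARcm_seg/', refine_model_name)
if not os.path.exists(refine_path):
    raise FileNotFoundError('Alpha-Refine checkpoint not found: %s' % refine_path)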
Example 3
 def __init__(self, tracker_name='dimp', para_name='super_dimp', threshold=0.65):
     self.THRES = threshold
     '''create tracker'''
     tracker_info = Tracker(tracker_name, para_name, None)
     params = tracker_info.get_parameters()
     params.visualization = False
     params.debug = False
     params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
     self.base_tracker = tracker_info.tracker_class(params)
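A minimal sketch of driving the wrapper above in a per-frame loop, following the conventions of the later examples (RGB frames, an 'init_bbox' key on initialization, a 'target_bbox' key in the output); `wrapper`, `first_frame`, `init_bbox`, and `frames` are placeholders:

wrapper.base_tracker.initialize(first_frame, {'init_bbox': init_bbox})
for frame in frames:
    outputs = wrapper.base_tracker.track(frame)
    pred_bbox = outputs['target_bbox']  # [x, y, w, h]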
Example 4
    def local_init(self, image, init_bbox):
        local_tracker = Tracker('segm', 'default_params')
        params = local_tracker.get_parameters()

        debug_ = getattr(params, 'debug', 0)
        params.debug = debug_

        params.tracker_name = local_tracker.name
        params.param_name = local_tracker.parameter_name

        self.local_Tracker = local_tracker.tracker_class(params)
        self.local_Tracker.initialize(image, init_bbox)
Example 5
    def local_init(self, image, init_bbox):
        local_tracker = Tracker('dimp', 'dimp50')
        params = local_tracker.get_parameters()

        debug_ = getattr(params, 'debug', 0)
        params.debug = debug_

        params.tracker_name = local_tracker.name
        params.param_name = local_tracker.parameter_name

        self.local_Tracker = local_tracker.tracker_class(params)
        init_box = {'init_bbox': init_bbox}
        self.local_Tracker.initialize(image, init_box)
Example 6
def main():
    # load config
    dataset_root = '/media/zxy/Samsung_T5/Data/DataSets/LaSOT/LaSOT_test'

    # create tracker
    '''Pytracking-RF tracker'''
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {
        'use_visdom': False,
        'server': '127.0.0.1',
        'port': 8097
    }
    tracker = tracker_info.tracker_class(params)
    '''Refinement module'''
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34_15sr_fcn/SEcmnet_ep0040-a.pth.tar"  # RF_CrsM_R34SR15FCN_a

    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-a.pth.tar"  # dimp_dimp50RF_CrsM_R34SR20FCN_a-0_1
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-b.pth.tar"
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-c.pth.tar"
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-d.pth.tar"
    # refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEx_beta/SEcm_r34/SEcmnet_ep0040-e.pth.tar"

    refine_path = "/home/zxy/Desktop/AlphaRefine/experiments/SEbcm/SEbcm-8gpu/SEbcmnet_ep0040.pth.tar"  # RF_CrsM_ARv1_d

    selector_path = 1
    branches = ['corner', 'mask'][0:1]
    sr = 2.0
    input_sz = int(128 * sr)  # 256 when sr = 2.0 (the default)

    RF_module = RefineModule(refine_path,
                             selector_path,
                             branches=branches,
                             search_factor=sr,
                             input_sz=input_sz)

    RF_type = 'RF_CrsM_R34SR20_e'
    # RF_type = 'RF_CrsM_ARv1'

    model_name = '{}_{}{}-{}_{}'.format(args.tracker_name, args.tracker_param,
                                        RF_type, selector_path, args.run_id)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        total_lost = 0

        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one specific video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            '''timing for the refinement module'''
            toc_refine = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
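                    # convert an axis-aligned (x, y, w, h) box into the
                    # 8-value corner polygon [x1,y1, x1,y2, x2,y2, x2,y1]
                    # used by the VOT toolkit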
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                '''get RGB format image'''
                img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    '''Initialize'''
                    gt_bbox_np = np.array(gt_bbox_)
                    gt_bbox_torch = torch.from_numpy(
                        gt_bbox_np.astype(np.float32))
                    init_info = {}
                    init_info['init_bbox'] = gt_bbox_torch
                    _ = tracker.initialize(img_RGB, init_info)
                    '''##### initialize refinement module for specific video'''
                    RF_module.initialize(img_RGB, np.array(gt_bbox_))

                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    '''Track'''
                    outputs = tracker.track(img_RGB)
                    pred_bbox = outputs['target_bbox']
                    '''##### refine tracking results #####'''
                    result_dict = RF_module.refine(img_RGB,
                                                   np.array(pred_bbox))
                    bbox_report = result_dict['bbox_report']
                    bbox_state = result_dict['bbox_state']
                    '''report result and update state'''
                    pred_bbox = bbox_report
                    x1, y1, w, h = bbox_state.tolist()
                    '''add boundary and min size limit'''
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    new_pos = torch.from_numpy(
                        np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                    new_target_sz = torch.from_numpy(
                        np.array([h, w]).astype(np.float32))
                    new_scale = torch.sqrt(new_target_sz.prod() /
                                           tracker.base_target_sz.prod())
                    ##### update
                    tracker.pos = new_pos.clone()
                    tracker.target_sz = new_target_sz
                    tracker.target_scale = new_scale

                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if len(pred_bbox) == 8:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))

    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one specific video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                '''get RGB format image'''
                img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
                tic = cv2.getTickCount()
                if idx == 0:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    '''Initialize'''
                    gt_bbox_np = np.array(gt_bbox_)
                    gt_bbox_torch = torch.from_numpy(
                        gt_bbox_np.astype(np.float32))
                    init_info = {}
                    init_info['init_bbox'] = gt_bbox_torch
                    _ = tracker.initialize(img_RGB, init_info)
                    '''##### initialize refinement module for specific video'''
                    RF_module.initialize(img_RGB, np.array(gt_bbox_))

                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if args.dataset == 'VOT2018-LT':
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    '''Track'''
                    outputs = tracker.track(img_RGB)
                    pred_bbox = outputs['target_bbox']
                    '''##### refine tracking results #####'''
                    pred_bbox = RF_module.refine(
                        cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                        np.array(pred_bbox))
                    x1, y1, w, h = pred_bbox.tolist()
                    '''add boundary and min size limit'''
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    new_pos = torch.from_numpy(
                        np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                    new_target_sz = torch.from_numpy(
                        np.array([h, w]).astype(np.float32))
                    new_scale = torch.sqrt(new_target_sz.prod() /
                                           tracker.base_target_sz.prod())
                    ##### update
                    tracker.pos = new_pos.clone()
                    tracker.target_sz = new_target_sz
                    tracker.target_scale = new_scale

                    pred_bboxes.append(pred_bbox)
                    # scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if args.dataset == 'VOT2018-LT':
                video_path = os.path.join(save_dir, args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        if x is None:
                            f.write('\n')
                        else:
                            f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif args.dataset == 'GOT-10k':
                video_path = os.path.join(save_dir, args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join(save_dir, args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
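Several examples call a bbox_clip helper that none of the snippets define. A plausible minimal implementation, clamping the corners to the image while keeping a minimum box size, might look like this; the exact limits in the original codebase are an assumption:

def bbox_clip(x1, y1, x2, y2, boundary, min_sz=10):
    """Clamp corner coordinates to the image.

    Hypothetical re-implementation of the undefined bbox_clip helper;
    boundary is (H, W) as at the call sites above, and min_sz is an
    assumed minimum side length.
    """
    H, W = boundary
    x1_new = max(0, min(x1, W - min_sz))
    y1_new = max(0, min(y1, H - min_sz))
    x2_new = max(min_sz, min(x2, W))
    y2_new = max(min_sz, min(y2, H))
    return x1_new, y1_new, x2_new, y2_new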
Example 7
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {
        'use_visdom': False,
        'server': '127.0.0.1',
        'port': 8097
    }
    tracker = tracker_info.tracker_class(params)

    model_name = '%s_%s_%d' % (args.tracker_name, args.tracker_param,
                               args.run_id)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(
                os.path.join(save_dir, args.dataset, model_name,
                             '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one specific video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)

                pred_bbox = gt_bbox_
                scores.append(None)
                if args.dataset == 'VOT2018-LT':
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_bbox[0], gt_bbox[1]),
                    (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                    (0, 255, 0), 3)
                cv2.rectangle(
                    img, (pred_bbox[0], pred_bbox[1]),
                    (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
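The OPE writer above stores one "x,y,w,h" line per frame, so a result file can be read back into an (N, 4) array for evaluation; a sketch (the path is illustrative):

import numpy as np

boxes = np.loadtxt('results/LaSOT/dimp_super_dimp_0/airplane-1.txt', delimiter=',')
print(boxes.shape)  # (num_frames, 4)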
Example 8
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {
        'use_visdom': False,
        'server': '127.0.0.1',
        'port': 8097
    }
    tracker = tracker_info.tracker_class(params)

    model_name = args.tracker_name + '_' + args.tracker_param

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        total_lost = 0

        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one specific video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            '''timing for the refinement module'''
            toc_refine = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                '''get RGB format image'''
                img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    '''Initialize'''
                    gt_bbox_np = np.array(gt_bbox_)
                    gt_bbox_torch = torch.from_numpy(
                        gt_bbox_np.astype(np.float32))
                    init_info = {}
                    init_info['init_bbox'] = gt_bbox_torch
                    _ = tracker.initialize(img_RGB, init_info)

                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    '''Track'''
                    outputs = tracker.track(img_RGB)
                    pred_bbox = outputs['target_bbox']
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if len(pred_bbox) == 8:
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
Example 9
def main():
    # load config
    model_name = args.tracker_name + '_' + args.tracker_param
    dataset_root = dataset_root_

    # create tracker
    '''Pytracking tracker'''
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)

    # create dataset
    frames_dir = os.path.join(dataset_root,'frames')
    seq_list = sorted(os.listdir(frames_dir))

    # OPE tracking
    for v_idx, seq_name in enumerate(seq_list):
        if args.video != '':
            # test one specific video
            if seq_name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        seq_frame_dir = os.path.join(frames_dir,seq_name)
        num_frames = len(os.listdir(seq_frame_dir))
        gt_file = os.path.join(dataset_root,'anno','%s.txt'%seq_name)
        gt_bbox = np.loadtxt(gt_file,dtype=np.float32,delimiter=',').squeeze()
        for idx in range(num_frames):
            frame_path = os.path.join(seq_frame_dir,'%d.jpg'%idx)
            img = cv2.imread(frame_path)
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)

                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)

            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']

                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(seq_name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join(save_dir, 'trackingnet', model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(seq_name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x])+'\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx+1, seq_name, toc, idx / toc))
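Example 9 walks a TrackingNet-style directory instead of using DatasetFactory; the layout it expects is roughly the following (inferred from the path handling above):

dataset_root/
├── frames/
│   └── <seq_name>/0.jpg, 1.jpg, ...   # frames named by integer index
└── anno/
    └── <seq_name>.txt                 # initial box as "x,y,w,h"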
Example 10
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {
        'use_visdom': False,
        'server': '127.0.0.1',
        'port': 8097
    }
    tracker = tracker_info.tracker_class(params)

    model_name = 'atom_oracle'

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(
                os.path.join(save_dir, args.dataset, model_name,
                             '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # test one specific video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)

                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)

            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']

                x1, y1, w, h = pred_bbox
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                pred_bbox = np.array([x1, y1, w, h])
                '''##### reset tracking results #####'''
                cx, cy, _, _ = get_axis_aligned_bbox(np.array(gt_bbox))
                if gt_bbox != [0, 0, 0, 0]:
                    new_pos = torch.from_numpy(
                        np.array([cy, cx]).astype(np.float32))
                else:
                    new_pos = torch.from_numpy(
                        np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))

                new_target_sz = torch.from_numpy(
                    np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() /
                                       tracker.base_target_sz.prod())
                ##### update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale

                pred_bboxes.append(pred_bbox)

            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_bbox[0], gt_bbox[1]),
                    (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                    (0, 255, 0), 3)
                cv2.rectangle(
                    img, (pred_bbox[0], pred_bbox[1]),
                    (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()

        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
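The oracle loop above re-centers the tracker on the ground-truth box whenever the annotation is valid. Note that pytracking keeps pos in (y, x) order and target_sz in (h, w) order, and target_scale is the square root of the area ratio against the size at initialization. The scale computation in isolation (a sketch):

import torch

def compute_target_scale(new_h, new_w, base_h, base_w):
    # target_scale = sqrt(area(new) / area(base)), mirroring the update above
    new_sz = torch.tensor([new_h, new_w], dtype=torch.float32)
    base_sz = torch.tensor([base_h, base_w], dtype=torch.float32)
    return torch.sqrt(new_sz.prod() / base_sz.prod())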
Example 11
def main():
    # refine_method = args.refine_method
    model_name = 'siamrpn_' + refine_method
    model_path = '/'
    snapshot_path = os.path.join(
        project_path_, 'experiments/%s/model.pth' % args.tracker_name)
    config_path = os.path.join(
        project_path_, 'experiments/%s/config.yaml' % args.tracker_name)

    cfg.merge_from_file(config_path)
    dataset_root = dataset_root_

    # create model
    '''a model is a neural network (a torch.nn.Module)'''
    model = ModelBuilder()

    # load model
    model = load_pretrain(model, snapshot_path).cuda().eval()

    # build tracker
    '''a tracker is an object that consists of not only a NN but also some post-processing'''
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    '''##### build a refinement module #####'''
    if 'RF' in refine_method:
        RF_module = RefineModule(refine_path,
                                 selector_path,
                                 branches=branches,
                                 search_factor=sr,
                                 input_sz=input_sz)

    elif refine_method == 'iou_net':
        RF_info = Tracker('iou_net', 'iou_net_dimp', None)
        RF_params = RF_info.get_parameters()
        RF_params.visualization = False
        RF_params.debug = False
        RF_params.visdom_info = {
            'use_visdom': False,
            'server': '127.0.0.1',
            'port': 8097
        }
        RF_module = RF_info.tracker_class(RF_params)

    elif refine_method == 'mask':
        RF_module = siammask()
    else:
        raise ValueError("refine_method should be 'RF' or 'iou' or 'mask' ")
    # model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one specific video
                if video.name != args.video:
                    continue
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    gt_bbox = [
                        gt_bbox[0], gt_bbox[1], gt_bbox[0],
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1,
                        gt_bbox[1] + gt_bbox[3] - 1,
                        gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                    ]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    '''##### initialize refinement module for specific video'''
                    if 'RF' in refine_method:
                        RF_module.initialize(
                            cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                            np.array(gt_bbox_))
                    elif refine_method == 'iou_net':
                        gt_bbox_np = np.array(gt_bbox_)
                        gt_bbox_torch = torch.from_numpy(
                            gt_bbox_np.astype(np.float32))
                        init_info = {}
                        init_info['init_bbox'] = gt_bbox_torch
                        RF_module.initialize(
                            cv2.cvtColor(img, cv2.COLOR_BGR2RGB), init_info)
                    elif refine_method == 'mask':
                        RF_module.initialize(img, np.array(gt_bbox_))
                    else:
                        raise ValueError(
                            "refine_method should be 'RF' or 'iou_net' or 'mask'")
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    '''##### refine tracking results #####'''
                    if 'RF' in refine_method or refine_method == 'iou_net':
                        pred_bbox = RF_module.refine(
                            cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                            np.array(pred_bbox))
                        x1, y1, w, h = pred_bbox.tolist()
                        '''add boundary and min size limit'''
                        x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h,
                                                   (H, W))
                        w = x2 - x1
                        h = y2 - y1
                        pred_bbox = np.array([x1, y1, w, h])
                        '''pass new state back to base tracker'''
                        tracker.center_pos = np.array([x1 + w / 2, y1 + h / 2])
                        tracker.size = np.array([w, h])
                    elif refine_method == 'mask':
                        pred_bbox, center_pos, size = RF_module.refine(
                            img, np.array(pred_bbox), VOT=True)
                        # boundary and min size limit have been included in "refine"
                        '''pass new state back to base tracker'''
                        '''pred_bbox is a list with 8 elements'''
                        tracker.center_pos = center_pos
                        tracker.size = size
                    else:
                        raise ValueError(
                            "refine_method should be 'RF' or 'iou_net' or 'mask'")
                    overlap = vot_overlap(pred_bbox, gt_bbox,
                                          (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object
                        pred_bboxes.append(2)
                        frame_counter = idx + 5  # skip 5 frames
                        lost_number += 1
                else:
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    cv2.polylines(
                        img, [np.array(gt_bbox, int).reshape(
                            (-1, 1, 2))], True, (0, 255, 0), 3)
                    if refine_method == 'mask':
                        cv2.polylines(
                            img,
                            [np.array(pred_bbox, int).reshape(
                                (-1, 1, 2))], True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                      (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i)
                                          for i in x]) + '\n')
            print(
                '({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
                .format(v_idx + 1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if video.name + '.txt' in os.listdir(model_path):
                continue
            if args.video != '':
                # test one specific video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    H, W, _ = img.shape
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                    tracker.init(img, gt_bbox_)
                    '''##### initialize refinement module for specific video'''
                    if 'RF' in refine_method:
                        RF_module.initialize(
                            cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                            np.array(gt_bbox_))
                    elif refine_method == 'iou_net':
                        gt_bbox_np = np.array(gt_bbox_)
                        gt_bbox_torch = torch.from_numpy(
                            gt_bbox_np.astype(np.float32))
                        init_info = {}
                        init_info['init_bbox'] = gt_bbox_torch
                        RF_module.initialize(
                            cv2.cvtColor(img, cv2.COLOR_BGR2RGB), init_info)
                    elif refine_method == 'mask':
                        RF_module.initialize(img, np.array(gt_bbox_))
                    else:
                        raise ValueError(
                            "refine_method should be 'RF' or 'iou' or 'mask' ")
                    pred_bbox = gt_bbox_
                    scores.append(None)
                    if args.dataset == 'VOT2018-LT':
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    '''##### refine tracking results #####'''
                    if 'RF' in refine_method or refine_method == 'iou_net':
                        pred_bbox = RF_module.refine(
                            cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                            np.array(pred_bbox))
                    elif refine_method == 'mask':
                        pred_bbox = RF_module.refine(img,
                                                     np.array(pred_bbox),
                                                     VOT=False)
                    else:
                        raise ValueError(
                            "refine_method should be 'RF' or 'iou' or 'mask' ")
                    x1, y1, w, h = pred_bbox.tolist()
                    '''add boundary and min size limit'''
                    x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                    w = x2 - x1
                    h = y2 - y1
                    pred_bbox = np.array([x1, y1, w, h])
                    tracker.center_pos = np.array([x1 + w / 2, y1 + h / 2])
                    tracker.size = np.array([w, h])

                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append(
                    (cv2.getTickCount() - tic) / cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(
                        img, (gt_bbox[0], gt_bbox[1]),
                        (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                        (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0] + pred_bbox[2],
                                   pred_bbox[1] + pred_bbox[3]), (0, 255, 255),
                                  3)
                    cv2.putText(img, str(idx), (40, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            toc /= cv2.getTickFrequency()
            # save results
            if args.dataset == 'VOT2018-LT':
                video_path = os.path.join(save_dir, args.dataset, model_name,
                                          'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(
                    video_path, '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        if x is None:
                            f.write('\n')
                        else:
                            f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif args.dataset == 'GOT-10k':
                video_path = os.path.join(save_dir, args.dataset, model_name,
                                          video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                                           '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
                result_path = os.path.join(video_path,
                                           '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                model_path = os.path.join(
                    save_dir, args.dataset,
                    model_name + '_' + str(selector_path))
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path,
                                           '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.
                  format(v_idx + 1, video.name, toc, idx / toc))
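Example 11 repeats the refinement dispatch in both branches. A helper that mirrors it (a sketch over the objects built above; 'RF*' and 'iou_net' refiners consume RGB frames, while the siammask refiner takes BGR directly):

import cv2
import numpy as np

def refine_bbox(refine_method, RF_module, img, pred_bbox):
    """Dispatch to the matching refinement call, mirroring Example 11."""
    if 'RF' in refine_method or refine_method == 'iou_net':
        return RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                np.array(pred_bbox))
    if refine_method == 'mask':
        return RF_module.refine(img, np.array(pred_bbox), VOT=False)
    raise ValueError("refine_method should be 'RF' or 'iou_net' or 'mask'")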
Example 12
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {
        'use_visdom': False,
        'server': '127.0.0.1',
        'port': 8097
    }
    tracker = tracker_info.tracker_class(params)
    '''Refinement module'''
    RF_module = RefineModule(refine_path,
                             selector_path,
                             search_factor=sr,
                             input_sz=input_sz)
    model_name = '{}_{}{}-{}_{}'.format(args.tracker_name, args.tracker_param,
                                        RF_type, selector_path, args.run_id)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        color = np.array(
            COLORS[random.randint(0, len(COLORS) - 1)])[None, None, ::-1]
        vis_result = os.path.join(
            '/home/zxy/Desktop/AlphaRefine/CVPR21/material/quality_analysis/mask_vis',
            '{}'.format(video.name))

        if args.video != '':
            # test one specific video
            if video.name != args.video:
                continue
            else:
                print()

        if not os.path.exists(vis_result):
            os.makedirs(vis_result)

        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for specific video'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))

                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)

            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bbox = RF_module.refine(
                    cv2.cvtColor(img, cv2.COLOR_BGR2RGB), np.array(pred_bbox))

                x1, y1, w, h = pred_bbox.tolist()
                '''add boundary and min size limit'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(
                    np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(
                    np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() /
                                       tracker.base_target_sz.prod())
                ##### update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale

                mask_pred = RF_module.get_mask(
                    cv2.cvtColor(img, cv2.COLOR_BGR2RGB), np.array(pred_bbox))
                from external.pysot.toolkit.visualization import draw_mask
                draw_mask(img,
                          mask_pred,
                          idx=idx,
                          show=True,
                          save_dir='dimpsuper_armask_crocodile-3')

                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                im4show = img
                mask_pred = np.uint8(mask_pred > 0.5)[:, :, None]
                contours, _ = cv2.findContours(mask_pred.squeeze(),
                                               cv2.RETR_LIST,
                                               cv2.CHAIN_APPROX_SIMPLE)
                im4show = im4show * (1 - mask_pred) + np.uint8(
                    im4show * mask_pred /
                    2) + mask_pred * np.uint8(color) * 128
                pred_bbox = list(map(int, pred_bbox))
                # gt_bbox = list(map(int, gt_bbox))
                # cv2.rectangle(im4show, (gt_bbox[0], gt_bbox[1]),
                #               (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3)

                # cv2.rectangle(im4show, (pred_bbox[0], pred_bbox[1]),
                #               (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), color[::-1].squeeze().tolist(), 3)

                cv2.drawContours(im4show, contours, -1, color[::-1].squeeze(),
                                 2)
                cv2.putText(im4show, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                # cv2.imshow(video.name, im4show)
                cv2.imwrite(os.path.join(vis_result, '{:06}.jpg'.format(idx)),
                            im4show)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
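
The loops above depend on a bbox_clip helper that is not shown in any of these
examples. From its call sites (corner coordinates plus an (H, W) boundary) it
clips the refined box to the image and enforces a minimum size; a minimal
sketch, assuming a 10-pixel minimum (the default is not visible in the source):

def bbox_clip(x1, y1, x2, y2, boundary, min_sz=10):
    '''clip (x1, y1, x2, y2) to an (H, W) boundary and keep a minimum size (sketch)'''
    H, W = boundary
    # min_sz = 10 is an assumption; the original helper is not shown
    x1_new = max(0, min(x1, W - min_sz))
    y1_new = max(0, min(y1, H - min_sz))
    x2_new = max(min_sz, min(x2, W))
    y2_new = max(min_sz, min(y2, H))
    return x1_new, y1_new, x2_new, y2_new
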
def main():
    # load config
    model_name = args.tracker_name + '_' + args.tracker_param + '_' + RF_type
    print(model_name)
    dataset_root = dataset_root_

    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)

    # setup refine module
    RF_module = RefineModule(refine_path, selector_path, search_factor=sr, input_sz=input_sz)

    # create dataset
    frames_dir = os.path.join(dataset_root, 'frames')
    seq_list = sorted(os.listdir(frames_dir))

    # OPE tracking
    for v_idx, seq_name in enumerate(seq_list):
        if args.video != '':
            # run only the video requested via --video
            if seq_name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        seq_frame_dir = os.path.join(frames_dir, seq_name)
        num_frames = len(os.listdir(seq_frame_dir))
        gt_file = os.path.join(dataset_root, 'anno', '%s.txt' % seq_name)
        gt_bbox = np.loadtxt(gt_file, dtype=np.float32, delimiter=',').squeeze()
        for idx in range(num_frames):
            frame_path = os.path.join(seq_frame_dir, '%d.jpg' % idx)
            img = cv2.imread(frame_path)
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for this video #####'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)

            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                '''clip to the image boundary and enforce a minimum box size'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())
                # update the tracker's internal state with the refined box
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale

                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]), (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(seq_name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        model_path = os.path.join(save_dir, 'trackingnet', model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(seq_name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, seq_name, toc, idx / toc))
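
get_axis_aligned_bbox converts a ground-truth region, either a 4-value box or an
8-value rotated polygon as in VOT annotations, into a center-based axis-aligned
box (cx, cy, w, h). A sketch in the usual pysot style; treat it as an assumption
if your toolkit version differs:

def get_axis_aligned_bbox(region):
    '''convert an 8-point polygon or a 4-value box to (cx, cy, w, h)'''
    if len(region) == 8:
        cx = np.mean(region[0::2])
        cy = np.mean(region[1::2])
        x1, x2 = min(region[0::2]), max(region[0::2])
        y1, y2 = min(region[1::2]), max(region[1::2])
        # shrink the enclosing rectangle so its area matches the polygon's
        A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
             np.linalg.norm(region[2:4] - region[4:6])
        A2 = (x2 - x1) * (y2 - y1)
        s = np.sqrt(A1 / A2)
        w = s * (x2 - x1) + 1
        h = s * (y2 - y1) + 1
    else:
        x, y, w, h = region
        cx, cy = x + w / 2, y + h / 2
    return cx, cy, w, h
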
Example n. 14
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import vot
from vot import Rectangle, Polygon, Point

del os.environ[
    'MKL_NUM_THREADS']  #note:todo when called from matlab, it should be added

import torch
import cv2
import PIL.Image as Image

from pytracking.evaluation import Tracker

tracker = Tracker('ATCAIS_cpu', 'default')
tracker_name = tracker.name
parameters = tracker.get_parameters()
tracker_module = importlib.import_module('pytracking.tracker.{}'.format(
    tracker.name))
tracker_class = tracker_module.get_tracker_class()
tracker = tracker_class(parameters)


def overlay_boxes(image, state, image_file, tracker_name):

    state = torch.tensor(state).reshape(-1, 4)
    boxes = state.clone()
    boxes[:, 2:4] = state[:, 0:2] + state[:, 2:4]

    for box in boxes:
        box = box.to(torch.int64)
        top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
        # the original example is truncated here; the drawing call below is an
        # assumed minimal completion, not the author's original code
        image = cv2.rectangle(image, tuple(top_left), tuple(bottom_right),
                              (0, 255, 255), 2)
    return image
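
Example n. 14 prepares a tracker for the VOT toolkit but is cut off before the
integration loop. A hedged sketch of how the legacy vot Python bindings are
typically driven (VOT, region, frame, and report come from the old vot.py;
verify the names against your toolkit version):

handle = vot.VOT("rectangle")     # handshake with the VOT toolkit
selection = handle.region()       # initial Rectangle(x, y, width, height)
imagefile = handle.frame()        # path of the first frame
image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)
tracker.initialize(image, {'init_bbox': [selection.x, selection.y,
                                         selection.width, selection.height]})
while True:
    imagefile = handle.frame()
    if not imagefile:
        break
    image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)
    out = tracker.track(image)
    x, y, w, h = out['target_bbox']
    handle.report(Rectangle(x, y, w, h))
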
Example n. 15
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {'use_visdom': False, 'server': '127.0.0.1', 'port': 8097}
    tracker = tracker_info.tracker_class(params)

    '''Refinement module'''
    RF_module = RefineModule(refine_path, selector_path, search_factor=sr, input_sz=input_sz)
    model_name = args.tracker_name + '_' + args.tracker_param + '{}-{}'.format(RF_type, selector_path) + '_%d' % (args.run_id)
    # overridden below: this configuration only records ground-truth boxes
    # (see the early `continue` in the tracking loop)
    model_name = 'LaSOT_gt'

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset, dataset_root=dataset_root_, load_img=False)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(os.path.join(save_dir, args.dataset, model_name, '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # run only the video requested via --video
            if video.name != args.video:
                continue

        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            # record the axis-aligned ground-truth box and skip tracking entirely;
            # the `continue` makes the rest of this loop body unreachable
            cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
            gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
            pred_bboxes.append(gt_bbox_)
            continue

            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for this video #####'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))

                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)

            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']

                ''' refine tracking results '''
                pred_bbox = RF_module.refine(cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
                                             np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                w, h = get_mean_wh(pred_bboxes, w, h)  # size smoothing; helper sketched after this example

                '''clip to the image boundary and enforce a minimum box size'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() / tracker.base_target_sz.prod())

                # update
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale

                pred_bboxes.append(pred_bbox)

            toc += cv2.getTickCount() - tic
            track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                              (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3)
                cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                              (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                k = cv2.waitKey(0)
                if k == ord('q'):
                    exit()
                elif k == ord('s'):
                    cv2.imwrite(os.path.join(os.environ['HOME'], 'Desktop/demo', video.name+'_{}.jpg'.format(idx)), img)

        # save results
        model_path = os.path.join(save_dir, args.dataset, model_name)
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        result_path = os.path.join(model_path, '{}.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                f.write(','.join([str(i) for i in x])+'\n')
        print(video.name)
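
get_mean_wh in Example n. 15 is never defined in the snippet; its call site
suggests it damps size jitter by averaging the current width/height with recent
predictions. A purely hypothetical sketch (the window size and equal weighting
are assumptions):

def get_mean_wh(pred_bboxes, w, h, window=5):
    '''average (w, h) over the last `window` predicted boxes (hypothetical sketch)'''
    recent = pred_bboxes[-window:]
    if not recent:
        return w, h
    ws = [box[2] for box in recent] + [w]
    hs = [box[3] for box in recent] + [h]
    return sum(ws) / len(ws), sum(hs) / len(hs)
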
Example n. 16
def main():
    # create tracker
    tracker_info = Tracker(args.tracker_name, args.tracker_param, None)
    params = tracker_info.get_parameters()
    params.visualization = args.vis
    params.debug = args.debug
    params.visdom_info = {
        'use_visdom': False,
        'server': '127.0.0.1',
        'port': 8097
    }
    tracker = tracker_info.tracker_class(params)

    # setup refine module
    RF_module = RefineModule(refine_path,
                             selector_path,
                             search_factor=sr,
                             input_sz=input_sz)
    model_name = args.tracker_name + '_' + args.tracker_param + '{}-{}'.format(
        RF_type, selector_path) + '_%d' % (args.run_id)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root_,
                                            load_img=False)

    # OPE tracking
    for v_idx, video in enumerate(dataset):
        if os.path.exists(
                os.path.join(save_dir, args.dataset, model_name,
                             '{}.txt'.format(video.name))):
            continue
        if args.video != '':
            # run only the video requested via --video
            if video.name != args.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            '''get RGB format image'''
            img_RGB = img[:, :, ::-1].copy()  # BGR --> RGB
            tic = cv2.getTickCount()
            if idx == 0:
                H, W, _ = img.shape
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                '''Initialize'''
                gt_bbox_np = np.array(gt_bbox_)
                gt_bbox_torch = torch.from_numpy(gt_bbox_np.astype(np.float32))
                init_info = {}
                init_info['init_bbox'] = gt_bbox_torch
                _ = tracker.initialize(img_RGB, init_info)
                '''##### initialize refinement module for this video #####'''
                RF_module.initialize(img_RGB, np.array(gt_bbox_))

                pred_bbox = gt_bbox_
                scores.append(None)
                if args.dataset == 'VOT2018-LT':
                    pred_bboxes.append([1])
                else:
                    pred_bboxes.append(pred_bbox)
            else:
                '''Track'''
                outputs = tracker.track(img_RGB)
                pred_bbox = outputs['target_bbox']
                '''##### refine tracking results #####'''
                pred_bbox = RF_module.refine(
                    cv2.cvtColor(img, cv2.COLOR_BGR2RGB), np.array(pred_bbox))
                x1, y1, w, h = pred_bbox.tolist()
                '''clip to the image boundary and enforce a minimum box size'''
                x1, y1, x2, y2 = bbox_clip(x1, y1, x1 + w, y1 + h, (H, W))
                w = x2 - x1
                h = y2 - y1
                new_pos = torch.from_numpy(
                    np.array([y1 + h / 2, x1 + w / 2]).astype(np.float32))
                new_target_sz = torch.from_numpy(
                    np.array([h, w]).astype(np.float32))
                new_scale = torch.sqrt(new_target_sz.prod() /
                                       tracker.base_target_sz.prod())
                # update the tracker's internal state with the refined box
                tracker.pos = new_pos.clone()
                tracker.target_sz = new_target_sz
                tracker.target_scale = new_scale

                pred_bboxes.append(pred_bbox)
                # scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > 0:
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_bbox[0], gt_bbox[1]),
                    (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                    (0, 255, 0), 3)
                cv2.rectangle(
                    img, (pred_bbox[0], pred_bbox[1]),
                    (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # save results
        if args.dataset == 'VOT2018-LT':
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      'longterm', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
            result_path = os.path.join(
                video_path, '{}_001_confidence.value'.format(video.name))
            with open(result_path, 'w') as f:
                for x in scores:
                    f.write('\n' if x is None else "{:.6f}\n".format(x))
            result_path = os.path.join(video_path,
                                       '{}_time.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in track_times:
                    f.write("{:.6f}\n".format(x))
        elif args.dataset == 'GOT-10k':
            video_path = os.path.join(save_dir, args.dataset, model_name,
                                      video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path,
                                       '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
            result_path = os.path.join(video_path,
                                       '{}_time.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in track_times:
                    f.write("{:.6f}\n".format(x))
        else:
            model_path = os.path.join(save_dir, args.dataset, model_name)
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            result_path = os.path.join(model_path, '{}.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc, idx / toc))
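
Every example above writes the refined box back into the pytracking tracker the
same way: pos holds the (y, x) center, target_sz holds (height, width), and
target_scale is the size relative to the base_target_sz recorded at
initialization. A small self-contained check of that conversion (all numbers
are made up):

import torch

x1, y1, w, h = 20.0, 40.0, 60.0, 30.0          # refined box as (x1, y1, w, h)
pos = torch.tensor([y1 + h / 2, x1 + w / 2])   # pytracking stores (cy, cx)
target_sz = torch.tensor([h, w])               # and (height, width)
base_target_sz = torch.tensor([40.0, 80.0])    # hypothetical value from init
scale = torch.sqrt(target_sz.prod() / base_target_sz.prod())
print(pos, target_sz, scale)                   # tensor([55., 50.]) tensor([30., 60.]) tensor(0.7500)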