Esempio n. 1
0
def main():
    """Run the interactive tracking demo and record it to ``result.avi``.

    The config and snapshot come from the module-level ``args``. The user
    selects a region on the first frame; every later frame is passed to the
    tracker and annotated with either the polygon/mask or the bounding box it
    returns, then displayed and appended to the output video.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model: tensors are deserialized on the CPU, then moved to `device`
    model.load_state_dict(
        torch.load(
            args.snapshot,
            map_location=lambda storage, loc: storage.cpu())['state_dict'])
    model.eval().to(device)

    # build tracker
    if cfg.RPN.TYPE == "YOLO":
        tracker = build_tracker(model, True)
    else:
        tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    # NOTE(review): the output size is fixed at 480x360; frames of another
    # size are presumably dropped by the writer -- confirm against the input.
    writer = cv2.VideoWriter("result.avi", cv2.VideoWriter_fourcc(*'XVID'),
                             25.0, (480, 360))

    # The writer is released in `finally` so the file is closed even when the
    # loop raises or exit() is triggered by a failed ROI selection.
    try:
        for frame in get_frames(args.video_name):
            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except Exception:
                    exit()
                tracker.init(frame, init_rect)
                first_frame = False
            else:
                outputs = tracker.track(frame)
                if 'polygon' in outputs:
                    polygon = np.array(outputs['polygon']).astype(np.int32)
                    cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], True,
                                  (0, 255, 0), 3)
                    mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                    mask = mask.astype(np.uint8)
                    mask = np.stack([mask, mask * 255,
                                     mask]).transpose(1, 2, 0)
                    frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                else:
                    bbox = list(map(int, outputs['bbox']))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)
                cv2.imshow(video_name, frame)
                writer.write(frame)
                cv2.waitKey(40)
    finally:
        writer.release()
Esempio n. 2
0
def main():
  """Build the MobileNetV2 SiamRPN tracker and print exemplar-crop statistics.

  Loads the fixed config and weights, crops a 127-pixel sub-window around a
  fixed box of the benchmark image (padding with the per-channel mean) and
  prints the mean and standard deviation of that crop.
  """
  # --- tracker setup ---------------------------------------------------------
  config_path = "experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml"
  weights_path = "experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth"

  cfg.merge_from_file(config_path)
  cfg.CUDA = torch.cuda.is_available()
  device = torch.device("cuda" if cfg.CUDA else "cpu")

  net = ModelBuilder()
  weights = torch.load(weights_path,
                       map_location=lambda storage, loc: storage.cpu())
  net.load_state_dict(weights)
  net.eval().to(device)

  # The tracker is constructed but not used by the statistics below.
  tracker = build_tracker(net)

  # --- benchmark input -------------------------------------------------------
  image = cv2.imread("image/benchmark_5.jpg")
  target_box = (131, 122, 92, 118)

  # per-channel average over both spatial axes
  per_channel_mean = np.mean(image, axis=(0, 1))

  # EXEMPLAR_SIZE of mobilenetV2 is 127
  exemplar = get_subwindow(image, 127, target_box, per_channel_mean)

  print(exemplar.mean(), "\t", exemplar.std())
def multi_processing(model, bbox, label, probs):
    """Track one object through the module-level ``image_files`` sequence.

    The first image initializes the tracker; every following image is passed
    to ``tracker.track``.

    Args:
        model: track model; it is put in eval mode and moved to 'cuda'.
        bbox: box indexed as ``bbox[0..3]``; width and height for the tracker
            are derived as ``bbox[2] - bbox[0]`` and ``bbox[3] - bbox[1]``.
        label: object label(s), forwarded to ``build_tracker``.
        probs: confidence value(s), forwarded to ``build_tracker``.

    Returns:
        list: ``[track_start_time, track_end_time, total_time, stdOutput]``.
        ``stdOutput`` is ``[frame_index, output, elapsed_seconds]`` for the
        last tracked frame, or ``None`` when no frame was tracked (fewer than
        two images).
    """
    # set model on GPU
    model.eval().to('cuda')
    # build the tracker
    tracker = build_tracker(model, label, probs)
    init_rect = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]

    track_start_time = time.time()
    total_time = 0
    # Defined up front so the return below is valid even if the loop never
    # reaches a tracking step.
    stdOutput = None
    for f, image_file in enumerate(image_files):
        frame = cv2.imread(image_file)
        # initialize the tracker on the first image
        if f == 0:
            tracker.init(frame, init_rect)
            continue
        t1 = time.time()
        output = tracker.track(frame)
        # Measure once so the accumulated and the reported time agree.
        elapsed = time.time() - t1
        total_time += elapsed
        stdOutput = [f, output, elapsed]
    track_end_time = time.time()
    return [track_start_time, track_end_time, total_time, stdOutput]
Esempio n. 4
0
 def __init__(self, parent=None):
     """Wire up the UI callbacks and load the SiamMask tracker.

     Args:
         parent: forwarded to the base-class constructor.
     """
     super(MyMainWindow, self).__init__(parent)

     # Connect every widget signal to its handler
     signal_handlers = (
         (self.pushButton_locationLoading.clicked, self.location_loading),
         (self.pushButton_videoLoading.clicked, self.video_loading),
         (self.pushButton_cameraLoading.clicked, self.camera_loading),
         (self.pushButton_bboxSetting.clicked, self.bbox_setting),
         (self.pushButton_algorithmProcessing.clicked,
          self.algorithm_processing),
         (self.scrollBar.valueChanged, self.slider_change),
         (self.checkBox.stateChanged, self.checkbox_change),
     )
     for signal, handler in signal_handlers:
         signal.connect(handler)

     # Flags controlling whether the tip message boxes are still shown
     self.bbox_tips = True
     self.save_tips = True

     # Tracker files live in one experiment directory
     model_location = './pysot/experiments/siammaske_r50_l3'
     self.config = model_location + '/config.yaml'
     self.snapshot = model_location + '/model.pth'
     self.tracker_name = model_location.rsplit('/', 1)[-1]
     self.video_name = ''

     cfg.merge_from_file(self.config)
     cfg.CUDA = torch.cuda.is_available()
     device = torch.device('cuda' if cfg.CUDA else 'cpu')

     net = ModelBuilder()
     weights = torch.load(self.snapshot,
                          map_location=lambda storage, loc: storage.cpu())
     net.load_state_dict(weights)
     net.eval().to(device)
     self.tracker = build_tracker(net)

     # Remaining state starts out empty
     self.vs = None
     self.analysis_box = None
     self.analysis_max = 10
     self.save_location = ''
     self.afterCamera = False
Esempio n. 5
0
def main():
    """Save what ``tracker.init`` returns for the first frame of each video.

    For every video of the dataset the tracker is initialised on the first
    frame with the axis-aligned ground-truth box; the value returned by
    ``init`` is converted to an HWC array and written as ``Radio<NNNNNN>.jpg``.
    """
    # load config
    print("begin")
    cfg.merge_from_file(args.config)

    script_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(script_dir, '../testing_dataset', args.dataset)
    # The computed location is immediately replaced by this fixed path.
    dataset_root = "/data/VisDrone Challenge/Single-Object Tracking/VisDrone2019-SOT-val/"

    # create model (no snapshot is loaded in this function)
    net = ModelBuilder()
    tracker = build_tracker(net)

    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)
    print(dataset.dataset_root)

    saved = 0
    for video in dataset:
        for idx, (img, gt_bbox) in enumerate(video):
            # Only the first frame of each video is processed.
            if idx != 0:
                break
            cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
            # top-left corner, width, height form
            init_box = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
            # initialise the tracker with that box
            init_out = tracker.init(img, init_box)

            patch = init_out.cpu().numpy()[0].transpose(1, 2, 0)

            cv2.imwrite("./Radio{:06d}.jpg".format(saved), patch)
            saved += 1
Esempio n. 6
0
    def __init__(self, classid=0, anchorid=0, init_frame=None, init_bbox=None):
        """Load the SiamRPN (ResNet-50) tracker and remember the initial target.

        Args:
            classid: stored as ``self.classId``.
            anchorid: stored as ``self.anchorId``.
            init_frame: stored as ``self.init_frame``.
            init_bbox: stored as ``self.init_bbox``.
        """
        # Fixed locations of the experiment files
        cfg_path = '../object_detection/pysot/experiments/siamrpn_r50_l234_dwxcorr/config.yaml'
        snapshot = '../object_detection/pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth'

        cfg.merge_from_file(cfg_path)
        # CUDA is used only if it is both available and enabled in the config
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device_name = 'cuda' if cfg.CUDA else 'cpu'
        device = torch.device(device_name)

        # Build the network and restore its weights (deserialized on the CPU)
        net = ModelBuilder()
        weights = torch.load(snapshot,
                             map_location=lambda storage, loc: storage.cpu())
        net.load_state_dict(weights)
        net.eval().to(device)

        self.tracker = build_tracker(net)
        self.classId = classid
        self.anchorId = anchorid
        self.init_bbox = init_bbox
        self.init_frame = init_frame
Esempio n. 7
0
def main():
    """Interactive tracking demo with an optional transformer output format.

    The user selects a region on the first frame. For later frames the tracker
    output is drawn as a scored rectangle when ``cfg.TRANSFORMER.TRANSFORMER``
    is set, otherwise as a polygon with mask overlay or a plain bounding box.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and restore the weights stored under 'state_dict'
    model = ModelBuilder()
    checkpoint = torch.load(args.snapshot,
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint['state_dict'])
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    # window title: file name without directory and extension, or 'webcam'
    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    green = (0, 255, 0)
    initialized = False
    for frame in get_frames(args.video_name):
        if not initialized:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except BaseException:
                exit()
            tracker.init(frame, init_rect)
            initialized = True
            continue

        outputs = tracker.track(frame)
        if cfg.TRANSFORMER.TRANSFORMER:
            # here the tracker returns (score, corner box)
            acc, (x1, y1, x2, y2) = outputs
            cv2.rectangle(frame, (x1, y1), (x2, y2), green, 3)
            cv2.putText(frame, 'Acc: ' + acc.astype('str'), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        elif 'polygon' in outputs:
            corners = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [corners.reshape((-1, 1, 2))], True,
                          green, 3)
            binary = (outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255
            binary = binary.astype(np.uint8)
            overlay = np.stack([binary, binary * 255,
                                binary]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, overlay, 0.23, -1)
        else:
            left, top, width, height = list(map(int, outputs['bbox']))[:4]
            cv2.rectangle(frame, (left, top),
                          (left + width, top + height),
                          green, 3)
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
Esempio n. 8
0
def setup_tracker():
    """Create a tracker from the module-level config and model files.

    Returns:
        The tracker built around the pretrained model, after ``warmup`` has
        been called on that model.
    """
    cfg.merge_from_file(cfg_file)

    net = load_pretrain(ModelBuilder(), model_file)
    net = net.cuda().eval()

    tracker = build_tracker(net)
    warmup(net)
    return tracker
Esempio n. 9
0
 def build_model(self):
     """Load the snapshot named by ``args`` and return a tracker built on it."""
     net = ModelBuilder()
     # weights are deserialized on the CPU before the model is moved
     weights = torch.load(args.snapshot,
                          map_location=lambda storage, loc: storage.cpu())
     net.load_state_dict(weights)
     net.eval().to(device)
     return build_tracker(net)
Esempio n. 10
0
 def __init__(self, config_file, model_path):
     """Create the tracker from a config file and a weights file.

     Args:
         config_file: path merged into the global ``cfg``.
         model_path: path of the state dict loaded into the model.
     """
     super().__init__()
     cfg.merge_from_file(config_file)

     net = ModelBuilder()
     weights = torch.load(model_path,
                          map_location=lambda storage, loc: storage.cpu())
     net.load_state_dict(weights)
     # this variant always moves the model to CUDA
     net.eval().cuda()

     self.tracker = build_tracker(net)
Esempio n. 11
0
def main():
    """Tracking demo that writes every annotated frame to disk as a JPEG.

    The user selects a region on the first frame; each later frame is tracked,
    annotated, and saved under a fixed output directory with a running index
    as file name. The index is also printed.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and restore its weights
    model = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(weights)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')

    green = (0, 255, 0)
    initialized = False
    saved = 0
    for frame in get_frames(args.video_name):
        if not initialized:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except BaseException:
                exit()
            tracker.init(frame, init_rect)
            initialized = True
            continue

        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            corners = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [corners.reshape((-1, 1, 2))], True,
                          green, 3)
            binary = (outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255
            binary = binary.astype(np.uint8)
            overlay = np.stack([binary, binary * 255,
                                binary]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, overlay, 0.23, -1)
        else:
            bbox = list(map(int, outputs['bbox']))
            top_left = (bbox[0], bbox[1])
            bottom_right = (bbox[0] + bbox[2], bbox[1] + bbox[3])
            cv2.rectangle(frame, top_left, bottom_right, green, 3)

        # frames are saved instead of being displayed
        print(saved)
        out_file = "/home/tempuser1/pysot/demo/ouput/" + str(saved) + '.jpg'
        cv2.imwrite(filename=out_file, img=frame)
        saved += 1
Esempio n. 12
0
 def build_model(self):
     """Load the snapshot and return a tracker placed on this worker's device.

     With CUDA enabled the device index is ``self.index // self.num_workers``;
     otherwise the CPU is used. The chosen device is printed.
     """
     net = ModelBuilder()
     weights = torch.load(args.snapshot,
                          map_location=lambda storage, loc: storage.cpu())
     net.load_state_dict(weights)

     if cfg.CUDA:
         gpu_index = int(self.index // self.num_workers)
         device = torch.device('cuda:{}'.format(gpu_index))
     else:
         device = torch.device('cpu')
     print(device)
     net.eval().to(device)

     return build_tracker(net)
Esempio n. 13
0
    def load_tracker(self, tracker_config, tracker_snapshot):
        """Load the selected pysot tracker into ``self.tracker``.

        Args:
          - tracker_config (str): Path to pysot config file for the tracker
          - tracker_snapshot (str): Path to .pth file of pysot tracker
        """
        cfg.merge_from_file(tracker_config)
        # CUDA is used only if it is both available and enabled in the config
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device = torch.device('cuda' if cfg.CUDA else 'cpu')
        model = ModelBuilder()
        # Deserialize onto the CPU first so that a snapshot saved from a GPU
        # still loads on a host without CUDA; the model is moved to `device`
        # right after.
        state_dict = torch.load(
            tracker_snapshot,
            map_location=lambda storage, loc: storage.cpu())
        model.load_state_dict(state_dict)
        model.eval().to(device)
        self.tracker = build_tracker(model)
Esempio n. 14
0
    def __init__(self):
        """Set up the DROL tracker from the paths in ``path_config``."""
        super(DROL, self).__init__("DROL")

        # configuration first, then the seed taken from it
        cfg.merge_from_file(path_config.DROL_CONFIG)
        seed_torch(cfg.TRACK.SEED)

        # pretrained network on CUDA, in eval mode
        net = load_pretrain(ModelBuilder(), path_config.DROL_SNAPSHOT)
        net = net.cuda().eval()

        self.tracker = build_tracker(net)
Esempio n. 15
0
def main():
    """Interactive tracking demo shown in a resizable window.

    A region is selected on the first frame and tracked on all following
    frames; the result is drawn as a polygon with mask overlay when the
    tracker returns one, otherwise as a bounding box.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and restore its weights
    model = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(weights)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')
    cv2.namedWindow(video_name, cv2.WINDOW_NORMAL)

    green = (0, 255, 0)
    initialized = False
    for frame in get_frames(args.video_name):
        if not initialized:
            # the ROI is chosen once, on the first frame
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except BaseException:
                exit()
            tracker.init(frame, init_rect)
            initialized = True
            continue

        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            corners = np.array(outputs['polygon']).astype(np.int32)
            # vertices are passed as an array of shape (-1, 1, 2)
            cv2.polylines(frame, [corners.reshape((-1, 1, 2))],
                          True, green, 3)
            binary = (outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255
            binary = binary.astype(np.uint8)
            overlay = np.stack([binary, binary*255, binary]).transpose(1, 2, 0)
            # weighted blend of the frame and the mask overlay
            frame = cv2.addWeighted(frame, 0.77, overlay, 0.23, -1)
        else:
            bbox = list(map(int, outputs['bbox']))
            top_left = (bbox[0], bbox[1])
            bottom_right = (bbox[0]+bbox[2], bbox[1]+bbox[3])
            cv2.rectangle(frame, top_left, bottom_right, green, 3)
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def PYSOTINIT():
    """Build and return a pysot tracker.

    Uses the module-level ``tracker_config`` and ``snapshot`` paths; CUDA is
    used whenever it is available.
    """
    cfg.merge_from_file(tracker_config)
    cfg.CUDA = torch.cuda.is_available()
    device_name = 'cuda' if cfg.CUDA else 'cpu'
    device = torch.device(device_name)

    net = ModelBuilder()
    weights = torch.load(snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(device)

    return build_tracker(net)
Esempio n. 17
0
def fitness(config, reporter):
    """Evaluate one hyper-parameter set and report its EAO.

    Only the VOT datasets listed below are supported.

    Args:
        config: mapping with the keys 'penalty_k', 'window_influence', 'lr'
            and 'search_region'; also forwarded to ``build_tracker``.
        reporter: callable invoked as ``reporter(EAO=eao)``.

    Raises:
        NotImplementedError: if ``args.dataset`` is not a supported dataset.
    """
    tracker_name = 'tracker_penalty_k={0:.3f},window_influence={1:.3f},lr={2:.3f},search_region={3}'.format(
        config['penalty_k'], config['window_influence'], config['lr'],
        config['search_region'])

    if args.dataset in ['VOT2016', 'VOT2017', 'VOT2018', 'VOT2019']:
        tracker = build_tracker(model, config)
        eao = calculate_eao(tracker, tracker_name)
        # Arguments follow the label order of the message (penalty_k, lr,
        # window_influence) so each value is printed under its own name.
        print(
            "penalty_k: {0:.3f}, lr: {1:.3f}, window_influence: {2:.3f}, search_region: {3}, eao: {4:.5f}"
            .format(config['penalty_k'], config['lr'],
                    config['window_influence'], config['search_region'], eao))
        reporter(EAO=eao)
    else:
        raise NotImplementedError
Esempio n. 18
0
    def __init__(self, config, snapshot):
        """Load the model and build the tracker.

        Args:
            config: path of the config file merged into the global ``cfg``.
            snapshot: path of the state dict loaded into the model.
        """
        cfg.merge_from_file(config)
        # CUDA is used only if it is both available and enabled in the config
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device_name = 'cuda' if cfg.CUDA else 'cpu'
        device = torch.device(device_name)

        # network with restored weights, in eval mode on the chosen device
        self.model = ModelBuilder()
        weights = torch.load(snapshot,
                             map_location=lambda storage, loc: storage.cpu())
        self.model.load_state_dict(weights)
        self.model.eval().to(device)

        self.tracker = build_tracker(self.model)

        # target state, unset until assigned elsewhere
        self.center_pos = None
        self.size = None
Esempio n. 19
0
    def __init__(self, config_file, model_file):
        """Remember the file paths, then load the model and build the tracker.

        Args:
            config_file: path of the config file merged into the global ``cfg``.
            model_file: path of the state dict loaded into the model.
        """
        self.config_file = config_file
        self.model_file = model_file

        # configuration and device selection
        cfg.merge_from_file(self.config_file)
        cfg.CUDA = torch.cuda.is_available()
        device_name = 'cuda' if cfg.CUDA else 'cpu'
        self.device = torch.device(device_name)

        # network with restored weights
        self.model = ModelBuilder()
        weights = torch.load(model_file,
                             map_location=lambda storage, loc: storage.cpu())
        self.model.load_state_dict(weights)
        self.model.eval().to(self.device)

        self.tracker = build_tracker(self.model)
Esempio n. 20
0
    def init_track(self):
        """Load the tracking model and create ``self.tracker``.

        Progress messages are appended to ``self.textBws_show_process``.
        """
        # merge the configuration
        cfg.merge_from_file(self.config_path)
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device_name = 'cuda' if cfg.CUDA else 'cpu'
        device = torch.device(device_name)

        # read the checkpoint (deserialized on the CPU), then create the model
        self.textBws_show_process.append('模型对象创建...')
        self.checkpoint = torch.load(
            self.snapshot_path,
            map_location=lambda storage, loc: storage.cpu())

        self.model = ModelBuilder()
        print('断点')

        # restore the weights and switch to eval mode on the target device
        self.model.load_state_dict(self.checkpoint)
        self.model.eval().to(device)
        self.textBws_show_process.append('加载跟踪模型完毕!')

        # create the tracker
        self.tracker = build_tracker(self.model)
Esempio n. 21
0
def test_snapshot(epoch: int, snapshot: str, test_path: str):
    """Run a snapshot on a few image pairs from a random val2017 folder.

    For up to eight template/search pairs the tracker output (blue) and the
    annotated box (red) are drawn on the search image together with the score,
    and both images are written below ``test_path``.

    Args:
        epoch: not used by this function.
        snapshot: checkpoint file; its 'state_dict' entry is loaded.
        test_path: root directory for the written images.
    """
    max_img = 8

    # model on the CPU, in eval mode
    model = ModelBuilder()
    checkpoint = torch.load(snapshot,
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint['state_dict'])
    model.eval().to(torch.device('cpu'))
    tracker = build_tracker(model)

    # dataset location relative to this file
    here = os.path.dirname(os.path.realpath(__file__))
    root = os.path.join(here, '../../', cfg.DATASET.COCO.ROOT)
    anno_path = os.path.join(root, '../', "val2017.json")
    with open(anno_path, 'r') as f:
        anno = filter_zero(json.load(f))

    dataset = os.path.join(root, "val2017")
    folder = random.choice(glob.glob(f"{dataset}/**"))
    zs = sorted(glob.glob(f"{folder}/*.z.jpg"))
    xs = sorted(glob.glob(f"{folder}/*.x.jpg"))

    # pair every search image with its annotation
    xs = [(x, get_anno_from_img_path(anno, x)) for x in xs]

    for i, z_path in enumerate(zs[:max_img]):
        z = cv2.imread(z_path)
        x_path, bbox = xs[i]
        x = cv2.imread(x_path)

        tracker.init_(z)
        cls, (x1, y1, x2, y2) = tracker.track(x)
        cv2.rectangle(x, (x1, y1), (x2, y2), (255, 0, 0), 2)
        a1, b1, a2, b2 = bbox
        cv2.rectangle(x, (a1, b1), (a2, b2), (0, 0, 255), 2)
        cv2.putText(x, 'Acc: ' + cls.astype('str'), (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # output folder is named after the folder holding the template image
        out_dir = f"{test_path}/{os.path.basename(Path(z_path).parent)}"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        cv2.imwrite(f"{out_dir}/{os.path.basename(x_path)}", x)
        cv2.imwrite(f"{out_dir}/{os.path.basename(z_path)}", z)
Esempio n. 22
0
    def __init__(self, backbone, target):
        """Create a SiamRPN++ tracker for a backbone/benchmark combination.

        Args:
            backbone: "AlexNet", "ResNet-50", "MobileNetV2" or "SiamMask".
            target: "OTB", "VOT" or "VOTLT"; only the combinations listed in
                the table below are valid.

        Raises:
            ValueError: if the combination is not supported.
        """
        super(SiamRPNPPGroup, self).__init__(f"SiamRPN++Group/{backbone}/{target}")

        # (backbone, target) -> (config, snapshot). The lambdas defer the
        # path_config lookup until a combination is actually selected.
        variants = {
            ("AlexNet", "OTB"): lambda: (
                path_config.SIAMRPNPP_ALEXNET_OTB_CONFIG,
                path_config.SIAMRPNPP_ALEXNET_OTB_SNAPSHOT),
            ("AlexNet", "VOT"): lambda: (
                path_config.SIAMRPNPP_ALEXNET_CONFIG,
                path_config.SIAMRPNPP_ALEXNET_SNAPSHOT),
            ("ResNet-50", "OTB"): lambda: (
                path_config.SIAMRPNPP_RESNET_OTB_CONFIG,
                path_config.SIAMRPNPP_RESNET_OTB_SNAPSHOT),
            ("ResNet-50", "VOT"): lambda: (
                path_config.SIAMRPNPP_RESNET_CONFIG,
                path_config.SIAMRPNPP_RESNET_SNAPSHOT),
            ("ResNet-50", "VOTLT"): lambda: (
                path_config.SIAMRPNPP_RESNET_LT_CONFIG,
                path_config.SIAMRPNPP_RESNET_LT_SNAPSHOT),
            ("MobileNetV2", "VOT"): lambda: (
                path_config.SIAMRPNPP_MOBILENET_CONFIG,
                path_config.SIAMRPNPP_MOBILENET_SNAPSHOT),
            ("SiamMask", "VOT"): lambda: (
                path_config.SIAMPRNPP_SIAMMASK_CONFIG,
                path_config.SIAMPRNPP_SIAMMASK_SNAPSHOT),
        }
        resolve = variants.get((backbone, target))
        if resolve is None:
            raise ValueError("Invalid backbone and target")
        config, snapshot = resolve()

        # configuration and device selection
        cfg.merge_from_file(config)
        cfg.CUDA = torch.cuda.is_available()
        device = torch.device("cuda" if cfg.CUDA else "cpu")

        # network with restored weights
        self.model = ModelBuilder()
        weights = torch.load(snapshot,
                             map_location=lambda storage, loc: storage.cpu())
        self.model.load_state_dict(weights)
        self.model.eval().to(device)

        self.tracker = build_tracker(self.model)
Esempio n. 23
0
    def __init__(self):
        """Create the SiamRPN++ tracker from the paths in ``path_config``."""
        super(SiamRPNPP, self).__init__("SiamRPN++")
        config = path_config.SIAMRPNPP_CONFIG
        snapshot = path_config.SIAMRPNPP_SNAPSHOT

        # configuration and device selection
        cfg.merge_from_file(config)
        cfg.CUDA = torch.cuda.is_available()
        device_name = "cuda" if cfg.CUDA else "cpu"
        device = torch.device(device_name)

        # network with restored weights
        self.model = ModelBuilder()
        weights = torch.load(snapshot,
                             map_location=lambda storage, loc: storage.cpu())
        self.model.load_state_dict(weights)
        self.model.eval().to(device)

        self.tracker = build_tracker(self.model)
Esempio n. 24
0
    def __init__(self):
        """Register the ROS endpoints and load the pysot tracker."""
        self.init_rect = None

        # ROS endpoints: result publisher, image subscriber, init service
        self.pysot_pub = rospy.Publisher(
            config.TRACK_PUB_TOPIC, Int32MultiArray, queue_size=10)
        # NOTE(review): the subscriber is registered before self.tracker
        # exists; confirm the callback tolerates an early message.
        self.img_sub = rospy.Subscriber(
            config.IMAGE_SUB_TOPIC, Image, self.receive_frame_and_track)
        self.service = rospy.Service(
            "init_rect", InitRect, self.set_init_rect)

        # tracker type is set first, then the config file is merged on top
        cfg.TRACK.TYPE = config.TRACK_TYPE
        cfg.merge_from_file(config.CONFIG_PATH)
        cfg.CUDA = torch.cuda.is_available()
        device_name = 'cuda' if cfg.CUDA else 'cpu'
        device = torch.device(device_name)

        net = ModelBuilder()
        weights = torch.load(config.MODEL_PATH,
                             map_location=lambda storage, loc: storage.cpu())
        net.load_state_dict(weights)
        net.eval().to(device)

        self.tracker = build_tracker(net)
Esempio n. 25
0
    def __init__(self, tr):
        """Instantiate the tracker implementation selected by ``tr``.

        Args:
            tr: a key of ``OPENCV_TRACKERS``, a member of ``PYSOT_TRACKERS``,
                or one of the names in the table below.

        Raises:
            Exception: if ``tr`` names no supported tracker.
        """
        self.type = tr
        self.PYSOT_TRACKER_THRESHOLD = 0.3

        # Lambdas keep each class lookup deferred until that entry is chosen.
        named_factories = {
            "kalman": lambda: KalmanTracker(),
            "flow_LK_mean": lambda: LKFlowTracker(strategy="mean"),
            "flow_LK_median": lambda: LKFlowTracker(strategy="median"),
            "flow_GF_mean": lambda: GFFlowTracker(strategy="mean"),
            "flow_GF_median": lambda: GFFlowTracker(strategy="median"),
            "iou": lambda: StaticTracker(),
        }

        if tr in OPENCV_TRACKERS:
            factory = OPENCV_TRACKERS[tr]
            self.tracker = factory()
        elif tr in PYSOT_TRACKERS:
            pysot_model = load_pysot_model(self.type)
            self.tracker = build_tracker(pysot_model)
        elif tr in named_factories:
            self.tracker = named_factories[tr]()
        else:
            raise Exception("Tracker not supported")
Esempio n. 26
0
    def init_track(self):
        """Load the AlexNet SiamRPN model and create ``self.tracker``."""
        # fixed locations of the config and snapshot files
        config_path = './models/siamrpn_alex_dwxcorr/config.yaml'
        snapshot_path = './models/siamrpn_alex_dwxcorr/model.pth'

        # merge the configuration; CUDA needs to be available and enabled
        cfg.merge_from_file(config_path)
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device_name = 'cuda' if cfg.CUDA else 'cpu'
        device = torch.device(device_name)

        # network with restored weights (deserialized on the CPU)
        net = ModelBuilder()
        weights = torch.load(snapshot_path,
                             map_location=lambda storage, loc: storage.cpu())
        net.load_state_dict(weights)
        net.eval().to(device)

        # create the tracker
        self.tracker = build_tracker(net)
Esempio n. 27
0
def main():
    """Run the tracker over ``args.dataset`` and save per-video results.

    For 'VOT2016', 'VOT2018' and 'VOT2019' the tracker is re-initialised five
    frames after every failure (zero overlap with the ground truth) and the
    results go to ``results/<dataset>/<model>/baseline/<video>/``. Every other
    dataset is tracked in one pass (OPE); the output layout then depends on
    the dataset name ('VOT2018-LT', 'GOT-10k' or anything else).

    Config, snapshot, dataset name, the optional single-video filter and the
    visualisation switch are read from the module-level ``args``.
    """
    # load config
    cfg.merge_from_file(args.config)

    cur_dir = os.path.dirname(os.path.realpath(__file__))
    dataset_root = os.path.join(cur_dir, '../testing_dataset', args.dataset)

    # create model
    model = ModelBuilder(cfg)

    # load model
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    # results are filed under the snapshot's base name
    model_name = args.snapshot.split('/')[-1].split('.')[0]
    total_lost = 0
    if args.dataset in ['VOT2016', 'VOT2018', 'VOT2019']:
        # restart tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            # index of the next frame on which the tracker is (re)initialised
            frame_counter = 0
            lost_number = 0
            toc = 0
            pred_bboxes = []
            for idx, (img, gt_bbox) in enumerate(video):
                if len(gt_bbox) == 4:
                    # expand (x, y, w, h) into the four corner points
                    gt_bbox = [gt_bbox[0], gt_bbox[1],
                       gt_bbox[0], gt_bbox[1]+gt_bbox[3]-1,
                       gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]+gt_bbox[3]-1,
                       gt_bbox[0]+gt_bbox[2]-1, gt_bbox[1]]
                tic = cv2.getTickCount()
                if idx == frame_counter:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    # 1 marks an initialisation frame in the result file
                    pred_bboxes.append(1)
                elif idx > frame_counter:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    if cfg.MASK.MASK:
                        pred_bbox = outputs['polygon']
                    overlap = vot_overlap(pred_bbox, gt_bbox, (img.shape[1], img.shape[0]))
                    if overlap > 0:
                        # not lost
                        pred_bboxes.append(pred_bbox)
                    else:
                        # lost object: 2 marks the failure frame
                        pred_bboxes.append(2)
                        frame_counter = idx + 5 # skip 5 frames
                        lost_number += 1
                else:
                    # 0 marks a frame skipped while waiting for re-init
                    pred_bboxes.append(0)
                toc += cv2.getTickCount() - tic
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > frame_counter:
                    # np.int32 replaces the np.int alias, which no longer
                    # exists in current NumPy releases.
                    cv2.polylines(img, [np.array(gt_bbox, np.int32).reshape((-1, 1, 2))],
                            True, (0, 255, 0), 3)
                    if cfg.MASK.MASK:
                        cv2.polylines(img, [np.array(pred_bbox, np.int32).reshape((-1, 1, 2))],
                                True, (0, 255, 255), 3)
                    else:
                        bbox = list(map(int, pred_bbox))
                        cv2.rectangle(img, (bbox[0], bbox[1]),
                                      (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.putText(img, str(lost_number), (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            # convert accumulated ticks to seconds
            toc /= cv2.getTickFrequency()
            # save results
            video_path = os.path.join('results', args.dataset, model_name,
                    'baseline', video.name)
            if not os.path.isdir(video_path):
                os.makedirs(video_path)
            result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    if isinstance(x, int):
                        # status code line (0, 1 or 2)
                        f.write("{:d}\n".format(x))
                    else:
                        f.write(','.join([vot_float2str("%.4f", i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'.format(
                    v_idx+1, video.name, toc, idx / toc, lost_number))
            total_lost += lost_number
        print("{:s} total lost: {:d}".format(model_name, total_lost))
    else:
        # OPE tracking
        for v_idx, video in enumerate(dataset):
            if args.video != '':
                # test one special video
                if video.name != args.video:
                    continue
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                tic = cv2.getTickCount()
                if idx == 0:
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
                    tracker.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    # the initialisation frame has no score
                    scores.append(None)
                    if 'VOT2018-LT' == args.dataset:
                        pred_bboxes.append([1])
                    else:
                        pred_bboxes.append(pred_bbox)
                else:
                    outputs = tracker.track(img)
                    pred_bbox = outputs['bbox']
                    pred_bboxes.append(pred_bbox)
                    scores.append(outputs['best_score'])
                toc += cv2.getTickCount() - tic
                track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency())
                if idx == 0:
                    cv2.destroyAllWindows()
                if args.vis and idx > 0:
                    gt_bbox = list(map(int, gt_bbox))
                    pred_bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]),
                                  (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3)
                    cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]),
                                  (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3)
                    cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                    cv2.imshow(video.name, img)
                    cv2.waitKey(1)
            # convert accumulated ticks to seconds
            toc /= cv2.getTickFrequency()
            # save results
            if 'VOT2018-LT' == args.dataset:
                # boxes, per-frame confidence and per-frame time
                video_path = os.path.join('results', args.dataset, model_name,
                        'longterm', video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path,
                        '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                        '{}_001_confidence.value'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in scores:
                        f.write('\n') if x is None else f.write("{:.6f}\n".format(x))
                result_path = os.path.join(video_path,
                        '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            elif 'GOT-10k' == args.dataset:
                # boxes and per-frame time
                video_path = os.path.join('results', args.dataset, model_name, video.name)
                if not os.path.isdir(video_path):
                    os.makedirs(video_path)
                result_path = os.path.join(video_path, '{}_001.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
                result_path = os.path.join(video_path,
                        '{}_time.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in track_times:
                        f.write("{:.6f}\n".format(x))
            else:
                # one box file per video directly under the model directory
                model_path = os.path.join('results', args.dataset, model_name)
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                result_path = os.path.join(model_path, '{}.txt'.format(video.name))
                with open(result_path, 'w') as f:
                    for x in pred_bboxes:
                        f.write(','.join([str(i) for i in x])+'\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx+1, video.name, toc, idx / toc))
Esempio n. 28
0
def main():
    """Run the restart-on-failure (VOT-style) evaluation and save the results.

    Everything is driven by the module-level ``args`` and ``cfg`` objects.
    The config and snapshot are loaded, a tracker is built and, when
    ``args.dataset`` is in ``vot_like_dataset``, every video of the dataset
    is tracked (only ``args.video`` when that option is non-empty).

    Per frame one entry is recorded:

    * ``1``  - the tracker was (re-)initialised from the ground truth,
    * ``2``  - the overlap with the ground truth was not above 0.85 (lost);
      re-initialisation is scheduled ``args.skip_frames`` frames later,
    * ``0``  - the frame was skipped while waiting for re-initialisation,
    * otherwise the predicted box (or polygon when ``cfg.MASK.MASK`` is set).

    For each video ``<video>_001.txt`` is written to
    ``<results_path>/<dataset>/<model_name>/<experiment_name>/<video>/`` and
    a summary line is appended to ``lost.txt`` in the experiment directory.
    Datasets outside ``vot_like_dataset`` are not handled yet and the
    function returns without tracking.

    Raises:
        RuntimeError: if no CUDA device is available.
    """
    if not torch.cuda.is_available():
        raise RuntimeError(
            'Failed to locate a CUDA GPU. Program cannot continue..')
    num_gpus = torch.cuda.device_count()
    gpu_type = torch.cuda.get_device_name(0)
    print(f"You have {num_gpus} available of type: {gpu_type}")
    print("This might take a few minutes...Grab a cup of coffee\n")

    # load config
    cfg.merge_from_file(args.config)
    dataset_root = os.path.join(args.dataset_directory, args.dataset)
    print(f"dataset root-->{dataset_root}")

    # create model and load the snapshot weights
    model = ModelBuilder()
    model = load_pretrain(model, args.snapshot).cuda().eval()

    # build tracker
    tracker = build_tracker(model)

    # create dataset
    dataset = DatasetFactory.create_dataset(name=args.dataset,
                                            dataset_root=dataset_root,
                                            load_img=False)

    model_name = args.model_name
    print(f"Model name is {model_name}")

    if args.dataset not in vot_like_dataset:
        # OPE tracking
        # will be implemented if needed in future
        return

    # Resolved once, before the loop: the final summary below must be
    # writable even when no video was processed (empty dataset, or
    # args.video matching nothing), where a per-video path never exists.
    experiment_dir = os.path.join(args.results_path, args.dataset,
                                  model_name, args.experiment_name)
    lost_log_path = os.path.join(experiment_dir, 'lost.txt')

    total_lost = 0
    # restart tracking
    for v_idx, video in enumerate(dataset):
        if args.video != '' and video.name != args.video:
            # test one special video
            continue
        frame_counter = 0
        lost_number = 0
        toc = 0
        idx = 0  # stays 0 for a video without frames
        pred_bboxes = []
        for idx, (img, gt_bbox) in enumerate(video):
            if len(gt_bbox) == 4:
                # expand [x, y, w, h] into the 8-value corner polygon
                gt_bbox = [
                    gt_bbox[0], gt_bbox[1], gt_bbox[0],
                    gt_bbox[1] + gt_bbox[3] - 1,
                    gt_bbox[0] + gt_bbox[2] - 1,
                    gt_bbox[1] + gt_bbox[3] - 1,
                    gt_bbox[0] + gt_bbox[2] - 1, gt_bbox[1]
                ]
            tic = cv2.getTickCount()
            if idx == frame_counter:
                # (re-)initialise the tracker from the ground truth
                cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]
                tracker.init(img, gt_bbox_)
                pred_bbox = gt_bbox_
                pred_bboxes.append(1)
            elif idx > frame_counter:
                outputs = tracker.track(img)
                pred_bbox = outputs['bbox']
                if cfg.MASK.MASK:
                    pred_bbox = outputs['polygon']
                overlap = vot_overlap(pred_bbox, gt_bbox,
                                      (img.shape[1], img.shape[0]))
                if overlap > 0.85:
                    # not lost
                    pred_bboxes.append(pred_bbox)
                else:
                    # lost object: re-initialise args.skip_frames frames
                    # from now, frames in between are recorded as 0
                    pred_bboxes.append(2)
                    frame_counter = idx + args.skip_frames
                    lost_number += 1
            else:
                # waiting for the scheduled re-initialisation
                pred_bboxes.append(0)
            toc += cv2.getTickCount() - tic
            if idx == 0:
                cv2.destroyAllWindows()
            if args.vis and idx > frame_counter:
                # np.int32 instead of the np.int alias, which was removed
                # in NumPy 1.24; OpenCV expects 32-bit integer points.
                gt_points = np.array(gt_bbox, np.int32).reshape((-1, 1, 2))
                cv2.polylines(img, [gt_points], True, (0, 255, 0), 3)
                if cfg.MASK.MASK:
                    pred_points = np.array(pred_bbox, np.int32).reshape(
                        (-1, 1, 2))
                    cv2.polylines(img, [pred_points], True,
                                  (0, 255, 255), 3)
                else:
                    bbox = list(map(int, pred_bbox))
                    cv2.rectangle(img, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.putText(img, str(lost_number), (40, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # Guard the speed figure so a video without frames cannot abort the
        # whole run with a division by zero.
        fps = idx / toc if toc > 0 else 0.0

        # save results
        save_path = os.path.join(experiment_dir, video.name)
        os.makedirs(save_path, exist_ok=True)
        result_path = os.path.join(save_path,
                                   '{}_001.txt'.format(video.name))
        with open(result_path, 'w') as f:
            for x in pred_bboxes:
                if isinstance(x, int):
                    # status code (0 / 1 / 2)
                    f.write("{:d}\n".format(x))
                else:
                    f.write(','.join([vot_float2str("%.4f", i)
                                      for i in x]) + '\n')
        with open(lost_log_path, 'a+') as f:
            f.write(
                f"{v_idx+1} Class: {video.name} | Time: {toc}s | Speed: {fps}fps | Lost:{lost_number}  \n"
            )

        print(
            '({:3d}) Class: {:12s} Time: {:4.1f}s Speed: {:3.1f}fps Lost: {:d}'
            .format(v_idx + 1, video.name, toc, fps, lost_number))
        total_lost += lost_number

    print("{:s} total lost: {:d}".format(model_name, total_lost))
    # The directory may not exist yet when no video was processed.
    os.makedirs(experiment_dir, exist_ok=True)
    with open(lost_log_path, 'a+') as f:
        f.write(
            f"Model architeture used --> {model_name} \ntotal lost: {total_lost} \n"
        )
        f.write(f"SKIP FRAMES USED --> {args.skip_frames}")

# Module-level frame buffers, all zero-filled uint8 arrays.  Several of them
# (color_img, result_mask_img, result_bbox_img, result_mask) and the
# mask_rcnn_flag are declared ``global`` by run_maskrcnn below.
_FRAME_SHAPE = (1280, 720, 3)    # shape shared by every 3-channel buffer
_PLANE_SHAPE = _FRAME_SHAPE[:2]  # matching single-channel shape

# three-channel buffers
color_img = np.zeros(_FRAME_SHAPE, np.uint8)
result_mask_img = np.zeros(_FRAME_SHAPE, np.uint8)
result_bbox_img = np.zeros(_FRAME_SHAPE, np.uint8)
pysot_img = np.zeros(_FRAME_SHAPE, np.uint8)
pysot_contour_img = np.zeros(_FRAME_SHAPE, np.uint8)

# single-channel buffers
result_mask = np.zeros(_PLANE_SHAPE, np.uint8)
pysot_mask = np.zeros(_PLANE_SHAPE, np.uint8)

mask_rcnn_flag = 0

# Tracker setup.  The statement order is kept as in the original: config
# first, then model and tracker, then the weights.
# NOTE(review): presumably ModelBuilder/build_tracker read cfg - confirm.
cfg.merge_from_file('config.yaml')
cfg.CUDA = torch.cuda.is_available()
device = torch.device('cuda') if cfg.CUDA else torch.device('cpu')
model_pysot = ModelBuilder()
tracker = build_tracker(model_pysot)
# The checkpoint is mapped to CPU storage on load; the model is moved to
# ``device`` afterwards.
model_pysot.load_state_dict(
    torch.load(
        'model.pth',
        map_location=lambda storage, loc: storage.cpu(),
    )
)
model_pysot.eval().to(device)


def run_maskrcnn():
    """Run the detector on the shared ``color_img`` buffer in an endless loop.

    Each iteration sets the module-level ``mask_rcnn_flag`` to 1 and calls
    ``inference_detector(model, color_img)``.  The function never returns.

    NOTE(review): the detection result is not used in the code visible here
    and ``model`` / ``inds_len`` are not defined in this part of the file -
    the snippet looks truncated; confirm against the full source.
    """
    global color_img, result_mask_img, result_bbox_img
    global result_mask, mask_rcnn_flag, inds_len
    while True:
        mask_rcnn_flag = 1
        result = inference_detector(model, color_img)
def main():

    torch.cuda.set_device(args.gpu_id)

    model_dir = "./experiments/siamrpn_r50_l234_dwxcorr/model.pth"
    model_config = "./experiments/siamrpn_r50_l234_dwxcorr/config.yaml"

    if os.path.isfile(model_dir):
        print("model file {} found".format(model_dir))
    else:
        print("model files not found, starting download".format(model_dir))
        os.system(
            "gdown https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH")
        os.system("mv model.pth ./experiments/siamrpn_r50_l234_dwxcorr")

    # load config
    cfg.merge_from_file(model_config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load(model_dir,
                                     map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # create an unique identifier
    worker_id = uuid.uuid4()

    # build tracker
    tracker = build_tracker(model)

    # Socket to talk to server
    context = zmq.Context()
    sub_socket = context.socket(zmq.SUB)

    # set up frame listening socket
    sub_socket.connect("tcp://{}:5556".format(args.server_ip))
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, "frame_")
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, str(worker_id))

    # setup push socket
    context = zmq.Context()
    push_socket = context.socket(zmq.PUSH)
    push_socket.connect("tcp://{}:5557".format(args.server_ip))

    # event monitoring
    # used to register worker once connection is established
    EVENT_MAP = {}
    for name in dir(zmq):
        if name.startswith('EVENT_'):
            value = getattr(zmq, name)
            EVENT_MAP[value] = name

    # monitor thread function
    def event_monitor(monitor):
        while monitor.poll():
            evt = recv_monitor_message(monitor)
            evt.update({'description': EVENT_MAP[evt['event']]})
            if evt['event'] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
                push_socket.send_json(
                    {"type": "REGISTER", "id": str(worker_id)})
            if evt['event'] == zmq.EVENT_MONITOR_STOPPED:
                break
        monitor.close()

    # register monitor
    monitor = sub_socket.get_monitor_socket()

    t = threading.Thread(target=event_monitor, args=(monitor,))
    t.start()

    support = None

    try:
        while True:
            # wait for next message
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == 'FRAME':
                msg = sub_socket.recv()
                buf = memoryview(msg)
                frame = np.frombuffer(
                    buf, dtype=md['dtype']).reshape(md['shape'])

                if support is None:
                    continue

                outputs = tracker.track(frame)
                bbox = list(map(int, outputs['bbox']))

                # send result
                push_socket.send_json(
                    {
                        "type": "TRACK",
                        "bbox": bbox,
                        "score": outputs['best_score'].tolist(),
                        "time": md['time'],
                        "id": str(worker_id)
                    })
                print('message: {}'.format(md['time']), end='\r')
            elif md['type'] == 'SUPPORT':
                frame_raw = md['data']['img']  # base 64 png image
                frame = np.array(
                    Image.open(
                        io.BytesIO(
                            base64.b64decode(frame_raw)
                        )
                    ).convert('RGB'))[:, :, ::-1]
                bbox = [int(float(i)) for i in md['data']['bbox'].split(",")]
                tracker.init(frame, bbox)
                support = (frame, bbox)
                print('Support received, tracking will now start')
            elif md['type'] == 'LOCATION':
                # make sure tracker has been initalized
                if support is not None:
                    center_pos = np.array(md['data'])
                    tracker.update(center_pos)
            elif md['type'] == 'PING':
                push_socket.send_json({"type": "PONG", "id": str(worker_id)})
            else:
                print('Invalid message type received: {}'.format(md['type']))
    except KeyboardInterrupt:
        print('Exiting... notifying server of disconnect')
        push_socket.send_json(
            {"type": "FIN", "id": str(worker_id)})
        # wait for the server to respond or let the user forcefully close
        print("Waiting for server response. Press CTRL+C again to forcefully close")
        while True:
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == "FIN":
                print('Server responded, now exiting')
                exit(0)
            elif md['type'] == "FRAME":
                # we have to accept the incoming frame to properly accept future messages
                msg = sub_socket.recv()