Example 1
class SiamMaskWrapper():
    def __init__(self, base_path=DEFAULT_BASE_PATH, config=DEFAULT_CONFIG,
                 resume=DEFAULT_RESUME, cpu=False):
        args = Namespace(base_path=base_path, config=config,
                         resume=resume, cpu=cpu)

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True
        self.state = None
        self.cfg = load_config(args)  # TODO figure out the important parts of this
        self.siammask = Custom(anchors=self.cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
            self.siammask = load_pretrain(self.siammask, args.resume)

        self.siammask.eval().to(self.device)

    def select_region(self, image, xywh=None):
        """
        image : 3 channel image
            The initial image with the object
        xywh : ArrayLike
            the position of the initial bounding rectangle as [x, y, w, h]
            If unspecified, a pop up selection will be used
        """
        if xywh is None:
            xywh = cv2.selectROI('SiamMask', image, False, False)

        x, y, w, h = xywh  # simply expand for convenience

        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        # init tracker
        self.state = siamese_init(image, target_pos, target_sz, self.siammask,
                                  self.cfg['hp'], device=self.device)

    def predict(self, image, visualize=False, verbose=False):
        self.state = siamese_track(self.state, image, mask_enable=True,
                                   refine_enable=True, device=self.device)

        target_pos = self.state["target_pos"]
        target_sz = self.state["target_sz"]
        score = self.state["score"]
        location = self.state['ploygon'].flatten()  # 'ploygon' [sic] is the key spelling used by upstream SiamMask
        # compute as ltwh
        ltwh = np.concatenate((location[0:2], location[4:6] - location[0:2]))
        transformed_loc = [np.int0(location).reshape((-1, 1, 2))]
        if verbose:
            print("transformed loc : {}".format(transformed_loc))

        mask = self.state['mask'] > self.state['p'].seg_thr
        image[:, :, 2] = (mask > 0) * 255 + (mask == 0) * image[:, :, 2]
        cv2.polylines(image, transformed_loc, True, (0, 255, 0), 3)
        #cv2.line(image, tuple(loc[0:2]), tuple(loc[0:2] + loc[2:]), (0, 255, 0))
        if visualize:
            # return mask
            cv2.imshow('SiamMask', image)
            cv2.waitKey(10000)

        return ltwh, score, image  # TODO the image should be a crop
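A minimal driver for the wrapper above, assuming an OpenCV capture loop; the video path and the initial box are placeholders:

import cv2

tracker = SiamMaskWrapper()
cap = cv2.VideoCapture('video.mp4')  # placeholder path
ok, frame = cap.read()
tracker.select_region(frame, xywh=(100, 50, 80, 120))  # pass xywh=None for an interactive ROI
while True:
    ok, frame = cap.read()
    if not ok:
        break
    ltwh, score, annotated = tracker.predict(frame)
cap.release()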
Example 2
def main():
    global args, logger, v_id
    args = parser.parse_args()
    cfg = load_config(args)

    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
    model = model.to(device)
    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue

        if vos_enable:
            iou_list, speed = track_vos(model, dataset[video],
                                        cfg['hp'] if 'hp' in cfg.keys() else None,
                                        args.mask, args.refine,
                                        args.dataset in ['DAVIS2017', 'ytb_vos'],
                                        device=device)
            iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model, dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask, args.refine, device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
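The parser this script references is not shown; a plausible minimal definition, inferred from how args is used in main() (every flag name and default here is an assumption, not the repository's actual parser):

import argparse

parser = argparse.ArgumentParser(description='SiamMask evaluation (assumed flags)')
parser.add_argument('--arch', default='Custom', help='model architecture')
parser.add_argument('--config', required=True, help='JSON config providing anchors and hp')
parser.add_argument('--resume', default='', help='path to a pretrained checkpoint')
parser.add_argument('--dataset', default='VOT2018', help='dataset name')
parser.add_argument('--video', default='', help='restrict evaluation to a single video')
parser.add_argument('--mask', action='store_true', help='enable the mask branch')
parser.add_argument('--refine', action='store_true', help='enable mask refinement')
parser.add_argument('--log', default='', help='optional log file path')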
Example 3
    def __init__(self, sample_im, base_dir='', x=0, y=0, w=10, h=10,
                 use_tensorrt=False, fp16_mode=True, features_trt=True,
                 rpn_trt=False, mask_trt=False, refine_trt=False):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup Model
        args = argparse.Namespace()
        args.config = base_dir + 'SiamMask/experiments/siammask_sharp/config_vot.json'
        args.resume = base_dir + 'SiamMask/experiments/siammask_sharp/SiamMask_VOT.pth'
        self.cfg = load_config(args)
        from custom import Custom
        siammask = Custom(anchors=self.cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
            siammask = load_pretrain(siammask, args.resume)

        siammask.eval().to(self.device)
    
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.state = siamese_init(sample_im, target_pos, target_sz, siammask, self.cfg['hp'], device=self.device)  # init tracker
        if use_tensorrt:
            self.state['net'].init_trt(fp16_mode, features_trt, rpn_trt, mask_trt, refine_trt,
                                       trt_weights_path='/root/msl_raptor_ws/src/msl_raptor/src/front_end/SiamMask/weights_trt')

        self.keys_to_share = ['target_pos', 'target_sz', 'score', 'mask', 'ploygon']

        self.states_each_object = []
        self.current_classes = []
Example 4
def get_siammask():
    siammask = Custom(anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(device)
    return siammask
Example 5
def track_init(img, x, y, w, h, device):
    cfg = load_config('config_davis.json')
    # Setup Model
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    siammask = load_pretrain(siammask, 'SiamMask_VOT.pth')
    siammask.eval().to(device)
    target_pos = np.array([x + w / 2, y + h / 2])
    target_sz = np.array([w, h])
    state = siamese_init(img, target_pos, target_sz, siammask, cfg['hp'], device=device)
    return state
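A sketch of how track_init pairs with siamese_track over a frame sequence; the glob pattern and the initial box are placeholders:

import glob

import cv2
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
frames = [cv2.imread(p) for p in sorted(glob.glob('frames/*.jpg'))]  # placeholder frames
state = track_init(frames[0], x=100, y=60, w=80, h=120, device=device)
for im in frames[1:]:
    state = siamese_track(state, im, mask_enable=True, refine_enable=True, device=device)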
Example 6
    def __init__(self):
        # Setup device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup Model
        args = FakeArgParser()
        cfg = libsiam.load_config(args)
        siammask = Custom(anchors=cfg['anchors'])
        if args.resume:
            assert libsiam.isfile(
                args.resume), '{} is not a valid file'.format(args.resume)
            siammask = libsiam.load_pretrain(siammask, args.resume)

        siammask.eval().to(device)

        # -- Output
        self.siammask = siammask
        self.args = args
        self.cfg = cfg
        self.state = None
Example 7
class SiamTracker(object):
    def __init__(self, resume=os.path.join(os.path.dirname(__file__), 'experiments/siammask_sharp/SiamMask_DAVIS.pth'), \
                 config=os.path.join(os.path.dirname(__file__), 'experiments/siammask_sharp/config_davis.json')):
        self.args = EasyDict(resume=resume, config=config)

        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True
        # Setup Model
        self.cfg = load_config(self.args)
        self.siammask = Custom(anchors=self.cfg['anchors'])
        assert isfile(self.args.resume), 'Please download {} first.'.format(
            self.args.resume)
        self.siammask = load_pretrain(self.siammask, self.args.resume)
        self.siammask = self.siammask.eval().half().to(self.device)

    def get_state(self, im, bbox):
        x, y = bbox[0], bbox[1]
        w, h = bbox[2] - x, bbox[3] - y
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        state = siamese_init(im,
                             target_pos,
                             target_sz,
                             self.siammask,
                             self.cfg['hp'],
                             device=self.device)
        state['track_id'] = np.random.randint(1000000)
        state['box'] = bbox
        state['score'] = 1.
        return state

    def track(self, state, im):
        new_state = siamese_track(state,
                                  im,
                                  self.siammask,
                                  mask_enable=False,
                                  refine_enable=False,
                                  device=self.device)
        new_state['track_id'] = state['track_id']
        p = new_state['target_pos']
        sz = new_state['target_sz']
        new_state['box'] = np.concatenate((p - sz / 2, p + sz / 2))
        return new_state
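A possible driver for SiamTracker; the class keeps no internal state, so the caller threads the state dict through track(). The frame list and the box are placeholders:

tracker = SiamTracker()
# frames: list of BGR images (placeholder); bbox is [x1, y1, x2, y2]
state = tracker.get_state(frames[0], bbox=[100, 60, 180, 180])
for im in frames[1:]:
    state = tracker.track(state, im)
    print(state['track_id'], state['box'])  # box comes back as [x1, y1, x2, y2]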
Example 8
def main():
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    params = {'penalty_k': args.penalty_k,
              'window_influence': args.window_influence,
              'lr': args.lr,
              'instance_size': args.search_region}

    num_search = len(params['penalty_k']) * len(params['window_influence']) * \
        len(params['lr']) * len(params['instance_size'])

    print(params)
    print(num_search)

    cfg = load_config(args)
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        model = models.__dict__[args.arch](anchors=cfg['anchors'])

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    model = model.to(device)

    default_hp = cfg.get('hp', {})

    p = dict()

    p['network'] = model
    p['network_name'] = args.arch+'_'+args.resume.split('/')[-1].split('.')[0]
    p['dataset'] = args.dataset

    global ims, gt, image_files

    dataset_info = load_dataset(args.dataset)
    videos = list(dataset_info.keys())
    np.random.shuffle(videos)

    for video in videos:
        print(video)
        if isfile('finish.flag'):
            return

        p['video'] = video
        ims = None
        image_files = dataset_info[video]['image_files']
        gt = dataset_info[video]['gt']

        np.random.shuffle(params['penalty_k'])
        np.random.shuffle(params['window_influence'])
        np.random.shuffle(params['lr'])
        for penalty_k in params['penalty_k']:
            for window_influence in params['window_influence']:
                for lr in params['lr']:
                    for instance_size in params['instance_size']:
                        p['hp'] = default_hp.copy()
                        p['hp'].update({'penalty_k':penalty_k,
                                'window_influence':window_influence,
                                'lr':lr,
                                'instance_size': instance_size,
                                })
                        tune(p)
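Each entry in params must be an iterable of candidate values, so the corresponding args fields are expected to hold grids rather than scalars. A sketch of grids the random-order search could consume (the ranges are illustrative, not tuned values):

import numpy as np

args.penalty_k = np.arange(0.0, 0.5, 0.05)         # illustrative range
args.window_influence = np.arange(0.3, 0.5, 0.02)  # illustrative range
args.lr = np.arange(0.8, 1.0, 0.05)                # illustrative range
args.search_region = [255]                         # instance sizes to try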
Example 9
class Dimp_LTMU_Tracker(object):
    def __init__(self, image, region, p=None, groundtruth=None):
        self.p = p
        self.i = 0
        self.t_id = 0
        if groundtruth is not None:
            self.groundtruth = groundtruth

        tfconfig = tf.ConfigProto()
        tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.3
        self.sess = tf.Session(config=tfconfig)
        init_gt1 = [region.x, region.y, region.width, region.height]
        init_gt = [
            init_gt1[1], init_gt1[0], init_gt1[1] + init_gt1[3],
            init_gt1[0] + init_gt1[2]
        ]  # ymin xmin ymax xmax

        self.last_gt = init_gt
        self.init_pymdnet(image, init_gt1)
        self.local_init(image, init_gt1)
        self.Global_Track_init(image, init_gt1)
        if self.p.use_mask:
            self.siammask_init(image, init_gt1)
        self.tc_init(self.p.model_dir)
        self.metric_init(image, np.array(init_gt1))
        self.dis_record = []
        self.state_record = []
        self.rv_record = []
        self.all_map = []
        self.count = 0

        local_state1, self.score_map, update, self.score_max, dis, flag, update_score = self.local_track(
            image)
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])

    def get_first_state(self):
        return self.score_map, self.score_max

    def siammask_init(self, im, init_gt):
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        parser = argparse.ArgumentParser(description='PyTorch Tracking Demo')

        parser.add_argument(
            '--resume',
            default='SiamMask/experiments/siammask/SiamMask_VOT_LD.pth',
            type=str,
            metavar='PATH',
            help='path to latest checkpoint (default: none)')
        parser.add_argument(
            '--config',
            dest='config',
            default='SiamMask/experiments/siammask/config_vot19lt.json',
            help='hyper-parameter of SiamMask in json format')
        args = parser.parse_args()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup Model
        cfg = load_config(args)
        self.siammask = Custom(anchors=cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(
                args.resume)
            self.siammask = load_pretrain(self.siammask, args.resume)

        self.siammask.eval().to(device)
        x = init_gt[0]
        y = init_gt[1]
        w = init_gt[2]
        h = init_gt[3]
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.siamstate = siamese_init(im, target_pos, target_sz, self.siammask,
                                      cfg['hp'])

    def siammask_track(self, im):
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        self.siamstate = siamese_track(self.siamstate,
                                       im,
                                       mask_enable=True,
                                       refine_enable=True)  # track
        # pdb.set_trace()
        score = np.max(self.siamstate['score'])
        location = self.siamstate['ploygon'].flatten()
        mask = self.siamstate['mask'] > self.siamstate['p'].seg_thr

        # im[:, :, 2] = (mask > 0) * 255 + (mask == 0) * im[:, :, 2]
        #
        # cv2.namedWindow("SiamMask", cv2.WINDOW_NORMAL)
        # cv2.rectangle(im, (int(self.siamstate['target_pos'][0] - self.siamstate['target_sz'][0] / 2.0),
        #                    int(self.siamstate['target_pos'][1] - self.siamstate['target_sz'][1] / 2.0)),
        #               (int(self.siamstate['target_pos'][0] + self.siamstate['target_sz'][0] / 2.0),
        #                int(self.siamstate['target_pos'][1] + self.siamstate['target_sz'][1] / 2.0)), [0, 255, 0], 2)
        # # cv2.imwrite("/home/xiaobai/Desktop/MBMD_vot_code/figure/%05d.jpg"%frame_id, im[:, :, -1::-1])
        # cv2.imshow("SiamMask", im)
        # cv2.waitKey(1)
        return score, mask

    def Global_Track_init(self, image, init_box):
        init_box = [
            init_box[0], init_box[1], init_box[0] + init_box[2],
            init_box[1] + init_box[3]
        ]
        cfg_file = 'Global_Track/configs/qg_rcnn_r50_fpn.py'
        ckp_file = 'Global_Track/checkpoints/qg_rcnn_r50_fpn_coco_got10k_lasot.pth'
        transforms = data.BasicPairTransforms(train=False)
        self.Global_Tracker = GlobalTrack(cfg_file,
                                          ckp_file,
                                          transforms,
                                          name_suffix='qg_rcnn_r50_fpn')
        self.Global_Tracker.init(image, init_box)

    def Global_Track_eval(self, image, num):
        # xywh
        results = self.Global_Tracker.update(image)
        index = np.argsort(results[:, -1])[::-1]
        max_index = index[:num]
        can_boxes = results[max_index][:, :4]
        can_boxes = np.array([
            can_boxes[:, 0], can_boxes[:,
                                       1], can_boxes[:, 2] - can_boxes[:, 0],
            can_boxes[:, 3] - can_boxes[:, 1]
        ]).transpose()
        return can_boxes

    def init_pymdnet(self, image, init_bbox):
        target_bbox = np.array(init_bbox)
        self.last_result = target_bbox
        self.pymodel = MDNet('./pyMDNet/models/mdnet_imagenet_vid.pth')
        if opts['use_gpu']:
            self.pymodel = self.pymodel.cuda()
        self.pymodel.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer
        self.criterion = BCELoss()
        init_optimizer = set_optimizer(self.pymodel, opts['lr_init'],
                                       opts['lr_mult'])
        self.update_optimizer = set_optimizer(self.pymodel, opts['lr_update'],
                                              opts['lr_mult'])

        tic = time.time()

        # Draw pos/neg samples
        pos_examples = SampleGenerator('gaussian', image.size,
                                       opts['trans_pos'], opts['scale_pos'])(
                                           target_bbox, opts['n_pos_init'],
                                           opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                            opts['scale_neg_init'])(target_bbox,
                                                    int(opts['n_neg_init'] *
                                                        0.5),
                                                    opts['overlap_neg_init']),
            SampleGenerator('whole', image.size)(target_bbox,
                                                 int(opts['n_neg_init'] * 0.5),
                                                 opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.feat_dim = pos_feats.size(-1)

        # Initial training
        train(self.pymodel,
              self.criterion,
              init_optimizer,
              pos_feats,
              neg_feats,
              opts['maxiter_init'],
              opts=opts)
        del init_optimizer, neg_feats
        torch.cuda.empty_cache()

        # Train bbox regressor
        bbreg_examples = SampleGenerator(
            'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
            opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                                  opts['overlap_bbreg'])
        bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples,
                                      opts)
        self.bbreg = BBRegressor(image.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()
        # Init sample generators
        self.sample_generator = SampleGenerator('gaussian', image.size,
                                                opts['trans'], opts['scale'])
        self.pos_generator = SampleGenerator('gaussian', image.size,
                                             opts['trans_pos'],
                                             opts['scale_pos'])
        self.neg_generator = SampleGenerator('uniform', image.size,
                                             opts['trans_neg'],
                                             opts['scale_neg'])

        # Init pos/neg features for update
        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_init'])
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.pos_feats_all = [pos_feats]
        self.neg_feats_all = [neg_feats]

        spf_total = time.time() - tic

    def pymdnet_eval(self, image, samples):
        sample_scores = forward_samples(self.pymodel,
                                        image,
                                        samples,
                                        out_layer='fc6',
                                        opts=opts)
        return sample_scores[:, 1].cpu().numpy()

    # def pymdnet_track(self, image):
    #     self.image = image
    #     target_bbox = self.last_result
    #     samples = self.sample_generator(target_bbox, opts['n_samples'])
    #     sample_scores = forward_samples(self.pymodel, image, samples, out_layer='fc6', opts=opts)
    #
    #     top_scores, top_idx = sample_scores[:, 1].topk(5)
    #     top_idx = top_idx.cpu().numpy()
    #     target_score = top_scores.mean()
    #     target_bbox = samples[top_idx].mean(axis=0)
    #
    #     success = target_score > 0
    #
    #     # Expand search area at failure
    #     if success:
    #         self.sample_generator.set_trans(opts['trans'])
    #     else:
    #         self.sample_generator.expand_trans(opts['trans_limit'])
    #
    #     self.last_result = target_bbox
    #     # Bbox regression
    #     bbreg_bbox = self.pymdnet_bbox_reg(success, samples, top_idx)
    #
    #     # Save result
    #     region = bbreg_bbox
    #
    #     # Data collect
    #     if success:
    #         self.collect_samples_pymdnet()
    #
    #     # Short term update
    #     if not success:
    #         self.pymdnet_short_term_update()
    #
    #     # Long term update
    #     elif self.i % opts['long_interval'] == 0:
    #         self.pymdnet_long_term_update()
    #
    #     return region, target_score

    def collect_samples_pymdnet(self, image):
        self.t_id += 1
        target_bbox = np.array([
            self.last_gt[1], self.last_gt[0],
            self.last_gt[3] - self.last_gt[1],
            self.last_gt[2] - self.last_gt[0]
        ])
        pos_examples = self.pos_generator(target_bbox, opts['n_pos_update'],
                                          opts['overlap_pos_update'])
        if len(pos_examples) > 0:
            pos_feats = forward_samples(self.pymodel, image, pos_examples,
                                        opts)
            self.pos_feats_all.append(pos_feats)
        if len(self.pos_feats_all) > opts['n_frames_long']:
            del self.pos_feats_all[0]

        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_update'])
        if len(neg_examples) > 0:
            neg_feats = forward_samples(self.pymodel, image, neg_examples,
                                        opts)
            self.neg_feats_all.append(neg_feats)
        if len(self.neg_feats_all) > opts['n_frames_short']:
            del self.neg_feats_all[0]

    def pymdnet_short_term_update(self):
        # Short term update
        nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
        pos_data = torch.cat(self.pos_feats_all[-nframes:], 0)
        neg_data = torch.cat(self.neg_feats_all, 0)
        train(self.pymodel,
              self.criterion,
              self.update_optimizer,
              pos_data,
              neg_data,
              opts['maxiter_update'],
              opts=opts)

    def pymdnet_long_term_update(self):
        if self.t_id % opts['long_interval'] == 0:
            # Long term update
            pos_data = torch.cat(self.pos_feats_all, 0)
            neg_data = torch.cat(self.neg_feats_all, 0)
            train(self.pymodel,
                  self.criterion,
                  self.update_optimizer,
                  pos_data,
                  neg_data,
                  opts['maxiter_update'],
                  opts=opts)

    #
    # def pymdnet_bbox_reg(self, success, samples, top_idx):
    #     target_bbox = self.last_result
    #     if success:
    #         bbreg_samples = samples[top_idx]
    #         if top_idx.shape[0] == 1:
    #             bbreg_samples = bbreg_samples[None, :]
    #         bbreg_feats = forward_samples(self.pymodel, self.image, bbreg_samples, opts)
    #         bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
    #         bbreg_bbox = bbreg_samples.mean(axis=0)
    #     else:
    #         bbreg_bbox = target_bbox
    #     return bbreg_bbox

    def metric_init(self, im, init_box):
        self.metric_model = ft_net(class_num=1120)
        path = '../utils/metric_net/metric_model/metric_model.pt'
        self.metric_model.eval()
        self.metric_model = self.metric_model.cuda()
        self.metric_model.load_state_dict(torch.load(path))
        tmp = np.random.rand(1, 3, 107, 107)
        tmp = (Variable(torch.Tensor(tmp))).type(torch.FloatTensor).cuda()
        # get target feature
        self.metric_model(tmp)
        init_box = init_box.reshape((1, 4))
        anchor_region = me_extract_regions(im, init_box)
        anchor_region = process_regions(anchor_region)
        anchor_region = torch.Tensor(anchor_region)
        anchor_region = (Variable(anchor_region)).type(
            torch.FloatTensor).cuda()
        self.anchor_feature, _ = self.metric_model(anchor_region)

    def metric_eval(self, im, boxes, anchor_feature):
        box_regions = me_extract_regions(np.array(im), boxes)
        box_regions = process_regions(box_regions)
        box_regions = torch.Tensor(box_regions)
        box_regions = (Variable(box_regions)).type(torch.FloatTensor).cuda()
        box_features, class_result = self.metric_model(box_regions)

        class_result = torch.softmax(class_result, dim=1)
        ap_dist = torch.norm(anchor_feature - box_features, 2, dim=1).view(-1)
        return ap_dist

    def tc_init(self, model_dir):
        self.tc_model = tclstm()
        self.X_input = tf.placeholder(
            "float", [None, tcopts['time_steps'], tcopts['lstm_num_input']])
        self.maps = tf.placeholder("float", [None, 19, 19, 1])
        self.map_logits = self.tc_model.map_net(self.maps)
        self.Inputs = tf.concat((self.X_input, self.map_logits), axis=2)
        self.logits, _ = self.tc_model.net(self.Inputs)

        variables_to_restore = [
            var for var in tf.global_variables()
            if (var.name.startswith('tclstm') or var.name.startswith('mapnet'))
        ]
        saver = tf.train.Saver(var_list=variables_to_restore)
        if self.p.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(
                os.path.join('./meta_updater', model_dir))
        else:
            checkpoint = './meta_updater/' + self.p.model_dir + '/lstm_model.ckpt-' + str(
                self.p.checkpoint)
        saver.restore(self.sess, checkpoint)

    def local_init(self, image, init_bbox):
        local_tracker = Tracker('dimp', 'dimp50')
        params = local_tracker.get_parameters()

        debug_ = getattr(params, 'debug', 0)
        params.debug = debug_

        params.tracker_name = local_tracker.name
        params.param_name = local_tracker.parameter_name

        self.local_Tracker = local_tracker.tracker_class(params)
        init_box = dict()
        init_box['init_bbox'] = init_bbox
        self.local_Tracker.initialize(image, init_box)

    def local_track(self, image):
        state, score_map, test_x, scale_ind, sample_pos, sample_scales, flag, s = self.local_Tracker.track_updater(
            image)
        update_score = 0
        update_flag = flag not in ['not_found', 'uncertain']
        update = update_flag
        max_score = max(score_map.flatten())
        self.all_map.append(score_map)
        local_state = np.array(state).reshape((1, 4))
        ap_dis = self.metric_eval(image, local_state, self.anchor_feature)
        self.dis_record.append(ap_dis.data.cpu().numpy()[0])
        h = image.shape[0]
        w = image.shape[1]
        self.state_record.append([
            local_state[0][0] / w, local_state[0][1] / h,
            (local_state[0][0] + local_state[0][2]) / w,
            (local_state[0][1] + local_state[0][3]) / h
        ])
        self.rv_record.append(max_score)
        if len(self.state_record) >= self.p.start_frame:
            dis = np.array(self.dis_record[-tcopts["time_steps"]:]).reshape(
                (tcopts["time_steps"], 1))
            rv = np.array(self.rv_record[-tcopts["time_steps"]:]).reshape(
                (tcopts["time_steps"], 1))
            state_tc = np.array(self.state_record[-tcopts["time_steps"]:])
            map_input = np.array(self.all_map[-tcopts["time_steps"]:])
            map_input = np.reshape(map_input,
                                   [tcopts['time_steps'], 1, 19, 19])
            map_input = map_input.transpose((0, 2, 3, 1))
            X_input = np.concatenate((state_tc, rv, dis), axis=1)
            logits = self.sess.run(self.logits,
                                   feed_dict={
                                       self.X_input:
                                       np.expand_dims(X_input, axis=0),
                                       self.maps:
                                       map_input
                                   })
            update = logits[0][0] < logits[0][1]
            update_score = logits[0][1]

        hard_negative = (flag == 'hard_negative')
        learning_rate = getattr(self.local_Tracker.params,
                                'hard_negative_learning_rate',
                                None) if hard_negative else None

        if update:
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.local_Tracker.get_iounet_box(
                self.local_Tracker.pos, self.local_Tracker.target_sz,
                sample_pos[scale_ind, :], sample_scales[scale_ind])

            # Update the classifier model
            self.local_Tracker.update_classifier(train_x, target_box,
                                                 learning_rate, s[scale_ind,
                                                                  ...])
        self.last_gt = [
            state[1], state[0], state[1] + state[3], state[0] + state[2]
        ]
        return state, score_map, update, max_score, ap_dis.data.cpu().numpy()[0], flag, update_score

    def locate(self, image):

        # Convert image
        im = numpy_to_torch(image)
        self.local_Tracker.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.local_Tracker.pos.round()
        sample_scales = self.local_Tracker.target_scale * self.local_Tracker.params.scale_factors
        test_x = self.local_Tracker.extract_processed_sample(
            im, self.local_Tracker.pos, sample_scales,
            self.local_Tracker.img_sample_sz)

        # Compute scores
        scores_raw = self.local_Tracker.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.local_Tracker.localize_target(
            scores_raw)
        return translation_vec, scale_ind, s, flag, sample_pos, sample_scales, test_x

    def local_update(self,
                     sample_pos,
                     translation_vec,
                     scale_ind,
                     sample_scales,
                     s,
                     test_x,
                     update_flag=None):

        # Check flags and set learning rate if hard negative
        if update_flag is None:
            update_flag = self.flag not in ['not_found', 'uncertain']
        hard_negative = (self.flag == 'hard_negative')
        learning_rate = self.local_Tracker.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])

            # Create label for sample
            train_y = self.local_Tracker.get_label_function(
                sample_pos, sample_scales[scale_ind])

            # Update memory
            self.local_Tracker.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if hard_negative:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.hard_negative_CG_iter)
        elif (self.local_Tracker.frame_num -
              1) % self.local_Tracker.params.train_skipping == 0:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.CG_iter)

    def tracking(self, image):
        self.i += 1
        mask = None
        candidate_bboxes = None
        # state, pyscore = self.pymdnet_track(image)
        # self.last_gt = [state[1], state[0], state[1] + state[3], state[0] + state[2]]
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])
        tic = time.time()
        local_state, self.score_map, update, local_score, dis, flag, update_score = self.local_track(
            image)

        md_score = self.pymdnet_eval(image,
                                     np.array(local_state).reshape([-1, 4]))[0]
        self.score_max = md_score

        if md_score > 0 and flag == 'normal':
            self.flag = 'found'
            if self.p.use_mask:
                self.siamstate['target_pos'] = self.local_Tracker.pos.numpy()[::-1]
                self.siamstate['target_sz'] = self.local_Tracker.target_sz.numpy()[::-1]
                siamscore, mask = self.siammask_track(
                    cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                self.local_Tracker.pos = torch.FloatTensor(
                    self.siamstate['target_pos'][::-1].copy())
                self.local_Tracker.target_sz = torch.FloatTensor(
                    self.siamstate['target_sz'][::-1].copy())
                local_state = torch.cat(
                    (self.local_Tracker.pos[[1, 0]] -
                     (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                     self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
            self.last_gt = np.array([
                local_state[1], local_state[0],
                local_state[1] + local_state[3],
                local_state[0] + local_state[2]
            ])
        elif md_score < 0 or flag == 'not_found':
            self.count += 1
            self.flag = 'not_found'
            candidate_bboxes = self.Global_Track_eval(image, 10)
            candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
            max_id = np.argmax(candidate_scores)
            if candidate_scores[max_id] > 0:
                redet_bboxes = candidate_bboxes[max_id]
                if self.count >= 5:
                    self.last_gt = np.array([
                        redet_bboxes[1], redet_bboxes[0],
                        redet_bboxes[1] + redet_bboxes[3],
                        redet_bboxes[2] + redet_bboxes[0]
                    ])
                    self.local_Tracker.pos = torch.FloatTensor([
                        (self.last_gt[0] + self.last_gt[2] - 1) / 2,
                        (self.last_gt[1] + self.last_gt[3] - 1) / 2
                    ])
                    self.local_Tracker.target_sz = torch.FloatTensor([
                        (self.last_gt[2] - self.last_gt[0]),
                        (self.last_gt[3] - self.last_gt[1])
                    ])
                    self.score_max = candidate_scores[max_id]
                    self.count = 0
        if update:
            self.collect_samples_pymdnet(image)

        self.pymdnet_long_term_update()

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]
        toc = time.time() - tic
        print(toc)
        # if self.flag == 'found' and self.score_max > 0:
        #     confidence_score = 0.99
        # elif self.flag == 'not_found':
        #     confidence_score = 0.0
        # else:
        #     confidence_score = np.clip((local_score+np.arctan(0.2*self.score_max)/math.pi+0.5)/2, 0, 1)
        confidence_score = np.clip(
            (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) /
            2, 0, 1)
        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                     np.array(self.last_gt, dtype=np.int32),
                     '2',
                     groundtruth=self.groundtruth,
                     update=update_score,
                     can_bboxes=candidate_bboxes,
                     frame_id=self.i,
                     tracker_score=md_score,
                     mask=mask)

        return [
            float(self.last_gt[1]),
            float(self.last_gt[0]),
            float(width),
            float(height)
        ], self.score_map, 0, confidence_score, 0
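A rough sketch of driving Dimp_LTMU_Tracker over a sequence; the region object and the parameter bundle p mimic what the evaluation harness is assumed to pass in, and frames is a placeholder list of images:

from types import SimpleNamespace

region = SimpleNamespace(x=100, y=60, width=80, height=120)  # placeholder initial box
tracker = Dimp_LTMU_Tracker(frames[0], region, p=p)  # p is supplied by the harness
for image in frames[1:]:
    box, score_map, _, confidence, _ = tracker.tracking(image)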
Example 10
class SingleTracker(object):
    def __init__(self, config_path, model_path):
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path

        cfg = load_config(args)
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            parser.error('invalid architecture: {}'.format(args.arch))

        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        ################# Dangerous
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg.keys() else None, self.model.anchors)
        self.p.renew()

        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)

        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        ################


    def get_examplar_feature(self, img, target_pos, target_sz):
        avg_chans = np.mean(img, axis=(0, 1))

        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)

        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    def siamese_track(self, img, target_pos, target_sz, examplar_feature, debug=False, mask_enable=True, refine_enable=True):
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]

        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)
        '''
        scale_x = self.p.exemplar_size / s_x
        d_search = (self.p.instance_size - self.p.exemplar_size) / 2
        pad = d_search / scale_x
        s_x = s_x + 2 * pad
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]
        '''
        # The commented-out block above (from the original author) can be
        # simplified to the three lines below.
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]


        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size, round(s_x), avg_chans).unsqueeze(0))

        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz*scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]

        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id, (5, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]

            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            def crop_back(image, bbox, out_sz, padding=-1):
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(float)
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                    flags=cv2.INTER_LINEAR,
                                    borderMode=cv2.BORDER_CONSTANT,
                                    borderValue=padding)
                return crop

            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                    crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                    s * self.p.exemplar_size, s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = crop_back(mask, back_box, (im_w, im_h))

            target_mask = (mask_in_img > self.p.seg_thr).astype(np.uint8)
            if cv2.__version__[-5] == '4':
                contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            else:
                _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]
            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use max area polygon
                polygon = contour.reshape(-1, 2)
                # pbox = cv2.boundingRect(polygon)  # Min Max Rectangle
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # Rotated Rectangle

                # box_in_img = pbox
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target_pos, target_sz)
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []
        return target_pos, target_sz, score, mask
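A sketch of threading SingleTracker's exemplar feature through successive frames; the config and checkpoint paths, the frame list, and the initial box are placeholders:

tracker = SingleTracker('config_davis.json', 'SiamMask_DAVIS.pth')  # placeholder paths
target_pos = np.array([140.0, 120.0])
target_sz = np.array([80.0, 120.0])
feat = tracker.get_examplar_feature(frames[0], target_pos, target_sz)
for im in frames[1:]:
    target_pos, target_sz, score, mask = tracker.siamese_track(
        im, target_pos, target_sz, feat)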
Example 11
def main():
    global args, logger, v_id
    args = parser.parse_args()
    cfg = load_config(args)

    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)

    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    model.eval()
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)
    # setup dataset
    dataset = load_dataset(args.dataset)

    # VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    # iou_lists = []  # VOS
    # speed_list = []

    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue

        if vos_enable:
            iou_list, speed = track_vos(
                model,
                dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None,
                args.mask,
                args.refine,
                args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            # iou_lists.append(iou_list)
        else:
            lost, speed = track_vot(model,
                                    dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask,
                                    args.refine,
                                    device=device)
            total_lost += lost
Example 12
if __name__ == '__main__':
    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
        siammask = load_pretrain(siammask, args.resume)

    siammask.eval().to(device)

    # Parse Image file
    img_files = sorted(glob.glob(join(args.base_path, '*.PN*')))
    print(img_files)
    ims = [cv2.imread(imf) for imf in img_files[130:150]]

    #img_files = sorted(glob.glob(join(args.base_path, '*.jp*')))
    #ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
    x, y, w, h = init_rect
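    # Assumed continuation (the snippet stops after ROI selection); this follows
    # the standard SiamMask demo pattern.
    target_pos = np.array([x + w / 2, y + h / 2])
    target_sz = np.array([w, h])
    state = siamese_init(ims[0], target_pos, target_sz, siammask, cfg['hp'], device=device)
    for im in ims[1:]:
        state = siamese_track(state, im, mask_enable=True, refine_enable=True, device=device)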
Example 13
if __name__ == '__main__':
    # Setup device
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)

    # siammask.eval().to(device)
    siammask.eval()
    # Parse Image file
    img_files = sorted(glob.glob(join(args.base_path, '*.jp*')))
    ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    try:
        init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
        x, y, w, h = init_rect
    except Exception:
        exit()

    toc = 0
    for f, im in enumerate(ims):
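        # Assumed loop body (the snippet is truncated here); it follows the
        # standard SiamMask demo: init on the first frame, track afterwards.
        tic = cv2.getTickCount()
        if f == 0:
            target_pos = np.array([x + w / 2, y + h / 2])
            target_sz = np.array([w, h])
            state = siamese_init(im, target_pos, target_sz, siammask, cfg['hp'])
        else:
            state = siamese_track(state, im, mask_enable=True, refine_enable=True)
        toc += cv2.getTickCount() - tic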
Example 14
    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # In most cases, setting this flag lets cuDNN's built-in auto-tuner find the most efficient algorithms for the current configuration, improving runtime performance
    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])  # where do the anchors come from???
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)

    siammask.eval().to(device)  # eval() switches to inference mode; to(device) pushes the model to the GPU or CPU

    # Parse Image file
    img_files = sorted(glob.glob(join(args.base_path, '*.jp*')))
    ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
    try:
        init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
        x, y, w, h = init_rect  # unpacks the 4 returned values
    except Exception:
        exit()

    toc = 0
Example 15
def main():
    # parse the command-line arguments
    global args, logger, v_id
    args = parser.parse_args()
    # load the configuration: network structure, hyperparameters, etc.
    cfg = load_config(args)
    # initialize logging and write log output to a file on disk
    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log, logging.INFO)
    # record the parsed configuration in the log
    logger = logging.getLogger('global')
    logger.info(args)

    # setup model
    # build the network architecture
    if args.arch == 'Custom':
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])
    else:
        parser.error('invalid architecture: {}'.format(args.arch))
    # load the pretrained model weights
    if args.resume:
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(model, args.resume)
    # switch to evaluation mode (disables dropout and fixes batch-norm statistics)
    model.eval()
    # select the device
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)
    # setup dataset
    dataset = load_dataset(args.dataset)

    # these three datasets support masks: VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable Mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT
    iou_lists = []  # VOS
    speed_list = []
    # iterate over the videos in the dataset
    for v_id, video in enumerate(dataset.keys(), start=1):
        if args.video != '' and video != args.video:
            continue
        # if vos_enable is True, call track_vos
        if vos_enable:
            # multi-object tracking is enabled when testing on DAVIS2017 or ytb_vos
            iou_list, speed = track_vos(
                model,
                dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None,
                args.mask,
                args.refine,
                args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            iou_lists.append(iou_list)
        # otherwise call track_vot
        else:
            lost, speed = track_vot(model,
                                    dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask,
                                    args.refine,
                                    device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
Example 16
class SiamFaceTracker(object):
    def __init__(self,
                 cfg,
                 min_iou=0.3,
                 scale_factor=2,
                 model="SiamMask_DAVIS.pth"):
        device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu'
        )  # to do - check if this should be instantiated for multiple SiamMasks objects
        self.internal_id = uuid.uuid4()  # object identity can also work

        self.cfg = cfg
        self.siammask = Custom(anchors=cfg['anchors'])
        self.siammask = load_pretrain(self.siammask, model)
        self.siammask.eval().to(device)
        self.state = None

        self.prev_bbox = None
        self.is_recruited = False
        self.class_id = None
        self.min_iou = min_iou
        self.iou = None
        self.frames_elapsed_from_set_state = 0
        self.last_tracking_result = None
        self.counter = Counter()
        self.scale_factor = scale_factor

    def set_state(
        self, im, detection
    ):  # we can adapt this input to match the object detector bbox output
        scaled_bbox = scale_bbox(detection, self.scale_factor)
        x = scaled_bbox["left"]
        y = scaled_bbox["top"]
        w = abs(x - scaled_bbox["right"])
        h = abs(y - scaled_bbox["bottom"])
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.state = siamese_init(im, target_pos, target_sz, self.siammask,
                                  self.cfg['hp'])
        self.class_id = scaled_bbox["label"]
        self.is_recruited = True
        self.frames_elapsed_from_set_state = 0
        self.counter[scaled_bbox["label"]] += 1

    def update_state(self, im, detection):
        scaled_bbox = scale_bbox(detection, self.scale_factor)
        x = scaled_bbox["left"]
        y = scaled_bbox["top"]
        w = abs(x - scaled_bbox["right"])
        h = abs(y - scaled_bbox["bottom"])
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.state = siamese_init(im, target_pos, target_sz, self.siammask,
                                  self.cfg['hp'])
        self.frames_elapsed_from_set_state = 0
        self.counter[scaled_bbox["label"]] += 1

    def invalidate(self, reason):
        print(reason)
        self.class_id = None
        self.is_recruited = False
        self.frames_elapsed_from_set_state = 0
        self.last_tracking_result = None
        self.prev_bbox = None
        self.iou = None
        self.counter = Counter()

    def track_face(self, im):
        if not self.is_recruited:
            return None

        self.state = siamese_track(self.state, im, mask_enable=False)

        [x, y] = self.state["target_pos"]
        [w, h] = self.state["target_sz"]

        x = int(x - w / 2)
        y = int(y - h / 2)
        xw = int(x + w)
        yh = int(y + h)

        c_bbox = (x, y, xw, yh)

        if self.prev_bbox:
            self.iou = bb_iou(self.prev_bbox, c_bbox)

        self.prev_bbox = c_bbox
        self.frames_elapsed_from_set_state += 1

        self.class_id = self.counter.most_common(1)[0][0]

        if self.iou:
            if self.iou > self.min_iou:
                self.last_tracking_result = TrackingResult(self.class_id, c_bbox)
            else:
                self.invalidate(
                    "invalidate: insufficient iou with previous frame")
        else:
            self.last_tracking_result = TrackingResult(self.class_id, c_bbox)
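Esempio n. 16 relies on two helpers that are not shown, scale_bbox and bb_iou. A minimal sketch of plausible implementations, inferred purely from the call sites above (both bodies are assumptions, not the example's actual code):

def scale_bbox(detection, scale_factor):
    # grow a {"left", "top", "right", "bottom", "label"} box around its center
    cx = (detection["left"] + detection["right"]) / 2
    cy = (detection["top"] + detection["bottom"]) / 2
    w = (detection["right"] - detection["left"]) * scale_factor
    h = (detection["bottom"] - detection["top"]) * scale_factor
    return {"left": cx - w / 2, "top": cy - h / 2,
            "right": cx + w / 2, "bottom": cy + h / 2,
            "label": detection["label"]}


def bb_iou(box_a, box_b):
    # intersection over union of two (x1, y1, x2, y2) boxes
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter)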
Esempio n. 17
0
def process_vedio(vedio_path, initRect):
    """
    Process a video with SiamMask.
    :param vedio_path: path to the video file
    :param initRect: initial position of the tracked target as (x, y, w, h)
    :return:
    """

    # 1. Setup device
    # use the GPU when one is available, otherwise fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # let cuDNN benchmark and select the fastest algorithms
    torch.backends.cudnn.benchmark = True

    # 2. Setup model
    # 2.1 load the configuration from the parsed command-line arguments
    cfg = load_config(args)

    # 2.2 Custom is the network built for this project; otherwise an architecture from models would be used
    from custom import Custom
    siammask = Custom(anchors=cfg['anchors'])
    # 2.3 make sure the pretrained weights file exists
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(
            args.resume)
        siammask = load_pretrain(siammask, args.resume)
    # model.eval() must be called before inference so that dropout and batch
    # normalization layers switch to evaluation (non-training) mode;
    # .to(device) moves the model to the GPU so later computation runs there
    siammask.eval().to(device)

    # position of the tracked target in the first frame
    x, y, w, h = initRect
    VeryBig = 999999999  # intended for maximizing the display window (unused here)
    Cap = cv2.VideoCapture(vedio_path)  # open the video file for reading
    ret, frame = Cap.read()  # read the first frame
    ims = [frame]  # keep the frame in a list, mirroring the original code's per-frame list

    im = frame
    f = 0
    target_pos = np.array([x + w / 2, y + h / 2])
    target_sz = np.array([w, h])
    state = siamese_init(im, target_pos, target_sz, siammask,
                         cfg['hp'])  # init tracker
    middlepath = "../data/middle.mp4"
    outpath = "../data/output.mp4"
    # the frame size passed to VideoWriter must match the frames written below
    vediowriter = cv2.VideoWriter(middlepath,
                                  cv2.VideoWriter_fourcc('M', 'P', '4', 'V'),
                                  10, (frame.shape[1], frame.shape[0]))
    while True:
        tic = cv2.getTickCount()
        ret, im = Cap.read()  # grab the next frame
        if not ret:
            break
        state = siamese_track(state, im, mask_enable=True,
                              refine_enable=True)  # track
        location = state['ploygon'].flatten()
        mask = state['mask'] > state['p'].seg_thr
        im[:, :, 2] = (mask > 0) * 255 + (mask == 0) * im[:, :, 2]
        cv2.polylines(im, [np.int0(location).reshape((-1, 1, 2))], True,
                      (0, 255, 0), 3)
        vediowriter.write(im)
        cv2.imshow('SiamMask', im)
        key = cv2.waitKey(1)
        if key > 0:
            break

        f += 1
    vediowriter.release()
    Cap.release()

    return
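A hypothetical invocation of process_vedio, assuming the surrounding script defines the parser whose arguments the function reads through the global args (the video path and first-frame rectangle below are placeholders):

if __name__ == '__main__':
    args = parser.parse_args()
    # track the object whose first-frame box is x=100, y=150, w=50, h=80
    process_vedio('../data/demo.mp4', (100, 150, 50, 80))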
Esempio n. 18
0
class TrackingManager:

    def __init__(self):
        resume = '/home/saad/Root/vision/Computer_Vision/Tracking-systems/SiamMask_DAVIS.pth'
        config = '/home/saad/Root/vision/Computer_Vision/Tracking-systems/config_davis.json'
        self.cfg = load_config(config=config)
        self.siammask = Custom(anchors=self.cfg['anchors'])
        self.siammask = load_pretrain(self.siammask, resume)

        self.active_boxes = []
        self.frames_generator = get_detection_output_as_frames_generator()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.siammask.eval().to(self.device)
        self.tracker_initialized = False
        self.state = None
        self.times = np_.asarray([])

    def track(self):

        current_frame, final_one = next(self.frames_generator)
        if final_one:
            yield 1  # signal the end of the detection stream

        s_ = time.time()
        init_boxes = select_active_boxes(current_frame, history_queue, 0.1)
        targets = []
        for box_ in init_boxes:
            target_pos = np.array([box_.x1 + box_.w / 2, box_.y1 + box_.h / 2])
            target_sz = np.array([box_.w, box_.h])
            # print("x1 = {}, y1 = {}, w = {}, h = {}".format(box.x1, box.y1, box.w, box.h))
            s = {"target_pos": target_pos, "target_sz": target_sz, "x": box_.x1, "y": box_.y1, "w": box_.w,
                 "h": box_.h}
            targets.append(s)
        self.active_boxes.extend(init_boxes)

        if len(init_boxes) > 0:
            if self.state is not None:
                targets.extend(self.state['targets'])
            self.state = siamese_init(current_frame.img, self.siammask, self.cfg['hp'], device=self.device,
                                      targets=targets)  # init tracker
            self.tracker_initialized = True

        if self.tracker_initialized and self.state is not None and len(self.state['targets']) > 0:
            self.state = siamese_track(self.state, current_frame.img)
            t = 0
            while t < len(self.state['targets']):
                # check that the tracked object still exist
                score = self.state['targets'][t]['score']
                if score <= .001:
                    print("remove box because its score is {}".format(self.state['targets'][t]['score']))
                    self.remove_gone_boxes(self.state['targets'][t])
                    del self.state['targets'][t]
                    continue

                target = self.state['targets'][t]

                boxx = select_matching_box(target['ploygon'], current_frame)
                self.state['targets'][t]['ploygon'] = [[boxx.x1, boxx.y1], [boxx.x1, boxx.y2], [boxx.x2, boxx.y2],
                                                       [boxx.x2, boxx.y1]]
                # assign ID to the tracked object
                x, y, w, h = target['x'], target['y'], target['w'], target['h']
                for o, active_box in enumerate(self.active_boxes):
                    if active_box.x1 == x and active_box.y1 == y and active_box.w == w and active_box.h == h:
                        boxx.ID = active_box.ID
                        boxx.type = active_box.type

                # frame.get_coord_depend_seg(mask,boxx.x1,boxx.y1,boxx.x2,boxx.y2, id)
                cv2.rectangle(current_frame.img, (int(boxx.x1), int(boxx.y1)), (int(boxx.x2), int(boxx.y2)),
                              (255, 0, 0), 2)
                center = [int(x) for x in target['target_pos']]
                cv2.putText(current_frame.img, str(boxx.ID), tuple(center), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0))
                t += 1
                # frame.add_polygon(target['ploygon'], id)
                current_frame.add_box(boxx)

                history_queue.append(current_frame)
            print(f"tracked in {time.time() - s_}")
            self.times = np_.append(self.times, [time.time() - s_], axis=0)
            cv2.putText(current_frame.img, "current frame", (20, 20), cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0))
            current_frame.img = cv2.cvtColor(current_frame.img, cv2.COLOR_BGR2RGB)
            cv2.imwrite(f"/home/saad/Root/datasets/tracking/tracking_facepass_case/{current_frame.frame_indx}.jpg",
                        current_frame.img)
            yield current_frame

    def remove_gone_boxes(self, target):
        x, y, w, h = target['x'], target['y'], target['w'], target['h']
        i = 0
        for active_box in self.active_boxes:
            if active_box.x1 == x and active_box.y1 == y and active_box.w == w and active_box.h == h:
                del self.active_boxes[i]
                break
            i += 1

    def get_tracker_fps(self):
        avg = np_.average(self.times)
        return 1 / avg, avg
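A hypothetical driver loop for TrackingManager. track() is written as a generator that advances one frame per call, yielding 1 when the detection stream reports its final frame and an annotated frame otherwise; the sketch below assumes every call reaches one of those yields:

manager = TrackingManager()
while True:
    result = next(manager.track())  # each fresh generator processes one frame
    if result == 1:  # the frames generator flagged its final frame
        break
fps, avg_time = manager.get_tracker_fps()
print("avg frame time: {:.3f}s ({:.1f} FPS)".format(avg_time, fps))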
Esempio n. 19
0
def main():
    global args, logger, v_id  # global variables
    args = parser.parse_args()  # the command-line arguments given when test.py runs
    cfg = load_config(args)  # load the JSON config file and set args.arch
    print(cfg)

    init_log('global', logging.INFO)
    if args.log != "":
        add_file_handler('global', args.log,
                         logging.INFO)  # add_file_handler creates a logger and attaches a file handler

    logger = logging.getLogger('global')
    logger.info(args)

    # setup model: Custom is the network implemented in the paper; any other
    # value would load an architecture specified under models
    if args.arch == 'Custom':  # args.arch selects the architecture; defaults to '' when not given on the command line
        from custom import Custom
        model = Custom(anchors=cfg['anchors'])  # cfg comes from config_vot.json, so tracking uses its anchors dictionary
    else:
        parser.error('invalid architecture: {}'.format(args.arch))

    if args.resume:  # if args.resume is given, it must point to an existing file
        assert isfile(args.resume), '{} is not a valid file'.format(
            args.resume)
        model = load_pretrain(
            model, args.resume)  # load_pretrain tolerates mismatches between the checkpoint and the network
    model.eval()
    device = torch.device('cuda' if (
        torch.cuda.is_available() and not args.cpu) else 'cpu')
    model = model.to(device)

    # setup dataset (a dict)
    dataset = load_dataset(
        args.dataset)  # load_dataset can load the VOT, DAVIS, and ytb_vos datasets
    # only these three data sources support mask output

    # VOS or VOT?
    if args.dataset in ['DAVIS2016', 'DAVIS2017', 'ytb_vos'] and args.mask:
        vos_enable = True  # enable mask output
    else:
        vos_enable = False

    total_lost = 0  # VOT: count of tracking failures
    iou_lists = []  # VOS: per-video IoU results for the segmentation task
    speed_list = []

    # v_id is the video index starting at 1; video is the video name
    for v_id, video in enumerate(dataset.keys(), start=1):
        if v_id == 2:  # debug leftover: stop after the first video
            exit()
        if args.video != '' and video != args.video:  # skipped by default since args.video is '' unless set
            continue

        if vos_enable:  # segmentation task; segmentation and tracking are mutually exclusive
            iou_list, speed = track_vos(
                model,
                dataset[video],
                cfg['hp'] if 'hp' in cfg.keys() else None,
                args.mask,
                args.refine,
                args.dataset in ['DAVIS2017', 'ytb_vos'],
                device=device)
            iou_lists.append(iou_list)  # what type of data is iou_list?
        else:  # tracking task
            lost, speed = track_vot(model,
                                    dataset[video],
                                    cfg['hp'] if 'hp' in cfg.keys() else None,
                                    args.mask,
                                    args.refine,
                                    device=device)
            total_lost += lost
        speed_list.append(speed)

    # report final result
    if vos_enable:  # segmentation task
        for thr, iou in zip(thrs, np.mean(np.concatenate(iou_lists), axis=0)):
            logger.info('Segmentation Threshold {:.2f} mIoU: {:.3f}'.format(
                thr, iou))
    else:
        logger.info('Total Lost: {:d}'.format(total_lost))

    logger.info('Mean Speed: {:.2f} FPS'.format(np.mean(speed_list)))
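A hypothetical way to drive a test script shaped like Esempio n. 19 from Python, assuming the module-level parser exposes flags matching the attributes read above (flag names, paths, and the dataset name are all placeholders, not the script's verified interface):

import sys

# simulate a command-line invocation; parser defaults are assumed to cover
# any attributes not set here (args.log, args.video, args.cpu, ...)
sys.argv = ['test.py', '--config', 'config_vot.json',
            '--resume', 'SiamMask_VOT.pth', '--arch', 'Custom',
            '--dataset', 'VOT2018', '--mask', '--refine']
main()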