Example #1
    def init_pymdnet(self, image, init_bbox):
        target_bbox = np.array(init_bbox)
        self.last_result = target_bbox
        self.pymodel = MDNet(os.path.join(base_path, 'DiMP_LTMU/pyMDNet/models/mdnet_imagenet_vid.pth'))
        if opts['use_gpu']:
            self.pymodel = self.pymodel.cuda()
        self.pymodel.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer
        self.criterion = BCELoss()
        init_optimizer = set_optimizer(self.pymodel, opts['lr_init'], opts['lr_mult'])
        self.update_optimizer = set_optimizer(self.pymodel, opts['lr_update'], opts['lr_mult'])

        tic = time.time()

        # Draw pos/neg samples
        pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'], opts['scale_pos'])(
            target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            SampleGenerator('uniform', image.size, opts['trans_neg_init'], opts['scale_neg_init'])(
                target_bbox, int(opts['n_neg_init'] * 0.5), opts['overlap_neg_init']),
            SampleGenerator('whole', image.size)(
                target_bbox, int(opts['n_neg_init'] * 0.5), opts['overlap_neg_init'])])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.feat_dim = pos_feats.size(-1)

        # Initial training
        train(self.pymodel, self.criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init'], opts=opts)
        del init_optimizer, neg_feats
        torch.cuda.empty_cache()

        # Train bbox regressor
        bbreg_examples = SampleGenerator('uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
                                         opts['aspect_bbreg'])(
            target_bbox, opts['n_bbreg'], opts['overlap_bbreg'])
        bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples, opts)
        self.bbreg = BBRegressor(image.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()
        # Init sample generators
        self.sample_generator = SampleGenerator('gaussian', image.size, opts['trans'], opts['scale'])
        self.pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'], opts['scale_pos'])
        self.neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'], opts['scale_neg'])

        # Init pos/neg features for update
        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'], opts['overlap_neg_init'])
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.pos_feats_all = [pos_feats]
        self.neg_feats_all = [neg_feats]

        spf_total = time.time() - tic
Example #2
class Dimp_LTMU_Tracker(object):
    def __init__(self, image, region, p=None, groundtruth=None):
        self.p = p
        self.i = 0
        self.t_id = 0
        if groundtruth is not None:
            self.groundtruth = groundtruth

        tfconfig = tf.ConfigProto()
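        # cap TensorFlow at 30% of GPU memory so the PyTorch sub-trackers can share the device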
        tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.3
        self.sess = tf.Session(config=tfconfig)
        init_gt1 = [region.x, region.y, region.width, region.height]
        init_gt = [
            init_gt1[1], init_gt1[0], init_gt1[1] + init_gt1[3],
            init_gt1[0] + init_gt1[2]
        ]  # ymin xmin ymax xmax

        self.last_gt = init_gt
        self.init_pymdnet(image, init_gt1)
        self.local_init(image, init_gt1)
        self.Global_Track_init(image, init_gt1)
        if self.p.use_mask:
            self.siammask_init(image, init_gt1)
        self.tc_init(self.p.model_dir)
        self.metric_init(image, np.array(init_gt1))
        self.dis_record = []
        self.state_record = []
        self.rv_record = []
        self.all_map = []
        self.count = 0

        local_state1, self.score_map, update, self.score_max, dis, flag, update_score = self.local_track(
            image)
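        # last_gt is [ymin, xmin, ymax, xmax]; DiMP stores a (y, x) centre and (h, w) size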
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])

    def get_first_state(self):
        return self.score_map, self.score_max

    def siammask_init(self, im, init_gt):
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        parser = argparse.ArgumentParser(description='PyTorch Tracking Demo')

        parser.add_argument(
            '--resume',
            default='SiamMask/experiments/siammask/SiamMask_VOT_LD.pth',
            type=str,
            metavar='PATH',
            help='path to latest checkpoint (default: none)')
        parser.add_argument(
            '--config',
            dest='config',
            default='SiamMask/experiments/siammask/config_vot19lt.json',
            help='hyper-parameter of SiamMask in json format')
        args = parser.parse_args()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        torch.backends.cudnn.benchmark = True

        # Setup Model
        cfg = load_config(args)
        self.siammask = Custom(anchors=cfg['anchors'])
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(
                args.resume)
            self.siammask = load_pretrain(self.siammask, args.resume)

        self.siammask.eval().to(device)
        x = init_gt[0]
        y = init_gt[1]
        w = init_gt[2]
        h = init_gt[3]
        target_pos = np.array([x + w / 2, y + h / 2])
        target_sz = np.array([w, h])
        self.siamstate = siamese_init(im, target_pos, target_sz, self.siammask,
                                      cfg['hp'])

    def siammask_track(self, im):
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        self.siamstate = siamese_track(self.siamstate,
                                       im,
                                       mask_enable=True,
                                       refine_enable=True)  # track
        # pdb.set_trace()
        score = np.max(self.siamstate['score'])
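        # 'ploygon' is the key spelling used by the upstream SiamMask state dict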
        location = self.siamstate['ploygon'].flatten()
        mask = self.siamstate['mask'] > self.siamstate['p'].seg_thr

        # im[:, :, 2] = (mask > 0) * 255 + (mask == 0) * im[:, :, 2]
        #
        # cv2.namedWindow("SiamMask", cv2.WINDOW_NORMAL)
        # cv2.rectangle(im, (int(self.siamstate['target_pos'][0] - self.siamstate['target_sz'][0] / 2.0),
        #                    int(self.siamstate['target_pos'][1] - self.siamstate['target_sz'][1] / 2.0)),
        #               (int(self.siamstate['target_pos'][0] + self.siamstate['target_sz'][0] / 2.0),
        #                int(self.siamstate['target_pos'][1] + self.siamstate['target_sz'][1] / 2.0)), [0, 255, 0], 2)
        # # cv2.imwrite("/home/xiaobai/Desktop/MBMD_vot_code/figure/%05d.jpg"%frame_id, im[:, :, -1::-1])
        # cv2.imshow("SiamMask", im)
        # cv2.waitKey(1)
        return score, mask

    def Global_Track_init(self, image, init_box):
        init_box = [
            init_box[0], init_box[1], init_box[0] + init_box[2],
            init_box[1] + init_box[3]
        ]
        cfg_file = 'Global_Track/configs/qg_rcnn_r50_fpn.py'
        ckp_file = 'Global_Track/checkpoints/qg_rcnn_r50_fpn_coco_got10k_lasot.pth'
        transforms = data.BasicPairTransforms(train=False)
        self.Global_Tracker = GlobalTrack(cfg_file,
                                          ckp_file,
                                          transforms,
                                          name_suffix='qg_rcnn_r50_fpn')
        self.Global_Tracker.init(image, init_box)

    def Global_Track_eval(self, image, num):
        # returns the top-num candidates as [x, y, w, h] boxes
        results = self.Global_Tracker.update(image)
        index = np.argsort(results[:, -1])[::-1]
        max_index = index[:num]
        can_boxes = results[max_index][:, :4]
        can_boxes = np.array([
            can_boxes[:, 0], can_boxes[:, 1],
            can_boxes[:, 2] - can_boxes[:, 0],
            can_boxes[:, 3] - can_boxes[:, 1]
        ]).transpose()
        return can_boxes

    def init_pymdnet(self, image, init_bbox):
        target_bbox = np.array(init_bbox)
        self.last_result = target_bbox
        self.pymodel = MDNet('./pyMDNet/models/mdnet_imagenet_vid.pth')
        if opts['use_gpu']:
            self.pymodel = self.pymodel.cuda()
        self.pymodel.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer
        self.criterion = BCELoss()
        init_optimizer = set_optimizer(self.pymodel, opts['lr_init'],
                                       opts['lr_mult'])
        self.update_optimizer = set_optimizer(self.pymodel, opts['lr_update'],
                                              opts['lr_mult'])

        tic = time.time()

        # Draw pos/neg samples
        pos_examples = SampleGenerator('gaussian', image.size,
                                       opts['trans_pos'], opts['scale_pos'])(
                                           target_bbox, opts['n_pos_init'],
                                           opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                            opts['scale_neg_init'])(target_bbox,
                                                    int(opts['n_neg_init'] *
                                                        0.5),
                                                    opts['overlap_neg_init']),
            SampleGenerator('whole', image.size)(target_bbox,
                                                 int(opts['n_neg_init'] * 0.5),
                                                 opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        pos_feats = forward_samples(self.pymodel, image, pos_examples, opts)
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.feat_dim = pos_feats.size(-1)

        # Initial training
        train(self.pymodel,
              self.criterion,
              init_optimizer,
              pos_feats,
              neg_feats,
              opts['maxiter_init'],
              opts=opts)
        del init_optimizer, neg_feats
        torch.cuda.empty_cache()

        # Train bbox regressor
        bbreg_examples = SampleGenerator(
            'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
            opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                                  opts['overlap_bbreg'])
        bbreg_feats = forward_samples(self.pymodel, image, bbreg_examples,
                                      opts)
        self.bbreg = BBRegressor(image.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
        del bbreg_feats
        torch.cuda.empty_cache()
        # Init sample generators
        self.sample_generator = SampleGenerator('gaussian', image.size,
                                                opts['trans'], opts['scale'])
        self.pos_generator = SampleGenerator('gaussian', image.size,
                                             opts['trans_pos'],
                                             opts['scale_pos'])
        self.neg_generator = SampleGenerator('uniform', image.size,
                                             opts['trans_neg'],
                                             opts['scale_neg'])

        # Init pos/neg features for update
        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_init'])
        neg_feats = forward_samples(self.pymodel, image, neg_examples, opts)
        self.pos_feats_all = [pos_feats]
        self.neg_feats_all = [neg_feats]

        spf_total = time.time() - tic

    def pymdnet_eval(self, image, samples):
        sample_scores = forward_samples(self.pymodel,
                                        image,
                                        samples,
                                        out_layer='fc6',
                                        opts=opts)
        return sample_scores[:, 1].cpu().numpy()

    # def pymdnet_track(self, image):
    #     self.image = image
    #     target_bbox = self.last_result
    #     samples = self.sample_generator(target_bbox, opts['n_samples'])
    #     sample_scores = forward_samples(self.pymodel, image, samples, out_layer='fc6', opts=opts)
    #
    #     top_scores, top_idx = sample_scores[:, 1].topk(5)
    #     top_idx = top_idx.cpu().numpy()
    #     target_score = top_scores.mean()
    #     target_bbox = samples[top_idx].mean(axis=0)
    #
    #     success = target_score > 0
    #
    #     # Expand search area at failure
    #     if success:
    #         self.sample_generator.set_trans(opts['trans'])
    #     else:
    #         self.sample_generator.expand_trans(opts['trans_limit'])
    #
    #     self.last_result = target_bbox
    #     # Bbox regression
    #     bbreg_bbox = self.pymdnet_bbox_reg(success, samples, top_idx)
    #
    #     # Save result
    #     region = bbreg_bbox
    #
    #     # Data collect
    #     if success:
    #         self.collect_samples_pymdnet()
    #
    #     # Short term update
    #     if not success:
    #         self.pymdnet_short_term_update()
    #
    #     # Long term update
    #     elif self.i % opts['long_interval'] == 0:
    #         self.pymdnet_long_term_update()
    #
    #     return region, target_score

    def collect_samples_pymdnet(self, image):
        self.t_id += 1
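        # convert last_gt [ymin, xmin, ymax, xmax] back to [x, y, w, h] for the sample generators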
        target_bbox = np.array([
            self.last_gt[1], self.last_gt[0],
            self.last_gt[3] - self.last_gt[1],
            self.last_gt[2] - self.last_gt[0]
        ])
        pos_examples = self.pos_generator(target_bbox, opts['n_pos_update'],
                                          opts['overlap_pos_update'])
        if len(pos_examples) > 0:
            pos_feats = forward_samples(self.pymodel, image, pos_examples,
                                        opts)
            self.pos_feats_all.append(pos_feats)
        if len(self.pos_feats_all) > opts['n_frames_long']:
            del self.pos_feats_all[0]

        neg_examples = self.neg_generator(target_bbox, opts['n_neg_update'],
                                          opts['overlap_neg_update'])
        if len(neg_examples) > 0:
            neg_feats = forward_samples(self.pymodel, image, neg_examples,
                                        opts)
            self.neg_feats_all.append(neg_feats)
        if len(self.neg_feats_all) > opts['n_frames_short']:
            del self.neg_feats_all[0]

    def pymdnet_short_term_update(self):
        # Short term update
        nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
        pos_data = torch.cat(self.pos_feats_all[-nframes:], 0)
        neg_data = torch.cat(self.neg_feats_all, 0)
        train(self.pymodel,
              self.criterion,
              self.update_optimizer,
              pos_data,
              neg_data,
              opts['maxiter_update'],
              opts=opts)

    def pymdnet_long_term_update(self):
        if self.t_id % opts['long_interval'] == 0:
            # Long term update
            pos_data = torch.cat(self.pos_feats_all, 0)
            neg_data = torch.cat(self.neg_feats_all, 0)
            train(self.pymodel,
                  self.criterion,
                  self.update_optimizer,
                  pos_data,
                  neg_data,
                  opts['maxiter_update'],
                  opts=opts)

    #
    # def pymdnet_bbox_reg(self, success, samples, top_idx):
    #     target_bbox = self.last_result
    #     if success:
    #         bbreg_samples = samples[top_idx]
    #         if top_idx.shape[0] == 1:
    #             bbreg_samples = bbreg_samples[None, :]
    #         bbreg_feats = forward_samples(self.pymodel, self.image, bbreg_samples, opts)
    #         bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
    #         bbreg_bbox = bbreg_samples.mean(axis=0)
    #     else:
    #         bbreg_bbox = target_bbox
    #     return bbreg_bbox

    def metric_init(self, im, init_box):
        self.metric_model = ft_net(class_num=1120)
        path = '../utils/metric_net/metric_model/metric_model.pt'
        self.metric_model.eval()
        self.metric_model = self.metric_model.cuda()
        self.metric_model.load_state_dict(torch.load(path))
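        # warm up the network with a dummy 107x107 input before extracting the anchor feature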
        tmp = np.random.rand(1, 3, 107, 107)
        tmp = (Variable(torch.Tensor(tmp))).type(torch.FloatTensor).cuda()
        # get target feature
        self.metric_model(tmp)
        init_box = init_box.reshape((1, 4))
        anchor_region = me_extract_regions(im, init_box)
        anchor_region = process_regions(anchor_region)
        anchor_region = torch.Tensor(anchor_region)
        anchor_region = (Variable(anchor_region)).type(
            torch.FloatTensor).cuda()
        self.anchor_feature, _ = self.metric_model(anchor_region)

    def metric_eval(self, im, boxes, anchor_feature):
        box_regions = me_extract_regions(np.array(im), boxes)
        box_regions = process_regions(box_regions)
        box_regions = torch.Tensor(box_regions)
        box_regions = (Variable(box_regions)).type(torch.FloatTensor).cuda()
        box_features, class_result = self.metric_model(box_regions)

        class_result = torch.softmax(class_result, dim=1)
        ap_dist = torch.norm(anchor_feature - box_features, 2, dim=1).view(-1)
        return ap_dist

    def tc_init(self, model_dir):
        self.tc_model = tclstm()
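        # placeholders for the meta-updater: a window of per-frame cue vectors and 19x19 response maps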
        self.X_input = tf.placeholder(
            "float", [None, tcopts['time_steps'], tcopts['lstm_num_input']])
        self.maps = tf.placeholder("float", [None, 19, 19, 1])
        self.map_logits = self.tc_model.map_net(self.maps)
        self.Inputs = tf.concat((self.X_input, self.map_logits), axis=2)
        self.logits, _ = self.tc_model.net(self.Inputs)

        variables_to_restore = [
            var for var in tf.global_variables()
            if (var.name.startswith('tclstm') or var.name.startswith('mapnet'))
        ]
        saver = tf.train.Saver(var_list=variables_to_restore)
        if self.p.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(
                os.path.join('./meta_updater', model_dir))
        else:
            checkpoint = './meta_updater/' + self.p.model_dir + '/lstm_model.ckpt-' + str(
                self.p.checkpoint)
        saver.restore(self.sess, checkpoint)

    def local_init(self, image, init_bbox):
        local_tracker = Tracker('dimp', 'dimp50')
        params = local_tracker.get_parameters()

        debug_ = getattr(params, 'debug', 0)
        params.debug = debug_

        params.tracker_name = local_tracker.name
        params.param_name = local_tracker.parameter_name

        self.local_Tracker = local_tracker.tracker_class(params)
        init_box = dict()
        init_box['init_bbox'] = init_bbox
        self.local_Tracker.initialize(image, init_box)

    def local_track(self, image):
        state, score_map, test_x, scale_ind, sample_pos, sample_scales, flag, s = self.local_Tracker.track_updater(
            image)
        update_score = 0
        update_flag = flag not in ['not_found', 'uncertain']
        update = update_flag
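        # collect per-frame cues for the meta-updater: response map, metric distance, normalized box, max score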
        max_score = max(score_map.flatten())
        self.all_map.append(score_map)
        local_state = np.array(state).reshape((1, 4))
        ap_dis = self.metric_eval(image, local_state, self.anchor_feature)
        self.dis_record.append(ap_dis.data.cpu().numpy()[0])
        h = image.shape[0]
        w = image.shape[1]
        self.state_record.append([
            local_state[0][0] / w, local_state[0][1] / h,
            (local_state[0][0] + local_state[0][2]) / w,
            (local_state[0][1] + local_state[0][3]) / h
        ])
        self.rv_record.append(max_score)
        if len(self.state_record) >= self.p.start_frame:
            dis = np.array(self.dis_record[-tcopts["time_steps"]:]).reshape(
                (tcopts["time_steps"], 1))
            rv = np.array(self.rv_record[-tcopts["time_steps"]:]).reshape(
                (tcopts["time_steps"], 1))
            state_tc = np.array(self.state_record[-tcopts["time_steps"]:])
            map_input = np.array(self.all_map[-tcopts["time_steps"]:])
            map_input = np.reshape(map_input,
                                   [tcopts['time_steps'], 1, 19, 19])
            map_input = map_input.transpose((0, 2, 3, 1))
            X_input = np.concatenate((state_tc, rv, dis), axis=1)
            logits = self.sess.run(self.logits,
                                   feed_dict={
                                       self.X_input: np.expand_dims(X_input, axis=0),
                                       self.maps: map_input
                                   })
            update = logits[0][0] < logits[0][1]
            update_score = logits[0][1]

        hard_negative = (flag == 'hard_negative')
        learning_rate = getattr(self.local_Tracker.params,
                                'hard_negative_learning_rate',
                                None) if hard_negative else None

        if update:
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.local_Tracker.get_iounet_box(
                self.local_Tracker.pos, self.local_Tracker.target_sz,
                sample_pos[scale_ind, :], sample_scales[scale_ind])

            # Update the classifier model
            self.local_Tracker.update_classifier(train_x, target_box,
                                                 learning_rate,
                                                 s[scale_ind, ...])
        self.last_gt = [
            state[1], state[0], state[1] + state[3], state[0] + state[2]
        ]
        return (state, score_map, update, max_score,
                ap_dis.data.cpu().numpy()[0], flag, update_score)

    def locate(self, image):

        # Convert image
        im = numpy_to_torch(image)
        self.local_Tracker.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.local_Tracker.pos.round()
        sample_scales = self.local_Tracker.target_scale * self.local_Tracker.params.scale_factors
        test_x = self.local_Tracker.extract_processed_sample(
            im, self.local_Tracker.pos, sample_scales,
            self.local_Tracker.img_sample_sz)

        # Compute scores
        scores_raw = self.local_Tracker.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.local_Tracker.localize_target(
            scores_raw)
        return translation_vec, scale_ind, s, flag, sample_pos, sample_scales, test_x

    def local_update(self,
                     sample_pos,
                     translation_vec,
                     scale_ind,
                     sample_scales,
                     s,
                     test_x,
                     update_flag=None):

        # Check flags and set learning rate if hard negative
        if update_flag is None:
            update_flag = self.flag not in ['not_found', 'uncertain']
        hard_negative = (self.flag == 'hard_negative')
        learning_rate = self.local_Tracker.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])

            # Create label for sample
            train_y = self.local_Tracker.get_label_function(
                sample_pos, sample_scales[scale_ind])

            # Update memory
            self.local_Tracker.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if hard_negative:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.hard_negative_CG_iter)
        elif (self.local_Tracker.frame_num -
              1) % self.local_Tracker.params.train_skipping == 0:
            self.local_Tracker.filter_optimizer.run(
                self.local_Tracker.params.CG_iter)

    def tracking(self, image):
        self.i += 1
        mask = None
        candidate_bboxes = None
        # state, pyscore = self.pymdnet_track(image)
        # self.last_gt = [state[1], state[0], state[1] + state[3], state[0] + state[2]]
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])
        tic = time.time()
        local_state, self.score_map, update, local_score, dis, flag, update_score = self.local_track(
            image)

        md_score = self.pymdnet_eval(image,
                                     np.array(local_state).reshape([-1, 4]))[0]
        self.score_max = md_score

        if md_score > 0 and flag == 'normal':
            self.flag = 'found'
            if self.p.use_mask:
                self.siamstate['target_pos'] = self.local_Tracker.pos.numpy()[::-1]
                self.siamstate['target_sz'] = self.local_Tracker.target_sz.numpy()[::-1]
                siamscore, mask = self.siammask_track(
                    cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                self.local_Tracker.pos = torch.FloatTensor(
                    self.siamstate['target_pos'][::-1].copy())
                self.local_Tracker.target_sz = torch.FloatTensor(
                    self.siamstate['target_sz'][::-1].copy())
                local_state = torch.cat(
                    (self.local_Tracker.pos[[1, 0]] -
                     (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                     self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
            self.last_gt = np.array([
                local_state[1], local_state[0],
                local_state[1] + local_state[3],
                local_state[0] + local_state[2]
            ])
        elif md_score < 0 or flag == 'not_found':
            self.count += 1
            self.flag = 'not_found'
            candidate_bboxes = self.Global_Track_eval(image, 10)
            candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
            max_id = np.argmax(candidate_scores)
            if candidate_scores[max_id] > 0:
                redet_bboxes = candidate_bboxes[max_id]
                if self.count >= 5:
                    self.last_gt = np.array([
                        redet_bboxes[1], redet_bboxes[0],
                        redet_bboxes[1] + redet_bboxes[3],
                        redet_bboxes[0] + redet_bboxes[2]
                    ])
                    self.local_Tracker.pos = torch.FloatTensor([
                        (self.last_gt[0] + self.last_gt[2] - 1) / 2,
                        (self.last_gt[1] + self.last_gt[3] - 1) / 2
                    ])
                    self.local_Tracker.target_sz = torch.FloatTensor([
                        (self.last_gt[2] - self.last_gt[0]),
                        (self.last_gt[3] - self.last_gt[1])
                    ])
                    self.score_max = candidate_scores[max_id]
                    self.count = 0
        if update:
            self.collect_samples_pymdnet(image)

        self.pymdnet_long_term_update()

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]
        toc = time.time() - tic
        print(toc)
        # if self.flag == 'found' and self.score_max > 0:
        #     confidence_score = 0.99
        # elif self.flag == 'not_found':
        #     confidence_score = 0.0
        # else:
        #     confidence_score = np.clip((local_score+np.arctan(0.2*self.score_max)/math.pi+0.5)/2, 0, 1)
        confidence_score = np.clip(
            (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) / 2,
            0, 1)
        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                     np.array(self.last_gt, dtype=np.int32),
                     '2',
                     groundtruth=self.groundtruth,
                     update=update_score,
                     can_bboxes=candidate_bboxes,
                     frame_id=self.i,
                     tracker_score=md_score,
                     mask=mask)

        return [
            float(self.last_gt[1]),
            float(self.last_gt[0]),
            float(width),
            float(height)
        ], self.score_map, 0, confidence_score, 0
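
Example #2 above juggles two box layouts: [x, y, w, h] for the sub-trackers and [ymin, xmin, ymax, xmax] for self.last_gt. A minimal sketch of the two conversions, with illustrative helper names that are not part of the tracker:

import numpy as np

def xywh_to_yxyx(box):
    # [x, y, w, h] -> [ymin, xmin, ymax, xmax]
    x, y, w, h = box
    return np.array([y, x, y + h, x + w])

def yxyx_to_xywh(box):
    # [ymin, xmin, ymax, xmax] -> [x, y, w, h]
    ymin, xmin, ymax, xmax = box
    return np.array([xmin, ymin, xmax - xmin, ymax - ymin])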
Example #3
neg_feats = forward_samples(model, image, neg_examples)

# Initial training
train(model, criterion, init_optimizer, pos_feats, neg_feats,
      opts['maxiter_init'])
del init_optimizer, neg_feats
torch.cuda.empty_cache()

# Train bbox regressor
bbreg_examples = SampleGenerator('uniform', image.size, opts['trans_bbreg'],
                                 opts['scale_bbreg'],
                                 opts['aspect_bbreg'])(target_bbox,
                                                       opts['n_bbreg'],
                                                       opts['overlap_bbreg'])
bbreg_feats = forward_samples(model, image, bbreg_examples)
bbreg = BBRegressor(image.size)
bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
del bbreg_feats
torch.cuda.empty_cache()

# Init sample generators for update
sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                   opts['scale'])
pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                opts['scale_pos'])
neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                opts['scale_neg'])

# Init pos/neg features for update
neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                             opts['overlap_neg_init'])
Example #4
def run_mdnet(img_list, init_bbox, gt=None, savefig_dir='', display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update: these three are generators, not the generated samples
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox: this becomes the target_bbox used in the next iteration
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        # forward_samples scores the sampled boxes in a single forward pass of the network

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)  # average the top boxes into a single estimate
        success = target_score > 0

        # Expand search area at failure: adjust the sample-generation spread according to the tracking outcome
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression: refine the box using the top-scoring samples from the current frame only (earlier frames are not used)
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update: retrain on the positive/negative features accumulated over the most recent frames
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'.
                  format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
Example #5
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              model_path='models/model001.pth'):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    opts['model_path'] = model_path

    print('********')
    print('model:', opts['model_path'])
    print('********')

    assert model_path in ('models/model000.pth', 'models/model001.pth')

    if model_path == 'models/model000.pth':
        model = MDNet0(opts['model_path'])
    else:
        model = MDNet1(opts['model_path'])

    if opts['use_gpu']:
        model = model.cuda()

    # Init criterion and optimizer
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    print(pos_feats)
    neg_feats = forward_samples(model, image, neg_examples)
    print(neg_feats)

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)

        # for top 5 samples, maximize score using hill-climbing algorithm
        for j in range(5):
            sample_ = samples[top_idx[j]]
            last_top_score = None

            # hill-climbing search
            while True:
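                # build 8 one-pixel perturbations of [x, y, w, h]: each moves one edge of the box in or out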
                sample_left_p = [
                    sample_[0] + 1, sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_left_n = [
                    sample_[0] - 1, sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_up_p = [
                    sample_[0], sample_[1] + 1, sample_[2], sample_[3] - 1
                ]
                sample_up_n = [
                    sample_[0], sample_[1] - 1, sample_[2], sample_[3] + 1
                ]
                sample_right_p = [
                    sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                ]
                sample_right_n = [
                    sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                ]
                sample_bottom_p = [
                    sample_[0], sample_[1], sample_[2], sample_[3] + 1
                ]
                sample_bottom_n = [
                    sample_[0], sample_[1], sample_[2], sample_[3] - 1
                ]

                all_samples = [
                    sample_left_p, sample_left_n, sample_up_p, sample_up_n,
                    sample_right_p, sample_right_n, sample_bottom_p,
                    sample_bottom_n
                ]

                hillClimbingSS = forward_samples(model,
                                                 image,
                                                 np.array(all_samples),
                                                 out_layer='fc6')
                top_score, top_index = hillClimbingSS[:, 1].topk(1)
                top_score_float = top_score.cpu().numpy()[0]

                # End of hill climbing: no neighbour improves the score
                if last_top_score is not None:
                    if top_score_float < last_top_score:
                        break

                sample_ = all_samples[top_index]
                samples[top_idx[j]] = all_samples[top_index]
                last_top_score = top_score_float

        # modify sample scores array
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        sampleStore = []
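        # keep a copy of the current samples so they can be restored if the wide-area search fails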
        for j in range(len(samples)):
            temp = []
            for k in range(4):
                temp.append(samples[j][k])
            sampleStore.append(temp)

        # if the mean of the top-5 scores is below 0, search a wide area around the last box
        target_score = top_scores.mean()

        if target_score < 0:
            # print('')
            # print('last bbox:')
            # print(result[i-1])
            last_left = result[i - 1][0]
            last_top = result[i - 1][1]

            # print('')
            # for j in range(len(samples)): print(j, samples[j], sample_scores[j])
            # print('')
            # print('sample top scores (before):')
            # print(top_scores)
            # print(top_idx)

            cnt = 0
            rl = [32, 16]

            for _ in range(len(rl)):
                everywhere_sample = []

                # find everywhere (near the last bbox)
                meanWidth = 0.0
                meanHeight = 0.0
                for j in range(len(samples)):
                    meanWidth += samples[j][2]
                    meanHeight += samples[j][3]
                meanWidth /= len(samples)
                meanHeight /= len(samples)

                width = image.size[0]
                height = image.size[1]

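                # scan a 32x32 grid of mean-sized boxes around the last result; rl controls the grid spacing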
                for j in range(32):
                    for k in range(32):
                        jk = [
                            last_left + (31 - 2 * j) * meanWidth / rl[_],
                            last_top + (31 - 2 * k) * meanHeight / rl[_],
                            meanWidth, meanHeight
                        ]
                        # print(j, k, jk)
                        everywhere_sample.append(jk)

                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = \
                    everywhere_scores[:, 1].topk(5)

                # print('')
                # print('everywhere_sample:')
                # for j in range(len(everywhere_sample)): print(j, everywhere_sample[j], everywhere_scores[j])

                # print('')
                # print('everywhere top scores (before):')
                # print(everywhere_top_scores)
                # print(everywhere_top_idx)
                # for j in range(5): print(everywhere_sample[everywhere_top_idx[j]])

                # for top 5 samples in everywhere_sample, maximize score using hill-climbing algorithm
                for j in range(5):
                    # print('')
                    sample_ = everywhere_sample[everywhere_top_idx[j]]
                    last_top_score = None

                    # hill-climbing search
                    while True:
                        sample_left_p = [
                            sample_[0] + 1, sample_[1], sample_[2] - 1,
                            sample_[3]
                        ]
                        sample_left_n = [
                            sample_[0] - 1, sample_[1], sample_[2] + 1,
                            sample_[3]
                        ]
                        sample_up_p = [
                            sample_[0], sample_[1] + 1, sample_[2],
                            sample_[3] - 1
                        ]
                        sample_up_n = [
                            sample_[0], sample_[1] - 1, sample_[2],
                            sample_[3] + 1
                        ]
                        sample_right_p = [
                            sample_[0], sample_[1], sample_[2] + 1, sample_[3]
                        ]
                        sample_right_n = [
                            sample_[0], sample_[1], sample_[2] - 1, sample_[3]
                        ]
                        sample_bottom_p = [
                            sample_[0], sample_[1], sample_[2], sample_[3] + 1
                        ]
                        sample_bottom_n = [
                            sample_[0], sample_[1], sample_[2], sample_[3] - 1
                        ]

                        all_samples = [
                            sample_left_p, sample_left_n, sample_up_p,
                            sample_up_n, sample_right_p, sample_right_n,
                            sample_bottom_p, sample_bottom_n
                        ]

                        hillClimbingSS = forward_samples(model,
                                                         image,
                                                         np.array(all_samples),
                                                         out_layer='fc6')
                        top_score, top_index = hillClimbingSS[:, 1].topk(1)
                        top_score_float = top_score.cpu().numpy()[0]

                        # End of hill climbing: no neighbour improves the score
                        if last_top_score is not None:
                            # print(last_top_score)
                            if top_score_float < last_top_score:
                                break

                        sample_ = all_samples[top_index]
                        everywhere_sample[everywhere_top_idx[j]] = all_samples[top_index]
                        last_top_score = top_score_float

                everywhere_scores = forward_samples(
                    model, image, np.array(everywhere_sample), out_layer='fc6')
                everywhere_top_scores, everywhere_top_idx = \
                    everywhere_scores[:, 1].topk(5)

                # print('')
                # print('everywhere top scores (after):')
                # print(everywhere_top_scores)
                # print(everywhere_top_idx)
                # for j in range(5): print(everywhere_sample[everywhere_top_idx[j]])

                # merge 'samples' with everywhere samples
                everywhere_top5 = []
                for j in range(5):
                    everywhere_top5.append(
                        everywhere_sample[everywhere_top_idx[j]])
                samples = np.concatenate((samples, np.array(everywhere_top5)))

                sample_scores = forward_samples(model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)

                if top_scores.mean() > 0:
                    # print('')
                    # for j in range(len(samples)): print(j, samples[j], sample_scores[j])
                    # print('')
                    # print('sample top scores (after):')
                    # print(top_scores)
                    # print(top_idx)
                    break
                cnt += 1

            # failure -> recover original samples
            if cnt == 2:
                # print('recovered')
                samples = np.array(sampleStore)
                sample_scores = forward_samples(model,
                                                image,
                                                samples,
                                                out_layer='fc6')
                top_scores, top_idx = sample_scores[:, 1].topk(5)

        # finally modify sample scores array
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')
        top_scores, top_idx = sample_scores[:, 1].topk(5)

        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(
                    savefig_dir,
                    ('M' + model_path[14] + 'T3_' + '{:04d}.jpg'.format(i))),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'
                  .format(i, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    plt.close('all')
    return result, result_bb, fps, overlap
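
# overlap_ratio above is the IoU helper shared by these examples. A minimal
# sketch, assuming boxes are [x, y, w, h] NumPy arrays (the real helper may
# broadcast differently):
import numpy as np

def iou_xywh(rect1, rect2):
    rect1 = np.atleast_2d(rect1).astype(float)
    rect2 = np.atleast_2d(rect2).astype(float)
    # intersection rectangle
    left = np.maximum(rect1[:, 0], rect2[:, 0])
    top = np.maximum(rect1[:, 1], rect2[:, 1])
    right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2])
    bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3])
    inter = np.maximum(0., right - left) * np.maximum(0., bottom - top)
    union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - inter
    return inter / union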
Exemple #6
0
def run_vtaan(img_list, init_bbox, gt=None, savefig_dir='', display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    model = MDNet(opts['model_path'])
    model_g = NetG()
    if opts['use_gpu']:
        model = model.cuda()
        model_g = model_g.cuda()
    GBP = guided_backprop.GuidedBackprop(model, 1)

    # Init criterion and optimizer
    criterion = BCELoss()
    criterion_g = torch.nn.MSELoss(reduction='sum')
    model.set_learnable_params(opts['ft_layers'])
    model_g.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])

    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    pos_imgids = np.array([[0]] * pos_feats.size(0))
    neg_imgids = np.array([[0]] * neg_feats.size(0))

    feat_dim = pos_feats.size(-1)

    # Initial training
    train(model, None, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'], pos_imgids, pos_examples, neg_imgids,
          neg_examples, img_list, GBP)
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()
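    # g_pretrain is assumed to follow the VITAL-style scheme: the generator
    # model_g learns masks that occlude discriminative feature channels, so
    # the classifier is trained against adversarially harder positives.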
    g_pretrain(model, model_g, criterion_g, pos_feats)
    torch.cuda.empty_cache()

    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(model, image, bbreg_examples)
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    pos_examples_all = [pos_examples[:opts['n_pos_update']]]
    neg_examples_all = [neg_examples[:opts['n_neg_update']]]

    pos_imgids_all = [pos_imgids[:opts['n_pos_update']]]
    neg_imgids_all = [neg_imgids[:opts['n_neg_update']]]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    # Main loop
    for i in range(1, len(img_list)):

        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
                del pos_examples_all[0]
                del pos_imgids_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)

            pos_examples_all.append(pos_examples)
            neg_examples_all.append(neg_examples)

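            # imgids record which frame each stored sample came from,
            # presumably so the guided-backprop updates can re-read the
            # originating images from img_list.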
            pos_imgids_all.append(np.array([[i]] * pos_feats.size(0)))
            neg_imgids_all.append(np.array([[i]] * neg_feats.size(0)))

            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
                del neg_examples_all[0]
                del neg_imgids_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)

            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all[-nframes:], 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(
                np.stack(neg_examples_all, 0)).view(-1, 4).numpy()

            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all[-nframes:], 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(
                np.stack(neg_imgids_all, 0)).view(-1, 1).numpy()

            train(model, None, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'], pos_imgids_data, pos_examples_data,
                  neg_imgids_data, neg_examples_data, img_list, GBP)

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)

            pos_examples_data = torch.from_numpy(
                np.stack(pos_examples_all, 0)).view(-1, 4).numpy()
            neg_examples_data = torch.from_numpy(
                np.stack(neg_examples_all, 0)).view(-1, 4).numpy()

            pos_imgids_data = torch.from_numpy(
                np.stack(pos_imgids_all, 0)).view(-1, 1).numpy()
            neg_imgids_data = torch.from_numpy(
                np.stack(neg_imgids_all, 0)).view(-1, 1).numpy()

            # train(model, model_g, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'],
            #       pos_imgids_data, pos_examples_data, neg_imgids_data, neg_examples_data, img_list, GBP)
            train(model, model_g, criterion, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'], None, None, None, None,
                  img_list, GBP)

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        if gt is None:
            print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'.format(
                i + 1, len(img_list), target_score, spf))
        else:
            overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
            print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'
                  .format(i + 1, len(img_list), overlap[i], target_score, spf))

    if gt is not None:
        print('meanIOU: {:.3f}'.format(overlap.mean()))
    fps = len(img_list) / spf_total
    return result, result_bb, fps
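
# The short/long-term update policy shared by these runners, reduced to a
# sketch (the function name and the default threshold/interval values are
# illustrative, mirroring the opts keys used above):
import torch

def select_update_data(pos_feats_all, neg_feats_all, success, frame_idx,
                       n_frames_short=20, long_interval=10):
    if not success:
        # short-term update: only the most recent positive windows
        nframes = min(n_frames_short, len(pos_feats_all))
        return (torch.cat(pos_feats_all[-nframes:], 0),
                torch.cat(neg_feats_all, 0))
    if frame_idx % long_interval == 0:
        # long-term update: the whole accumulated memory
        return torch.cat(pos_feats_all, 0), torch.cat(neg_feats_all, 0)
    return None, None  # no update this frame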
Exemple #7
0
def run_mdnet(img_list,
              init_bbox,
              gt=None,
              savefig_dir='',
              display=False,
              loss_index=1,
              model_path=opts['model_path'],
              seq_name=None):

    #def run_mdnet(k, img_list, init_bbox, gt=None, savefig_dir='', display=False,
    #              loss_index=1, model_path=opts['model_path'], seq_name=None):

    ############################
    if fewer_images:
        num_images = min(sequence_len_limit, len(img_list))
    else:
        num_images = len(img_list)
    ############################

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result_bb = np.zeros((len(img_list), 4))
    result[0] = target_bbox
    result_bb[0] = target_bbox

    # Init iou and pred_iou
    iou_list = np.zeros((len(img_list), 1))  # shape: [len(img_list), 1] ### list of IoUs
    iou_list[0] = 1.0  ### in first frame gt=result_bb (by definition)

    if gt is not None:
        overlap = np.zeros(len(img_list))
        overlap[0] = 1

    # Init model
    # model = MDNet(model_path=opts['model_path'],use_gpu=opts['use_gpu'])
    model = MDNet(model_path=model_path, use_gpu=opts['use_gpu'])
    if opts['use_gpu']:
        model = model.cuda()

    print('Init criterion and optimizer')
    criterion = BCELoss()
    model.set_learnable_params(opts['ft_layers'])
    init_optimizer = set_optimizer(model, opts['lr_init'], opts['lr_mult'])
    update_optimizer = set_optimizer(model, opts['lr_update'], opts['lr_mult'])

    tic = time.time()
    # Load first image
    image = Image.open(img_list[0]).convert('RGB')

    print('Draw pos/neg samples')
    # Draw pos/neg samples
    pos_examples = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                   opts['scale_pos'])(target_bbox,
                                                      opts['n_pos_init'],
                                                      opts['overlap_pos_init'])
    neg_examples = np.concatenate([
        SampleGenerator('uniform', image.size, opts['trans_neg_init'],
                        opts['scale_neg_init'])(target_bbox,
                                                int(opts['n_neg_init'] * 0.5),
                                                opts['overlap_neg_init']),
        SampleGenerator('whole', image.size)(target_bbox,
                                             int(opts['n_neg_init'] * 0.5),
                                             opts['overlap_neg_init'])
    ])
    neg_examples = np.random.permutation(neg_examples)

    print('Extract pos/neg features')
    # Extract pos/neg features
    pos_feats = forward_samples(model, image, pos_examples)
    neg_feats = forward_samples(model, image, neg_examples)

    print('Initial training')
    # Initial training
    train(model,
          criterion,
          init_optimizer,
          pos_feats,
          neg_feats,
          opts['maxiter_init'],
          loss_index=loss_index)  ### iou_pred_list
    del init_optimizer, neg_feats
    torch.cuda.empty_cache()

    print('Train bbox regressor')
    # Train bbox regressor
    bbreg_examples = SampleGenerator(
        'uniform', image.size, opts['trans_bbreg'], opts['scale_bbreg'],
        opts['aspect_bbreg'])(target_bbox, opts['n_bbreg'],
                              opts['overlap_bbreg'])
    bbreg_feats = forward_samples(
        model, image,
        bbreg_examples)  # calc features ### shape: [927, 4608] ###
    bbreg = BBRegressor(image.size)
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    del bbreg_feats
    torch.cuda.empty_cache()

    # Init sample generators for update
    sample_generator = SampleGenerator('gaussian', image.size, opts['trans'],
                                       opts['scale'])
    pos_generator = SampleGenerator('gaussian', image.size, opts['trans_pos'],
                                    opts['scale_pos'])
    neg_generator = SampleGenerator('uniform', image.size, opts['trans_neg'],
                                    opts['scale_neg'])

    print('Init pos/neg features for update')
    # Init pos/neg features for update
    neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                 opts['overlap_neg_init'])
    neg_feats = forward_samples(model, image, neg_examples)
    pos_feats_all = [pos_feats]
    neg_feats_all = [neg_feats]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (image.size[0] / dpi, image.size[1] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image, aspect='auto')

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)
            #################
            num_gts = np.minimum(gt.shape[0], num_images)
            # print('num_gts.shape: ', num_gts.shape)
            gt_centers = gt[:num_gts, :2] + gt[:num_gts, 2:] / 2
            result_centers = np.zeros_like(gt[:num_gts, :2])
            result_centers[0] = gt_centers[0]
            result_ious = np.zeros(num_gts, dtype='float64')
            result_ious[0] = 1.
            #################

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    print('Main Loop')
    # Main loop
    spf_total = 0  # I don't want to take into account initialization
    for i in tqdm(range(1, num_images)):
        # for i in range(1, len(img_list)):
        #print('Frame: ', i)
        tic = time.time()
        # Load image
        image = Image.open(img_list[i]).convert('RGB')

        #print('Estimate target bbox (in run_mdnet)')
        # Estimate target bbox
        samples = sample_generator(target_bbox, opts['n_samples'])
        sample_scores = forward_samples(model, image, samples, out_layer='fc6')

        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu()
        target_score = top_scores.mean()
        target_bbox = samples[top_idx]
        if top_idx.shape[0] > 1:
            target_bbox = target_bbox.mean(axis=0)
        success = target_score > 0

        # Expand search area at failure
        if success:
            sample_generator.set_trans(opts['trans'])
        else:
            sample_generator.expand_trans(opts['trans_limit'])

        #print('Bbox regression (in run_mdnet)')
        # Bbox regression
        if success:
            bbreg_samples = samples[top_idx]
            if top_idx.shape[0] == 1:
                bbreg_samples = bbreg_samples[None, :]
            bbreg_feats = forward_samples(model, image, bbreg_samples)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
        else:
            bbreg_bbox = target_bbox

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        if gt is not None:
            iou_list[i] = overlap_ratio(gt[i], result_bb[i])

        ###########################################
        # identify tracking failure and abort when in VOT mode

        IoU = iou_list[i, 0] if gt is not None else 1.0
        if (IoU == 0) and init_after_loss:
            print('    * lost track in frame %d *' % i)
            result_distances = scipy.spatial.distance.cdist(
                result_centers[:i], gt_centers[:i],
                metric='euclidean').diagonal()
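            # the diagonal of this pairwise matrix is simply the per-frame
            # center error, i.e.
            # np.linalg.norm(result_centers[:i] - gt_centers[:i], axis=1)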
            num_images_tracked = i - 1  # we don't count frame 0 and current frame (lost track)

            im.set_data(image)
            if gt is not None:
                if i < gt.shape[0]:
                    gt_rect.set_xy(gt[i, :2])
                    gt_rect.set_width(gt[i, 2])
                    gt_rect.set_height(gt[i, 3])
                else:
                    gt_rect.set_xy(np.array([np.nan, np.nan]))
                    gt_rect.set_width(np.nan)
                    gt_rect.set_height(np.nan)

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            plt.pause(.01)
            plt.draw()

            print('Tracking failure identified; aborting (VOT mode)')
            return (result[:i], result_bb[:i], num_images_tracked, spf_total,
                    result_distances, result_ious[:i], True)
        ########################################

        # Data collect
        if success:
            pos_examples = pos_generator(target_bbox, opts['n_pos_update'],
                                         opts['overlap_pos_update'])
            pos_feats = forward_samples(model, image, pos_examples)
            pos_feats_all.append(pos_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]

            neg_examples = neg_generator(target_bbox, opts['n_neg_update'],
                                         opts['overlap_neg_update'])
            neg_feats = forward_samples(model, image, neg_examples)
            neg_feats_all.append(neg_feats)
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.cat(pos_feats_all[-nframes:], 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.cat(pos_feats_all, 0)
            neg_data = torch.cat(neg_feats_all, 0)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        torch.cuda.empty_cache()
        spf = time.time() - tic
        spf_total += spf
        #print('Time: ', spf)

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

                #################
                result_ious[i] = overlap_ratio(result_bb[i], gt[i])[0]
                result_centers[i] = result_bb[i, :2] + result_bb[i, 2:] / 2
                #################

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '{:04d}.jpg'.format(i)),
                            dpi=dpi)

        ####################################
        if detailed_printing:
            if gt is None:
                print("      Frame %d/%d, Score %.3f, Time %.3f" % \
                      (i, num_images-1, target_score, spf))
            else:
                if i < gt.shape[0]:
                    print("      Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                        (i, num_images-1, overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
                else:
                    print("      Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                        (i, num_images-1, overlap_ratio(np.array([np.nan,np.nan,np.nan,np.nan]), result_bb[i])[0], target_score, spf))
        ####################################

        # if gt is None:
        #     print('Frame {:d}/{:d}, Score {:.3f}, Time {:.3f}'
        #         .format(i, len(img_list), target_score, spf))
        # else:
        #     overlap[i] = overlap_ratio(gt[i], result_bb[i])[0]
        #     print('Frame {:d}/{:d}, Overlap {:.3f}, Score {:.3f}, Time {:.3f}'
        #         .format(i, len(img_list), overlap[i], target_score, spf))

    ########################
    plt.close()
    result_distances = scipy.spatial.distance.cdist(
        result_centers, gt_centers, metric='euclidean').diagonal()
    num_images_tracked = num_images - 1  # I don't want to count initialization frame (i.e. frame 0)
    print('    main loop finished, %d frames' % (num_images))

    print('mean IoU: ', iou_list.mean())
    print('Finished run_mdnet()')

    return result, result_bb, num_images_tracked, spf_total, result_distances, result_ious, False
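
# BBRegressor, used by every example above, fits a linear map from features
# of boxes near the target to R-CNN style refinement deltas. A minimal sketch
# with scikit-learn Ridge; the class name and solver are assumptions, only
# the delta parametrization is standard:
import numpy as np
from sklearn.linear_model import Ridge

class LinearBBRegressor:
    def __init__(self, alpha=1000.0):
        self.model = Ridge(alpha=alpha)

    def train(self, feats, boxes, gt):
        # feats: [N, D] array; boxes: [N, 4] xywh samples; gt: [4] xywh target
        dx = (gt[0] + gt[2] / 2 - boxes[:, 0] - boxes[:, 2] / 2) / boxes[:, 2]
        dy = (gt[1] + gt[3] / 2 - boxes[:, 1] - boxes[:, 3] / 2) / boxes[:, 3]
        dw = np.log(gt[2] / boxes[:, 2])
        dh = np.log(gt[3] / boxes[:, 3])
        self.model.fit(feats, np.stack([dx, dy, dw, dh], axis=1))

    def predict(self, feats, boxes):
        d = self.model.predict(feats)
        cx = boxes[:, 0] + boxes[:, 2] / 2 + d[:, 0] * boxes[:, 2]
        cy = boxes[:, 1] + boxes[:, 3] / 2 + d[:, 1] * boxes[:, 3]
        w = boxes[:, 2] * np.exp(d[:, 2])
        h = boxes[:, 3] * np.exp(d[:, 3])
        return np.stack([cx - w / 2, cy - h / 2, w, h], axis=1)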
Exemple #8
0
def RTMDNet_init(model_path, image_file, init_bbox):
    state = dict()
    target_bbox = np.array(init_bbox)
    model = MDNet(model_path)
    if opts['adaptive_align']:
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
    if opts['use_gpu']:
        model = model.cuda()

    model.set_learnable_params(opts['ft_layers'])
    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    # Init criterion and optimizer
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    cur_image = Image.open(image_file).convert('RGB')
    cur_image = np.asarray(cur_image)

    # Draw pos/neg samples
    ishape = cur_image.shape
    #    logging.info('ishape: %s, n_pos_init: %s, overlap_pos_init: %s, target_bbox: %s', ishape, opts['n_pos_init'], opts['overlap_pos_init'], target_bbox)
    pos_examples = gen_samples(
        SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 1, 2, 1.1),
        target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
    neg_examples = np.random.permutation(neg_examples)

    cur_bbreg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5,
                        1.1), target_bbox, opts['n_bbreg'],
        opts['overlap_bbreg'], opts['scale_bbreg'])

    # compute padded sample
    padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                    padded_y2 - padded_y1)), (1, 4))
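    # padded_scene_box is the [x, y, w, h] hull of all negative samples after
    # each has been inflated by the padding factor about its own center, so a
    # single crop covers every ROI pooled below.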

    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))

    if opts['jitter']:
        ## horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.
        jitter_scale_horizon = 1.

        ## vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.
        jitter_scale_vertical = 1.

        ## scale reduction
        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)

        ## scale enlargement
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)

        ## further scale reduction
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)

        ## further scale enlargement
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)

        scene_boxes = np.concatenate([
            scene_boxes, jittered_scene_box_horizon,
            jittered_scene_box_vertical, jittered_scene_box_reduce1,
            jittered_scene_box_enlarge1, jittered_scene_box_reduce2,
            jittered_scene_box_enlarge2
        ], axis=0)
        jitter_scale = [
            1., jitter_scale_horizon, jitter_scale_vertical,
            jitter_scale_reduce1, jitter_scale_enlarge1, jitter_scale_reduce2,
            jitter_scale_enlarge2
        ]
    else:
        jitter_scale = [1.]
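    # with jitter enabled, the same padded scene is cropped seven times:
    # identity, a 4-pixel horizontal and a 4-pixel vertical shift, and scale
    # factors 1.1**{-2, -1, 1, 2}, so the initial features see small spatial
    # and scale perturbations.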

    model.eval()

    for bidx in range(0, scene_boxes.shape[0]):
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                         ).astype('int64') * jitter_scale[bidx]
        cropped_image, cur_image_var = img_crop_model.crop_image(
            cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image = cropped_image - 128.

        feat_map = model(cropped_image, out_layer='conv3')

        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

        # shift positive ROIs into the crop's coordinate frame, rescale them
        # for RoIAlign and pool their features
        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                     (1, 2)),
                                          cur_pos_rois.shape[0],
                                          axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
        cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                           -1).data.clone()

        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                     (1, 2)),
                                          cur_neg_rois.shape[0],
                                          axis=0)
        cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
        cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                           -1).data.clone()

        ## bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                       (1, 2)),
                                            cur_bbreg_rois.shape[0],
                                            axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,
                                         (scaled_obj_size, scaled_obj_size),
                                         target_bbox[2:4], opts['padding'])
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
        cur_bbreg_feats = model.roi_align_model(feat_map, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                               -1).data.clone()

        feat_dim = cur_pos_feats.size(-1)

        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            ##bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            ##bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)

    if pos_feats.size(0) > opts['n_pos_init']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :]
    if neg_feats.size(0) > opts['n_neg_init']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]

    ##bbreg
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
        # print(bbreg_examples.shape)

    ## open images and crop patch from obj
    extra_obj_size = np.array((opts['img_size'], opts['img_size']))
    extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6)
    replicateNum = 100
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(target_bbox)

        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2.
        extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
        extra_scene_box[0:2] = extra_scene_box_center - extra_scene_box_size / 2.
        extra_scene_box[2:4] = extra_scene_box_size

        extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]

        scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]

        cur_extra_cropped_image, _ = img_crop_model.crop_image(
            cur_image, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image = cur_extra_cropped_image.detach()

        cur_extra_pos_examples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1,
                            1.2), extra_target_bbox,
            opts['n_pos_init'] // replicateNum, opts['overlap_pos_init'])
        cur_extra_neg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2,
                            1.1), extra_target_bbox,
            opts['n_neg_init'] // replicateNum // 4, opts['overlap_neg_init'])

        ##bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox, opts['n_bbreg'] // replicateNum // 4,
            opts['overlap_bbreg'], opts['scale_bbreg'])

        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
                                                cur_extra_pos_rois.shape[0],
                                                axis=0)
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois),
                                            axis=1)

        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
                                                cur_extra_neg_rois.shape[0],
                                                axis=0)
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois),
                                            axis=1)

        ## bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_bbreg_rois.shape[0],
            axis=0)
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)

        if iidx == 0:
            extra_cropped_image = cur_extra_cropped_image

            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            ##bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image = torch.cat(
                (extra_cropped_image, cur_extra_cropped_image), dim=0)

            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            ##bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                axis=0)

    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
    ##bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()

    extra_cropped_image -= 128.

    extra_feat_maps = model(extra_cropped_image, out_layer='conv3')
    ishape = cur_image.shape  # reused below for the bbox regressor

    extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                           -1).data.clone()

    extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                           -1).data.clone()
    ##bbreg feat
    extra_bbreg_feats = model.roi_align_model(extra_feat_maps,
                                              extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                               -1).data.clone()

    ## concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    ## concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                    axis=0)

    torch.cuda.empty_cache()
    model.zero_grad()

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])

    ##bbreg train
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
    bbreg = BBRegressor((ishape[1], ishape[0]))
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

    if pos_feats.size(0) > opts['n_pos_update']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats_all = [
            pos_feats.index_select(
                0,
                torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
        ]
    if neg_feats.size(0) > opts['n_neg_update']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats_all = [
            neg_feats.index_select(
                0,
                torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
        ]

    state['trans_f'] = opts['trans_f']
    state['count'] = 1
    state['model'] = model
    state['target_bbox'] = target_bbox
    state['img_crop_model'] = img_crop_model
    state['bbreg'] = bbreg
    state['criterion'] = criterion
    state['update_optimizer'] = update_optimizer
    state['pos_feats_all'] = pos_feats_all
    state['neg_feats_all'] = neg_feats_all
    state['feat_dim'] = feat_dim

    return state
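
# A possible driver for the state returned above; the per-frame counterpart
# is not shown in this example, so RTMDNet_track below is a hypothetical name:
# state = RTMDNet_init(model_path, img_files[0], init_bbox)
# for image_file in img_files[1:]:
#     state, target_bbox = RTMDNet_track(state, image_file)  # hypothetical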
Exemple #9
0
def run_(model,
         criterion,
         target,
         init_optimizer,
         img_files,
         init_rect,
         video,
         gt=None):
    # Init bbox
    box_ = np.array(init_rect)
    result = np.zeros((len(img_files), 4))
    result_bb = np.zeros((len(img_files), 4))
    result[0] = box_
    bbreg_bbox = box_
    result_bb[0] = bbreg_bbox

    tic = time.time()
    # Load first image
    image = Image.open(img_files[0]).convert('RGB')  #[W,H] RGB
    image_np = np.asarray(image)  #[H, W, 3]

    # Init bbox regressor
    # Give the cropped region
    # bbreg_examples_roi is [x_min,y_min,x_max,y_max]
    region, crop_region_sz, bbreg_examples_roi, bbreg_im_index, box__roi, box__crop, coe = acquire_rois_bb(
        SampleGenerator('uniform', 0.3, 1.5, 1.1, True), image_np, opts, box_,
        opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
    # bbreg_examples_reg is [x_min,y_min,w,h]
    bbreg_examples_reg = np.hstack(
        ((bbreg_examples_roi[:, 0]).reshape(-1, 1),
         (bbreg_examples_roi[:, 1]).reshape(-1, 1),
         (bbreg_examples_roi[:, 2:] - bbreg_examples_roi[:, :2])))
    bbreg_feats = extract_feat(model,
                               region,
                               bbreg_examples_roi,
                               bbreg_im_index,
                               fea_view=True)
    bbreg = BBRegressor((np.array(region.shape[2:])).reshape(-1, 2),
                        overlap=opts['overlap_bbreg'],
                        scale=opts['scale_bbreg'])
    bbreg.train(bbreg_feats, bbreg_examples_reg, box__roi)

    # Draw pos/neg samples
    pos_examples, pos_examples_roi, pos_im_index = acquire_roi_samples(
        SampleGenerator('gaussian', 0.1, 1.2, valid=True), coe, box__crop,
        crop_region_sz, opts['n_pos_init'], opts['overlap_pos_init'])

    neg_examples, neg_examples_roi, neg_im_index = acquire_roi_samples(
        SampleGenerator('uniform', 1, 2, 1.1, valid=True), coe, box__crop,
        crop_region_sz, opts['n_neg_init'] // 2, opts['overlap_neg_init'])

    neg_examples_whole, neg_examples_roi_whole, neg_im_index_whole = acquire_roi_samples(
        SampleGenerator('whole', 0, 1.2, 1.1, valid=True), coe, box__crop,
        crop_region_sz, opts['n_neg_init'] // 2, opts['overlap_neg_init'])

    neg_examples_roi = np.concatenate(
        (neg_examples_roi, neg_examples_roi_whole), axis=0)
    neg_examples_roi = np.random.permutation(neg_examples_roi)
    neg_im_index = np.concatenate((neg_im_index, neg_im_index_whole), axis=0)

    # Extract pos/neg features
    pos_feats = extract_feat(model, region, pos_examples_roi, pos_im_index)
    neg_feats = extract_feat(model, region, neg_examples_roi, neg_im_index)
    feat_dim = pos_feats.size(-1)
    channel_dim = pos_feats.size(-3)
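    # features here stay convolutional: channel_dim is the channel count and
    # feat_dim the spatial side (e.g. 512 and 7), matching the
    # [-1, channel_dim, feat_dim, feat_dim] views in the update steps below.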

    # Initial training
    train(model, criterion, target, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])
    model.stop_learnable_params(opts['stop_layers'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    # Init sample generators
    sample_generator = SampleGenerator('gaussian',
                                       opts['trans_f'],
                                       opts['scale_f'],
                                       valid=True)
    pos_generator = SampleGenerator('gaussian', 0.1, 1.2, valid=True)
    neg_generator = SampleGenerator('uniform', 1.5, 1.2, valid=True)

    # Init pos/neg features for update
    pos_feats_all = [pos_feats[:opts['n_pos_update']]]
    neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    spf_total = time.time() - tic

    # Start tracking
    unsuccess_num = 0
    for i in range(1, len(img_files)):

        tic = time.time()
        # Load image
        image = Image.open(img_files[i]).convert('RGB')
        image_np = np.asarray(image)  #[H, W, 3]

        # Cropping
        region, crop_region_sz, coe, box__crop = acquire_region(
            image_np, box_, opts)
        samples, samples_roi, samples_im_index = acquire_roi_samples(
            sample_generator, coe, box__crop, crop_region_sz,
            opts['n_samples'])
        sample_scores, sta_g_weight, sta_penalty, atten_map, conv3_fea = extract_feat(
            model, region, samples_roi, samples_im_index, out_layer='capsule')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()
        samples_topk = samples[top_idx]
        samples_topk[:, :2] = samples_topk[:, :2] - box__crop[:2].reshape(
            -1, 2) + box_[:2].reshape(-1, 2)
        box__copy = box_.copy()
        box_ = samples_topk.mean(
            axis=0)  #Take the mean value of top 5 as the tracking result
        success = target_score > opts['success_thr']

        # Expand search area when failure occurs
        if success:
            unsuccess_num = 0
            sample_generator.set_trans_f(opts['trans_f'])
        else:
            unsuccess_num += 1
            sample_generator.set_trans_f(opts['trans_f_expand'])

        # Bbox regression
        if success:
            bbreg_samples_roi = samples_roi[top_idx]
            bbreg_samples_reg = np.hstack(
                ((bbreg_samples_roi[:, 0]).reshape(-1, 1),
                 (bbreg_samples_roi[:, 1]).reshape(-1, 1),
                 (bbreg_samples_roi[:, 2:] - bbreg_samples_roi[:, :2])))
            bbreg_feats = extract_feat(model,
                                       region,
                                       bbreg_samples_roi,
                                       samples_im_index[top_idx],
                                       fea_view=True)
            bbreg_samples = bbreg.predict(bbreg_feats, bbreg_samples_reg)
            bbreg_bbox = bbreg_samples.mean(axis=0)

            bbreg_bbox = np.array([
                bbreg_bbox[0] * coe[0], bbreg_bbox[1] * coe[1],
                bbreg_bbox[2] * coe[0], bbreg_bbox[3] * coe[1]
            ])
            bbreg_bbox[:2] = np.array(bbreg_bbox[:2] - box__crop[:2] +
                                      box__copy[:2])
        else:
            bbreg_bbox = box_

        # Copy previous result at failure
        if not success:
            box_ = result[i - 1]
            bbreg_bbox = result_bb[i - 1]

        # Save result
        result[i] = box_
        result_bb[i] = bbreg_bbox

        # Data collect
        if success:
            # Draw pos/neg samples
            region, crop_region_sz, coe, box__crop = acquire_region(
                image_np, box_, opts)
            pos_examples, pos_examples_roi, pos_im_index = acquire_roi_samples(
                pos_generator, coe, box__crop, crop_region_sz,
                opts['n_pos_update'], opts['overlap_pos_update'])
            neg_examples, neg_examples_roi, neg_im_index = acquire_roi_samples(
                neg_generator, coe, box__crop, crop_region_sz,
                opts['n_neg_update'], opts['overlap_neg_update'])
            # Extract pos/neg features
            pos_feats = extract_feat(model, region, pos_examples_roi,
                                     pos_im_index)
            neg_feats = extract_feat(model, region, neg_examples_roi,
                                     neg_im_index)
            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                # accumulate update features; delete index 1 so the first
                # frame's positives are always kept
                del pos_feats_all[1]
            if len(neg_feats_all) > opts['n_frames_short']:
                # drop the oldest negatives; keeping the first frame's
                # negatives instead was found to hurt on some sequences
                # (e.g. "Ironman")
                del neg_feats_all[0]

        # Short term update
        if (not success) & (unsuccess_num < 15):
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, channel_dim, feat_dim,
                                           feat_dim)  # [20*50, 512,7,7]
            neg_data = torch.stack(neg_feats_all, 0).view(
                -1, channel_dim, feat_dim,
                feat_dim)  # [20 or less *200, 512,7,7]
            train(model, criterion, target, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all,
                                   0).view(-1, channel_dim, feat_dim, feat_dim)
            neg_data = torch.stack(neg_feats_all,
                                   0).view(-1, channel_dim, feat_dim, feat_dim)
            train(model, criterion, target, update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        spf = time.time() - tic
        spf_total += spf

    fps = len(img_files) / spf_total
    print("Speed: %.3f" % (fps))
    return result, result_bb, fps, spf_total
Exemple #10
0
# train bb regression
print('==> bb regression')

dataset = SimpleSampler(train_bbox,
                        img_transforms,
                        num=[FINETUNE_POS_NUM, 0],
                        threshold=FINETUNE_IOU_THRESHOLD)(train_img,
                                                          train_bbox)
loader = data.DataLoader(dataset, len(dataset))

imgs, boxes = next(iter(loader))

feat = Variable(imgs).cuda()
feat = featNet(feat)

bbreg = BBRegressor()
bbreg.fit(feat, boxes, train_bbox)
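# bbreg.fit is assumed to solve a linear regression from the pooled features
# to offsets that move each sampled box toward train_bbox, mirroring the
# BBRegressor.train calls in the earlier examples.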

print('done')
"""
predicting
"""
print('==> predicting')

recent_scores = []
recent_regions = []

last_bbox = train_bbox

for frame_idx, (frame_img, _) in enumerate(folder, 1):
    print(f'Frame[{frame_idx}]', end='\t')
class Tracker:
    def __init__(self, init_bbox, first_frame):
        self.frame_idx = 0

        self.target_bbox = np.array(init_bbox)
        self.bbreg_bbox = self.target_bbox

        # Init model
        self.model = MDNet(opts['model_path'])
        if opts['use_gpu']:
            self.model = self.model.cuda()
        self.model.set_learnable_params(opts['ft_layers'])

        # Init criterion and optimizer
        self.criterion = BinaryLoss()
        self.init_optimizer = set_optimizer(self.model, opts['lr_init'])
        self.update_optimizer = set_optimizer(self.model, opts['lr_update'])

        # Train bbox regressor
        bbreg_examples = gen_samples(
            SampleGenerator('uniform', first_frame.size, 0.3, 1.5,
                            1.1), self.target_bbox, opts['n_bbreg'],
            opts['overlap_bbreg'], opts['scale_bbreg'])
        assert len(bbreg_examples) > 0
        bbreg_feats = forward_samples(self.model, first_frame, bbreg_examples)
        assert len(bbreg_feats) > 0
        self.bbreg = BBRegressor(first_frame.size)
        self.bbreg.train(bbreg_feats, bbreg_examples, self.target_bbox)

        # Draw pos/neg samples
        pos_examples = gen_samples(
            SampleGenerator('gaussian', first_frame.size, 0.1, 1.2),
            self.target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])

        neg_examples = np.concatenate([
            gen_samples(
                SampleGenerator('uniform', first_frame.size, 1, 2,
                                1.1), self.target_bbox,
                opts['n_neg_init'] // 2, opts['overlap_neg_init']),
            gen_samples(
                SampleGenerator('whole', first_frame.size, 0, 1.2,
                                1.1), self.target_bbox,
                opts['n_neg_init'] // 2, opts['overlap_neg_init'])
        ])
        neg_examples = np.random.permutation(neg_examples)

        # Extract pos/neg features
        pos_feats = forward_samples(self.model, first_frame, pos_examples)
        neg_feats = forward_samples(self.model, first_frame, neg_examples)
        self.feat_dim = pos_feats.size(-1)

        # Initial training
        train(self.model, self.criterion, self.init_optimizer, pos_feats,
              neg_feats, opts['maxiter_init'])

        # Init sample generators
        self.sample_generator = SampleGenerator('gaussian',
                                                first_frame.size,
                                                opts['trans_f'],
                                                opts['scale_f'],
                                                valid=True)
        self.pos_generator = SampleGenerator('gaussian', first_frame.size, 0.1,
                                             1.2)
        self.neg_generator = SampleGenerator('uniform', first_frame.size, 1.5,
                                             1.2)

        # Init pos/neg features for update
        self.pos_feats_all = [pos_feats[:opts['n_pos_update']]]
        self.neg_feats_all = [neg_feats[:opts['n_neg_update']]]

    def track(self, image):
        self.frame_idx += 1

        # Estimate target bbox
        samples = gen_samples(self.sample_generator, self.target_bbox,
                              opts['n_samples'])
        sample_scores = forward_samples(self.model,
                                        image,
                                        samples,
                                        out_layer='fc6')
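        # Rank candidates by positive-class score; the target score is the
        # mean of the five best samples.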
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.cpu().numpy()
        target_score = top_scores.mean()

        success = target_score > opts['success_thr']

        # Expand search area at failure
        if success:
            self.sample_generator.set_trans_f(opts['trans_f'])
        else:
            self.sample_generator.set_trans_f(opts['trans_f_expand'])

        # Save result at success.
        if success:
            self.target_bbox = samples[top_idx].mean(axis=0)

            # Bbox regression
            bbreg_samples = samples[top_idx]
            bbreg_feats = forward_samples(self.model, image, bbreg_samples)
            bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
            self.bbreg_bbox = bbreg_samples.mean(axis=0)

        # Collect training data
        if success:
            # Draw pos/neg samples
            pos_examples = gen_samples(self.pos_generator, self.target_bbox,
                                       opts['n_pos_update'],
                                       opts['overlap_pos_update'])
            neg_examples = gen_samples(self.neg_generator, self.target_bbox,
                                       opts['n_neg_update'],
                                       opts['overlap_neg_update'])

            # Extract pos/neg features
            pos_feats = forward_samples(self.model, image, pos_examples)
            neg_feats = forward_samples(self.model, image, neg_examples)
            self.pos_feats_all.append(pos_feats)
            self.neg_feats_all.append(neg_feats)
            if len(self.pos_feats_all) > opts['n_frames_long']:
                del self.pos_feats_all[0]
            if len(self.neg_feats_all) > opts['n_frames_short']:
                del self.neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            train(self.model, self.criterion, self.update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        # Long term update
        elif self.frame_idx % opts['long_interval'] == 0:
            pos_data = torch.stack(self.pos_feats_all,
                                   0).view(-1, self.feat_dim)
            neg_data = torch.stack(self.neg_feats_all,
                                   0).view(-1, self.feat_dim)
            train(self.model, self.criterion, self.update_optimizer, pos_data,
                  neg_data, opts['maxiter_update'])

        return self.bbreg_bbox, target_score
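For reference, a driver for this Tracker could look like the following minimal sketch; run_tracker, frame_paths, and the PIL-based frame loading are illustrative assumptions, not part of the example above:

import numpy as np
from PIL import Image

def run_tracker(frame_paths, init_bbox):
    # Initialize the tracker on the first frame, then track the rest.
    first_frame = Image.open(frame_paths[0]).convert('RGB')
    tracker = Tracker(init_bbox, first_frame)
    results = [np.asarray(init_bbox)]
    for path in frame_paths[1:]:
        image = Image.open(path).convert('RGB')
        bbox, score = tracker.track(image)
        results.append(bbox)
    return results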