Ejemplo n.º 1
0
    def visualize_iou_pred(self, iou_features, center_box):
        """Display predicted IoU scores for perturbed versions of a box.

        Two dense grids of candidate boxes are built around ``center_box``
        in the relative parametrization returned by ``bbutils.rect_to_rel``:
        one grid offsets box components 0 and 1 (position), the other offsets
        components 2 and 3 (size). Both grids are scored with
        ``self.net.bb_regressor.predict_iou``; the exponentiated scores are
        passed to ``show_tensor`` as figures 21 and 22.

        Args:
            iou_features: Features forwarded unchanged to ``predict_iou``.
            center_box: Tensor with four elements describing the reference
                box. Its last two components are used as the normalization
                size for the relative parametrization.
        """
        center_box = center_box.view(1, 1, 4)
        sz_norm = center_box[..., 2:].clone()
        ref_rel = bbutils.rect_to_rel(center_box, sz_norm)

        # Offset ranges: [-1, 1] for position and [-log 3, log 3] for size,
        # both sampled with a step of 0.01.
        pos_limit, pos_inc = 1.0, 0.01
        sz_limit, sz_inc = math.log(3.0), 0.01
        pos_offsets = torch.arange(-pos_limit, pos_limit + pos_inc, step=pos_inc)
        sz_offsets = torch.arange(-sz_limit, sz_limit + sz_inc, step=sz_inc)
        n_pos = pos_offsets.numel()
        n_sz = sz_offsets.numel()

        # One "row" and one "column" offset tensor per grid; adding them
        # broadcasts into a full 2D grid of perturbations.
        dx = torch.zeros(1, n_pos, 4)
        dx[0, :, 0] = pos_offsets.clone()
        dy = torch.zeros(n_pos, 1, 4)
        dy[:, 0, 1] = pos_offsets.clone()
        dw = torch.zeros(1, n_sz, 4)
        dw[0, :, 2] = sz_offsets.clone()
        dh = torch.zeros(n_sz, 1, 4)
        dh[:, 0, 3] = sz_offsets.clone()

        pos_boxes = bbutils.rel_to_rect(ref_rel + dx + dy, sz_norm)
        pos_boxes = pos_boxes.view(1, -1, 4).to(self.params.device)
        sz_boxes = bbutils.rel_to_rect(ref_rel + dw + dh, sz_norm)
        sz_boxes = sz_boxes.view(1, -1, 4).to(self.params.device)

        pos_scores = self.net.bb_regressor.predict_iou(
            self.iou_modulation, iou_features, pos_boxes).exp()
        sz_scores = self.net.bb_regressor.predict_iou(
            self.iou_modulation, iou_features, sz_boxes).exp()

        show_tensor(pos_scores.view(n_pos, -1), title='Position scores', fig_num=21)
        show_tensor(sz_scores.view(n_sz, -1), title='Size scores', fig_num=22)
Ejemplo n.º 2
0
    def track(self, image):
        """Localize the target in a new frame, update the model, return the box.

        The frame is sampled at ``self.pos`` for every scale in
        ``self.params.scale_factors``. The filter response selects a
        translation and a scale, the tracker state is updated, and the sample
        at the selected scale is shifted and stored in memory. The filter is
        re-optimized once every ``self.params.train_skipping`` frames.

        Args:
            image: Input frame; converted with ``numpy_to_torch``.

        Returns:
            list: Four values: ``self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2``
            followed by ``self.target_sz[[1, 0]]`` (presumably an
            ``[x, y, w, h]`` box -- confirm against the caller).
        """
        self.frame_num += 1

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_xf = self.extract_fourier_sample(im, self.pos, sample_scales,
                                              self.img_sample_sz)

        # Compute scores
        sf = self.apply_filter(test_xf)
        translation_vec, scale_ind, s = self.localize_target(sf)

        scale_change_factor = self.params.scale_factors[scale_ind]

        # Update position and scale
        self.update_state(sample_pos + translation_vec,
                          self.target_scale * scale_change_factor)
        self.predict_target_box(sample_pos, sample_scales[scale_ind],
                                scale_ind)

        if self.params.debug >= 2:
            show_tensor(s[scale_ind, ...], 5)
        if self.params.debug >= 3:
            for i, hf in enumerate(self.filter):
                show_tensor(fourier.sample_fs(hf).abs().mean(1), 6 + i)

        # ------- UPDATE ------- #

        # Get train sample (only the selected scale is kept)
        train_xf = TensorList(
            [xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

        # Shift the sample by the offset between the updated position and the
        # rounded position used for localization
        shift_samp = 2 * math.pi * (self.pos - sample_pos) / (
            sample_scales[scale_ind] * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Update memory
        self.update_memory(train_xf)

        # Train filter. `(frame_num - 1) % k == 0` selects the same frames as
        # `frame_num % k == 1` for k > 1, but also fires when k == 1, where
        # `frame_num % 1 == 1` can never be true and training would be
        # skipped forever.
        if (self.frame_num - 1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter, train_xf)
            self.symmetrize_filter()

        # Return new state
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        return new_state.tolist()
Ejemplo n.º 3
0
    def track(self, image) -> dict:
        """Run one tracking step whose model update is gated by a LOF check.

        The frame is sampled at the current position over all scale factors,
        scored with the filter, and the state is updated (optionally refined
        through the IoU branch). A metric feature of the resulting box is
        then scored with ``lof``; after the first
        ``self.params.train_skipping`` frames the memory is only updated when
        that check succeeds. During those first frames ``self.lof_thresh`` is
        adapted as a running average of ``lof_score * self.params.lof_rate``.

        Args:
            image: Input frame; converted with ``numpy_to_torch`` for
                tracking and with ``np.array`` for the metric model.

        Returns:
            dict: ``{'target_bbox': [...]}`` holding
            ``self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2`` followed
            by ``self.target_sz[[1, 0]]`` (presumably ``[x, y, w, h]`` --
            confirm against the caller).
        """
        self.frame_num += 1
        self.debug_info = {'frame_num': self.frame_num}

        # The converted frame is kept on the instance for debugging only.
        im = self.im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        rounded_pos = self.pos.round()
        scales = self.target_scale * self.params.scale_factors
        samples = self.extract_processed_sample(im, self.pos, scales,
                                                self.img_sample_sz)

        shift, best, responses, status = self.localize_target(
            self.apply_filter(samples))
        target_found = status != 'not_found'

        # Update position and scale
        if target_found:
            if self.use_iou_net:
                allow_scale_update = getattr(
                    self.params, 'update_scale_when_uncertain',
                    True) or status != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(rounded_pos + shift)
                self.refine_target_box(rounded_pos, scales[best], best,
                                       allow_scale_update)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(rounded_pos + shift, scales[best])

        score_map = responses[best, ...]
        peak = torch.max(score_map).item()
        self.debug_info['max_score'] = peak
        self.debug_info['flag'] = status

        if self.visdom is None:
            if self.params.debug >= 2:
                show_tensor(score_map, 5,
                            title='Max score = {:.2f}'.format(peak))
        else:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')

        # ------- METRIC CHECK ------- #

        # Use the IoU-refined position when it is available for this frame.
        if self.use_iou_net and target_found:
            center = self.pos_iounet.clone()
        else:
            center = self.pos
        size_xy = self.target_sz[[1, 0]]
        metric_box = torch.cat(
            (center[[1, 0]] - (size_xy - 1) / 2, size_xy)).numpy()
        with torch.no_grad():
            metric_feature = get_target_feature(self.metric_model, metric_box,
                                                np.array(image))
            metric_feature_np = metric_feature.cpu().detach().numpy()
        lof_score, success = lof(metric_feature_np,
                                 self.clf,
                                 k=5,
                                 thresh=self.lof_thresh)

        # Warm-up: adapt the LOF threshold over the first train_skipping frames.
        frame = self.frame_num
        if frame <= self.params.train_skipping:
            self.lof_thresh = (self.lof_thresh * (frame - 2) +
                               lof_score * self.params.lof_rate) / (frame - 1)
            if frame == self.params.train_skipping:
                print(frame, lof_score, self.lof_thresh)

        # ------- UPDATE ------- #

        # After the warm-up the LOF result additionally gates the update.
        update_flag = status not in ('not_found', 'uncertain')
        if frame > self.params.train_skipping:
            update_flag = update_flag and success

        is_hard_negative = status == 'hard_negative'
        learning_rate = None
        if is_hard_negative:
            learning_rate = self.params.hard_negative_learning_rate

        if update_flag:
            # Training sample at the selected scale, its label, then memory.
            train_x = TensorList([x[best:best + 1, ...] for x in samples])
            train_y = self.get_label_function(rounded_pos, scales[best])
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if is_hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num - 1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and target_found:
            self.pos = self.pos_iounet.clone()

        # Return new state
        out_size = self.target_sz[[1, 0]]
        out_corner = self.pos[[1, 0]] - (out_size - 1) / 2
        return {'target_bbox': torch.cat((out_corner, out_size)).tolist()}
Ejemplo n.º 4
0
Archivo: dimp.py Proyecto: xiaozai/DAL
    def track(self, image) -> dict:
        """Track the target in one frame and report its bounding box.

        Backbone features are extracted around the centered sample position
        for every scale factor, classified, and used to localize the target.
        Depending on the ``use_iou_net`` / ``use_classifier`` parameters the
        state is updated directly and/or refined through
        ``refine_target_box``. The classifier is updated only when
        ``update_classifier`` is set and the localization flag is neither
        ``'not_found'`` nor ``'uncertain'``.

        Args:
            image: Input frame; converted with ``numpy_to_torch``.

        Returns:
            dict: ``{'target_bbox': box}`` where ``box`` is ``[0, 0, 0, 0]``
            when the target was not found, otherwise
            ``self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2`` followed
            by ``self.target_sz[[1, 0]]``.
        """
        self.frame_num += 1
        self.debug_info = {'frame_num': self.frame_num}

        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Backbone features around the centered sample position, all scales
        centered_pos = self.get_centered_sample_pos()
        scales = self.target_scale * self.params.scale_factors
        backbone_feat, sample_coords = self.extract_backbone_features(
            im, centered_pos, scales, self.img_sample_sz)

        # Classification features and where the samples were actually taken
        test_x = self.get_classification_features(backbone_feat)
        sample_pos, sample_scales = self.get_sample_location(sample_coords)

        # Classify and localize
        scores_raw = self.classify_target(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(
            scores_raw, sample_scales)
        new_pos = sample_pos[scale_ind, :] + translation_vec

        self.debug_info['flag'] = flag
        target_found = flag != 'not_found'

        # Update position and scale
        if target_found:
            if getattr(self.params, 'use_iou_net', True):
                allow_scale_update = getattr(
                    self.params, 'update_scale_when_uncertain',
                    True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat,
                                       sample_pos[scale_ind, :],
                                       sample_scales[scale_ind], scale_ind,
                                       allow_scale_update)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #

        update_flag = flag not in ('not_found', 'uncertain')
        learning_rate = None
        if flag == 'hard_negative':
            learning_rate = getattr(self.params,
                                    'hard_negative_learning_rate', None)

        if getattr(self.params, 'update_classifier', False) and update_flag:
            # Training sample at the selected scale
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Target box expressed relative to the sample that was scored
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos[scale_ind, :],
                                             sample_scales[scale_ind])

            self.update_classifier(train_x, target_box, learning_rate,
                                   s[scale_ind, ...])

        # Set the pos of the tracker to iounet pos
        if (getattr(self.params, 'use_iou_net', True) and target_found
                and hasattr(self, 'pos_iounet')):
            self.pos = self.pos_iounet.clone()

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score

        if self.visdom is None:
            if self.params.debug >= 2:
                show_tensor(score_map, 5,
                            title='Max score = {:.2f}'.format(max_score))
        else:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')

        # Compute output bounding box
        size_xy = self.target_sz[[1, 0]]
        new_state = torch.cat((self.pos[[1, 0]] - (size_xy - 1) / 2, size_xy))

        # A zero box signals "not found" (e.g. occluded or out of view).
        bbox = new_state.tolist() if target_found else [0, 0, 0, 0]
        return {'target_bbox': bbox}
Ejemplo n.º 5
0
    def track(self, image, info: dict = None) -> dict:
        """Advance the tracker by one frame.

        Steps: sample the frame at the current position over all scale
        factors, score the samples with the filter, update position and
        scale (optionally refining the box through the IoU branch), store a
        new training sample when the localization flag is neither
        ``'not_found'`` nor ``'uncertain'``, and re-run the filter optimizer
        on hard negatives or every ``self.params.train_skipping`` frames.

        Args:
            image: Input frame; converted with ``numpy_to_torch``.
            info: Not read by this method.

        Returns:
            dict: ``{'target_bbox': [...]}`` holding
            ``self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2`` followed
            by ``self.target_sz[[1, 0]]`` (presumably ``[x, y, w, h]`` --
            confirm against the caller).
        """
        self.frame_num += 1
        self.debug_info = {'frame_num': self.frame_num}

        # The converted frame is kept on the instance for debugging only.
        im = self.im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        rounded_pos = self.pos.round()
        scales = self.target_scale * self.params.scale_factors
        samples = self.extract_processed_sample(im, self.pos, scales,
                                                self.img_sample_sz)

        shift, best, responses, status = self.localize_target(
            self.apply_filter(samples))
        target_found = status != 'not_found'

        # Update position and scale
        if target_found:
            if self.use_iou_net:
                allow_scale_update = self.params.get(
                    'update_scale_when_uncertain',
                    True) or status != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(rounded_pos + shift)
                self.refine_target_box(rounded_pos, scales[best], best,
                                       allow_scale_update)
            elif self.params.get('use_classifier', True):
                self.update_state(rounded_pos + shift, scales[best])

        score_map = responses[best, ...]
        peak = torch.max(score_map).item()
        self.debug_info['max_score'] = peak
        self.debug_info['flag'] = status

        if self.visdom is None:
            if self.params.debug >= 2:
                show_tensor(score_map, 5,
                            title='Max score = {:.2f}'.format(peak))
        else:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')

        # ------- UPDATE ------- #

        # Hard negatives get their own learning rate and optimizer budget.
        is_hard_negative = status == 'hard_negative'
        learning_rate = None
        if is_hard_negative:
            learning_rate = self.params.hard_negative_learning_rate

        if status not in ('not_found', 'uncertain'):
            # Training sample at the selected scale, its label, then memory.
            train_x = TensorList([x[best:best + 1, ...] for x in samples])
            train_y = self.get_label_function(rounded_pos, scales[best])
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if is_hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num - 1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and target_found:
            self.pos = self.pos_iounet.clone()

        # Return new state
        size_xy = self.target_sz[[1, 0]]
        corner_xy = self.pos[[1, 0]] - (size_xy - 1) / 2
        return {'target_bbox': torch.cat((corner_xy, size_xy)).tolist()}
Ejemplo n.º 6
0
    def track(self, image, info: dict = None) -> dict:
        """Track the target by scoring several depth bands of ``image``.

        ``image`` is handled with NumPy operations (``np.max``, ``.copy()``,
        boolean-mask assignment), and its values are thresholded in steps of
        1000 (presumably a depth map in millimetres -- TODO confirm). For
        each candidate band around ``self.layer_id`` the values outside the
        band are zeroed, the band is normalized, color-mapped with
        ``cv2.COLORMAP_JET`` and run through the usual localization
        pipeline. The band with the highest score whose flag is not
        ``'not_found'`` is used for the state and classifier updates.

        Args:
            image: Input frame (see above).
            info: Not read by this method.

        Returns:
            dict: ``'target_bbox'`` (four values, or ``[-1, -1, -1, -1]``
            when ``output_not_found_box`` is set and nothing was found),
            ``'confidence'`` (best score, 0 when nothing was found) and
            ``'image'`` (``torch_to_numpy`` of the selected band).
        """

        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Per-band search. The running best over all bands is stored in the
        # variables below; they keep these placeholder values when no band
        # yields a flag other than 'not_found'.
        # NOTE(review): an earlier comment here listed bands as 0-2 m, 2-4 m,
        # ..., 10-inf, but the loop below builds overlapping bands
        # [z*1000, (z+2)*1000] -- confirm which is intended.
        max_score = 0
        flag = 'not_found'
        new_pos = [-1, -1, -1, -1]
        scale_ind = None
        backbone_feat = None
        test_x = None
        sample_pos = None
        s = None
        sample_coords = None

        target_dist = 0
        final_layer = image
        print(np.max(image))  # debug output

        # Only the bands adjacent to the previously selected layer are tried.
        # NOTE(review): self.layer_id is set below as depth // 2000 while
        # z_dist is multiplied by 1000 here -- the units look inconsistent;
        # confirm.
        start = max(0, self.layer_id - 1)
        end = min(11, self.layer_id + 2)
        for z_dist in range(start, end):
            if z_dist == 10:
                # Last band: everything from 10000 up to the image maximum.
                # NOTE(review): if np.max(image) <= 10000 then
                # upper - lower <= 0 in the normalization below -- confirm
                # this cannot happen.
                lower = 10000  # 10 meter
                upper = np.max(image)
            else:
                lower = z_dist * 1000
                upper = (z_dist + 2) * 1000

            # Zero out everything outside [lower, upper].
            layer = image.copy()
            layer[layer > upper] = 0
            layer[layer < lower] = 0

            print(lower, upper, np.median(np.nonzero(layer)))  # debug output

            # Normalize the band, scale by 255, cast to uint8 and color-map.
            # NOTE(review): zeroed pixels become negative after subtracting
            # a non-zero `lower`, before the uint8 cast -- confirm intended.
            layer = (layer - lower) / (upper - lower)
            layer = np.asarray(layer * 255, dtype=np.uint8)
            layer = cv2.applyColorMap(layer, cv2.COLORMAP_JET)
            layer = numpy_to_torch(layer)

            # Extract backbone features
            backbone_feat_layer, sample_coords_layer, im_patches_layer = self.extract_backbone_features(
                layer, self.get_centered_sample_pos(),
                self.target_scale * self.params.scale_factors,
                self.img_sample_sz)
            # Extract classification features
            test_x_layer = self.get_classification_features(
                backbone_feat_layer)

            # Location of sample
            sample_pos_layer, sample_scales_layer = self.get_sample_location(
                sample_coords_layer)

            # Compute classification scores
            scores_raw_layer = self.classify_target(test_x_layer)

            # Localize the target (author note: depth cues could be added
            # at this point)
            translation_vec_layer, scale_ind_layer, s_layer, flag_layer = self.localize_target(
                scores_raw_layer, sample_pos_layer,
                sample_scales_layer)
            new_pos_layer = sample_pos_layer[
                scale_ind_layer, :] + translation_vec_layer

            score_map_layer = s_layer[scale_ind_layer, ...]
            max_score_layer = torch.max(score_map_layer).item()

            # Keep this band if it found the target with a higher score than
            # any earlier band.
            if flag_layer != 'not_found' and max_score_layer > max_score:
                flag = flag_layer
                max_score = max_score_layer
                new_pos = new_pos_layer
                scale_ind = scale_ind_layer
                sample_pos = sample_pos_layer
                backbone_feat = backbone_feat_layer
                test_x = test_x_layer
                sample_scales = sample_scales_layer
                s = s_layer
                target_dist = z_dist
                sample_coords = sample_coords_layer

                final_layer = layer

        # Disabled: updating self.layer_id from the selected band when the
        # score is high.
        # if max_score > 0.8:
        # self.layer_id = target_dist

        print('Choose %d meter ... ' % target_dist, flag, max_score)

        # Update position and scale. `sample_scales` is only bound when a
        # band was selected, which is exactly when flag != 'not_found'.
        if flag != 'not_found':
            if self.params.get('use_iou_net', True):
                update_scale_flag = self.params.get(
                    'update_scale_when_uncertain', True) or flag != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                       sample_scales[scale_ind], scale_ind,
                                       update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #

        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.get('hard_negative_learning_rate',
                                        None) if hard_negative else None

        if update_flag and self.params.get('update_classifier', False):
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos[scale_ind, :],
                                             sample_scales[scale_ind])

            # Update the classifier model
            self.update_classifier(train_x, target_box, learning_rate,
                                   s[scale_ind, ...])

        # Set the pos of the tracker to iounet pos
        if self.params.get('use_iou_net',
                           True) and flag != 'not_found' and hasattr(
                               self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        if flag != 'not_found':
            score_map = s[scale_ind, ...]
            max_score = torch.max(score_map).item()

            # Visualize and set debug info
            self.search_area_box = torch.cat(
                (sample_coords[scale_ind,
                               [1, 0]], sample_coords[scale_ind, [3, 2]] -
                 sample_coords[scale_ind, [1, 0]] - 1))
            self.debug_info['flag' + self.id_str] = flag
            self.debug_info['max_score' + self.id_str] = max_score
            if self.visdom is not None:
                self.visdom.register(score_map, 'heatmap', 2,
                                     'Score Map' + self.id_str)
                self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
            elif self.params.debug >= 2:
                show_tensor(score_map,
                            5,
                            title='Max score = {:.2f}'.format(max_score))

        else:
            # Nothing found: report zero confidence and the unmodified input.
            # NOTE(review): final_layer is then the raw `image`, not the
            # output of numpy_to_torch, yet it is passed to torch_to_numpy
            # below -- confirm that helper accepts it.
            max_score = 0
            final_layer = image

        # Compute output bounding box
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        if self.params.get('output_not_found_box',
                           False) and flag == 'not_found':
            output_state = [-1, -1, -1, -1]
        else:
            output_state = new_state.tolist()

            # Remember which layer the reported box falls into; it centers
            # the band search on the next call.
            target_depth = get_target_depth(image, output_state)
            self.layer_id = int(target_depth // 2000)

        out = {
            'target_bbox': output_state,
            'confidence': max_score,
            'image': torch_to_numpy(final_layer)
        }
        return out
Ejemplo n.º 7
0
    def track(self, image):
        """Advance the tracker by one frame and return the target box.

        The frame is sampled at the current position over all scale factors
        and scored with the filter. The response at the selected scale is
        stored in ``self.response``, the state is updated (optionally
        refined through the IoU branch), a training sample is stored when
        the localization flag is neither ``'not_found'`` nor
        ``'uncertain'``, and the filter optimizer is re-run on hard
        negatives or every ``self.params.train_skipping`` frames.

        Args:
            image: Input frame; converted with ``numpy_to_torch``.

        Returns:
            list: ``self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2``
            followed by ``self.target_sz[[1, 0]]`` (presumably
            ``[x, y, w, h]`` -- confirm against the caller).
        """
        self.frame_num += 1

        # The converted frame is kept on the instance for debugging only.
        im = self.im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        rounded_pos = self.pos.round()
        scales = self.target_scale * self.params.scale_factors
        samples = self.extract_processed_sample(im, self.pos, scales,
                                                self.img_sample_sz)

        shift, best, responses, status = self.localize_target(
            self.apply_filter(samples))
        target_found = status != 'not_found'

        # Keep the response at the selected scale; per the original author
        # it is used to adjust the search region.
        self.response = responses[best]

        # Update position and scale
        if target_found:
            if self.use_iou_net:
                allow_scale_update = getattr(
                    self.params, 'update_scale_when_uncertain',
                    True) or status != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(rounded_pos + shift)
                self.refine_target_box(rounded_pos, scales[best], best,
                                       allow_scale_update)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(rounded_pos + shift, scales[best])

        if self.params.debug >= 2:
            peak = torch.max(responses[best, ...]).item()
            show_tensor(responses[best, ...], 5,
                        title='Max score = {:.2f}'.format(peak))

        # ------- UPDATE ------- #

        # Hard negatives get their own learning rate and optimizer budget.
        is_hard_negative = status == 'hard_negative'
        learning_rate = None
        if is_hard_negative:
            learning_rate = self.params.hard_negative_learning_rate

        if status not in ('not_found', 'uncertain'):
            # Training sample at the selected scale, its label, then memory.
            train_x = TensorList([x[best:best + 1, ...] for x in samples])
            train_y = self.get_label_function(rounded_pos, scales[best])
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if is_hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num - 1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and target_found:
            self.pos = self.pos_iounet.clone()

        # Return new state
        size_xy = self.target_sz[[1, 0]]
        corner_xy = self.pos[[1, 0]] - (size_xy - 1) / 2
        return torch.cat((corner_xy, size_xy)).tolist()
Ejemplo n.º 8
0
    def track(self, image, info: dict = None) -> dict:
        """Advance the tracker by one frame and report box and confidence.

        The frame is sampled at the current position over all scale factors
        and scored with the filter. The state is updated (optionally refined
        through the IoU branch), a training sample is stored when the
        localization flag is neither ``'not_found'`` nor ``'uncertain'``,
        and the filter optimizer is re-run on hard negatives or every
        ``self.params.train_skipping`` frames.

        An earlier, disabled experiment read a depth map from ``info`` to
        mask the extracted sample; only the color path is active.

        Args:
            image: Input frame; converted with ``numpy_to_torch``.
            info: Not read by this method.

        Returns:
            dict: ``'target_bbox'`` holds
            ``self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2`` followed
            by ``self.target_sz[[1, 0]]``; ``'confidence'`` holds the maximum
            of the score map at the selected scale.
        """
        self.frame_num += 1
        self.debug_info = {'frame_num': self.frame_num}

        # The converted frame is kept on the instance for debugging only.
        im = self.im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        rounded_pos = self.pos.round()
        scales = self.target_scale * self.params.scale_factors
        samples = self.extract_processed_sample(im, self.pos, scales,
                                                self.img_sample_sz)

        shift, best, responses, status = self.localize_target(
            self.apply_filter(samples))
        target_found = status != 'not_found'

        # Update position and scale
        if target_found:
            if self.use_iou_net:
                allow_scale_update = self.params.get(
                    'update_scale_when_uncertain',
                    True) or status != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(rounded_pos + shift)
                self.refine_target_box(rounded_pos, scales[best], best,
                                       allow_scale_update)
            elif self.params.get('use_classifier', True):
                self.update_state(rounded_pos + shift, scales[best])

        score_map = responses[best, ...]
        confidence = torch.max(score_map).item()
        self.debug_info['max_score'] = confidence
        self.debug_info['flag'] = status

        if self.visdom is None:
            if self.params.debug >= 2:
                show_tensor(score_map, 5,
                            title='Max score = {:.2f}'.format(confidence))
        else:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')

        # ------- UPDATE ------- #

        # Hard negatives get their own learning rate and optimizer budget.
        is_hard_negative = status == 'hard_negative'
        learning_rate = None
        if is_hard_negative:
            learning_rate = self.params.hard_negative_learning_rate

        if status not in ('not_found', 'uncertain'):
            # Training sample at the selected scale, its label, then memory.
            train_x = TensorList([x[best:best + 1, ...] for x in samples])
            train_y = self.get_label_function(rounded_pos, scales[best])
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if is_hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num - 1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and target_found:
            self.pos = self.pos_iounet.clone()

        # Return new state; the peak score doubles as the confidence.
        size_xy = self.target_sz[[1, 0]]
        corner_xy = self.pos[[1, 0]] - (size_xy - 1) / 2
        return {
            'target_bbox': torch.cat((corner_xy, size_xy)).tolist(),
            'confidence': confidence
        }
Ejemplo n.º 9
0
    def track(self, image) -> dict:
        """Localize the target in a new frame and selectively train the filter.

        The frame is scored with the current filter, the tracker state is
        updated, and a metric feature of the estimated box is buffered together
        with the Fourier training sample. On frame 1 the sample is stored and
        the filter trained directly. Afterwards, whenever
        ``frame_num % params.train_skipping == 1``, the buffered samples are
        screened with ``lof`` (and, after the first screening round, by a
        distance check against previously accepted features) before being
        written to memory; the filter is then trained on the last accepted
        sample and both buffers are cleared.

        Args:
            image: Current frame; passed to ``numpy_to_torch`` and
                ``np.array``.

        Returns:
            dict with the single key ``'target_bbox'``: a 4-element list built
            from ``self.pos`` and ``self.target_sz`` (presumably
            ``[x, y, w, h]`` -- confirm against the caller).
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_xf = self.extract_fourier_sample(im, self.pos, sample_scales,
                                              self.img_sample_sz)

        # Compute scores
        sf = self.apply_filter(test_xf)
        translation_vec, scale_ind, s = self.localize_target(sf)
        scale_change_factor = self.params.scale_factors[scale_ind]

        # Update position and scale
        self.update_state(sample_pos + translation_vec,
                          self.target_scale * scale_change_factor)

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map,
                        5,
                        title='Max score = {:.2f}'.format(max_score))

        # ------- METRIC FEATURE ------- #

        # Buffer the metric-model feature of the box just estimated; it is
        # consumed by the screening step below.
        state_tmp = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))
        state_tmp = state_tmp.numpy()
        with torch.no_grad():
            self.current_target_metric_feature.append(
                get_target_feature(self.metric_model, state_tmp,
                                   np.array(image)).cpu().detach().numpy())

        # ------- UPDATE ------- #

        # Get train sample
        train_xf = TensorList(
            [xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

        # Shift the sample
        shift_samp = 2 * math.pi * (self.pos - sample_pos) / (
            sample_scales[scale_ind] * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        self.train_xf.append(train_xf)

        if self.frame_num == 1:
            # First frame: store and train unconditionally. The buffers are
            # intentionally left untouched here, as in the original flow.
            self.update_memory(train_xf)
            self.filter_optimizer.run(self.params.CG_iter, train_xf)
            self.symmetrize_filter()
        elif self.frame_num % self.params.train_skipping == 1:
            current_target_metric_feature = np.array(
                self.current_target_metric_feature).squeeze()
            current_target_metric_feature0 = torch.from_numpy(
                current_target_metric_feature).cuda()
            lof_predict, success = lof(current_target_metric_feature,
                                       self.clf,
                                       k=5,
                                       thresh=self.lof_thresh)
            last_id = -1

            # During the first screening round every buffered sample is
            # accepted and the LOF threshold is calibrated from this batch.
            warmup = self.frame_num <= self.params.train_skipping + 1
            if warmup:
                self.lof_thresh = lof_predict.mean() * self.params.lof_rate
                print('lof_thresh:', self.lof_thresh)

            for ii, buffered_xf in enumerate(self.train_xf):
                if not warmup and success[ii]:
                    candidate = current_target_metric_feature0[ii].reshape(
                        [1, 1024])
                    # Reject the sample if it is closer than self.similar to
                    # any feature accepted so far (newest first).
                    for stored in reversed(self.target_features_all):
                        dist = torch.norm(stored - candidate, 2,
                                          dim=1).view(-1)
                        if dist < self.similar:
                            success[ii] = 0
                            # One match is enough; the original `continue`
                            # here was a no-op and kept scanning.
                            break
                if warmup or success[ii]:
                    self.target_features_all.append(
                        current_target_metric_feature0[ii].reshape([1, 1024]))
                    last_id = ii
                    self.update_memory(buffered_xf)

            # Train only if at least one sample was accepted.
            if last_id > -1:
                self.filter_optimizer.run(self.params.CG_iter,
                                          self.train_xf[last_id])
                self.symmetrize_filter()
            self.current_target_metric_feature = []
            self.train_xf = []

        # Return new state
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        out = {'target_bbox': new_state.tolist()}
        return out
Ejemplo n.º 10
0
    def track(self, image, info: dict = None) -> dict:
        """Localize the target in ``image`` and optionally update the classifier.

        Backbone and classification features are extracted around the current
        sample position, scored, and passed to ``localize_target``. Depending
        on the returned flag and on ``self.params`` the state is updated, the
        box is refined, and the classifier is trained on the new sample.

        Args:
            image: Current frame; converted with ``numpy_to_torch``.
            info: Not used by this method.

        Returns:
            dict with key ``'target_bbox'``: the state derived from
            ``self.pos`` / ``self.target_sz`` as a list, or
            ``[-1, -1, -1, -1]`` when ``output_not_found_box`` is enabled and
            the flag is ``'not_found'``.
        """
        self.debug_info = {}
        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        frame = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Backbone features plus the coordinates of the extracted samples
        # (the third return value is not needed here).
        backbone_feat, sample_coords, _ = self.extract_backbone_features(
            frame,
            self.get_centered_sample_pos(),
            self.target_scale * self.params.scale_factors,
            self.img_sample_sz,
        )
        test_x = self.get_classification_features(backbone_feat)
        sample_pos, sample_scales = self.get_sample_location(sample_coords)

        # Score the samples and pick translation / scale.
        scores_raw = self.classify_target(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(
            scores_raw, sample_pos, sample_scales)
        selected_pos = sample_pos[scale_ind, :]
        new_pos = selected_pos + translation_vec

        # State is only touched when the target was found.
        found = flag != 'not_found'
        if found:
            if self.params.get('use_iou_net', True):
                update_scale_flag = (
                    self.params.get('update_scale_when_uncertain', True)
                    or flag != 'uncertain')
                if self.params.get('use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat, selected_pos,
                                       sample_scales[scale_ind], scale_ind,
                                       update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #

        update_flag = flag not in ('not_found', 'uncertain')
        if flag == 'hard_negative':
            learning_rate = self.params.get('hard_negative_learning_rate',
                                            None)
        else:
            learning_rate = None

        if update_flag and self.params.get('update_classifier', False):
            # Training sample at the selected scale.
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Target box expressed relative to the spatial sample.
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos[scale_ind, :],
                                             sample_scales[scale_ind])

            self.update_classifier(train_x, target_box, learning_rate,
                                   s[scale_ind, ...])

        # Adopt the position stored by the box refinement, if there is one.
        if (self.params.get('use_iou_net', True) and found
                and hasattr(self, 'pos_iounet')):
            self.pos = self.pos_iounet.clone()

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()

        # Visualization and debug info.
        coords_lo = sample_coords[scale_ind, [1, 0]]
        coords_hi = sample_coords[scale_ind, [3, 2]]
        self.search_area_box = torch.cat((coords_lo, coords_hi - coords_lo - 1))
        self.debug_info['flag' + self.id_str] = flag
        self.debug_info['max_score' + self.id_str] = max_score
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2,
                                 'Score Map' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5,
                        title='Max score = {:.2f}'.format(max_score))

        # Output bounding box.
        size_wh = self.target_sz[[1, 0]]
        new_state = torch.cat((self.pos[[1, 0]] - (size_wh - 1) / 2,
                               self.target_sz[[1, 0]]))

        if self.params.get('output_not_found_box', False) and not found:
            output_state = [-1, -1, -1, -1]
        else:
            output_state = new_state.tolist()

        return {'target_bbox': output_state}
Ejemplo n.º 11
0
    def track(self, image1, image2):
        """Localize the target using features from two input images.

        Samples are extracted from both images at the same position and
        scales, concatenated along dim 1, and scored with the current filter.
        (The two images are presumably two modalities of the same frame --
        confirm with the caller.)

        Args:
            image1: First image; converted with ``numpy_to_torch``.
            image2: Second image; converted with ``numpy_to_torch``.

        Returns:
            list: the state derived from ``self.pos`` and ``self.target_sz``
            via ``torch.cat(...).tolist()``.
        """
        self.frame_num += 1

        im1 = numpy_to_torch(image1)
        im2 = numpy_to_torch(image2)

        # ------- LOCALIZATION ------- #

        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_x1 = self.extract_sample(im1, self.pos, sample_scales,
                                      self.img_sample_sz)
        test_x2 = self.extract_sample(im2, self.pos, sample_scales,
                                      self.img_sample_sz)
        # Fuse the per-layer features of both images along dim 1.
        test_x = TensorList(
            [torch.cat(pair, 1) for pair in zip(test_x1, test_x2)])

        scores_raw = self.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

        # State is only touched when the target was found.
        found = flag != 'not_found'
        if found:
            if self.use_iou_net:
                update_scale_flag = (
                    getattr(self.params, 'update_scale_when_uncertain', True)
                    or flag != 'uncertain')
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                       scale_ind, update_scale_flag)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec,
                                  sample_scales[scale_ind])

        if self.params.debug >= 2:
            score_map = s[scale_ind, ...]
            peak = torch.max(s[scale_ind, ...]).item()
            show_tensor(score_map, 5, title='Max score = {:.2f}'.format(peak))

        # ------- UPDATE ------- #

        update_flag = flag not in ('not_found', 'uncertain')
        if flag == 'hard_negative':
            learning_rate = getattr(self.params,
                                    'hard_negative_learning_rate', None)
        else:
            learning_rate = None

        if getattr(self.params, 'update_classifier', False) and update_flag:
            # Training sample at the selected scale.
            train_x = TensorList(
                [feat[scale_ind:scale_ind + 1, ...] for feat in test_x])

            # Target box and label for the spatial sample.
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos,
                                             sample_scales[scale_ind])
            train_y = self.get_label_function(
                sample_pos, sample_scales[scale_ind]).to(self.params.device)

            self.update_classifier(train_x, train_y, target_box,
                                   learning_rate, s[scale_ind, ...])

        # Adopt the position stored by the box refinement, if there is one.
        if self.use_iou_net and found and hasattr(self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        # Return new state.
        corner = self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2
        new_state = torch.cat((corner, self.target_sz[[1, 0]]))
        return new_state.tolist()
Ejemplo n.º 12
0
    def track(self, image, info: dict = None) -> dict:
        """Localize the target with the current filter and update the model.

        Every frame the Fourier sample at the selected scale is shifted and
        stored via ``update_memory``; the filter is re-optimized only on
        frames where ``frame_num % params.train_skipping == 1``.

        Args:
            image: Current frame; converted with ``numpy_to_torch``.
            info: Not used by this method.

        Returns:
            dict with key ``'target_bbox'``: the state derived from
            ``self.pos`` and ``self.target_sz`` as a list.
        """
        self.debug_info = {}
        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        frame = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_xf = self.extract_fourier_sample(
            frame, self.pos, sample_scales, self.img_sample_sz)

        # Score the sample and pick translation / scale.
        sf = self.apply_filter(test_xf)
        translation_vec, scale_ind, s = self.localize_target(sf)
        scale_change_factor = self.params.scale_factors[scale_ind]

        self.update_state(sample_pos + translation_vec,
                          self.target_scale * scale_change_factor)

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5,
                        title='Max score = {:.2f}'.format(max_score))

        # ------- UPDATE ------- #

        # Training sample at the selected scale.
        train_xf = TensorList(
            [layer[scale_ind:scale_ind + 1, ...] for layer in test_xf])

        # Shift the sample by the offset between the updated position and the
        # rounded position it was extracted at.
        offset = self.pos - sample_pos
        support = sample_scales[scale_ind] * self.img_support_sz
        shift_samp = 2 * math.pi * offset / support
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        self.update_memory(train_xf)

        # Train the filter only on the scheduled frames.
        if self.frame_num % self.params.train_skipping == 1:
            self.filter_optimizer.run(self.params.CG_iter, train_xf)
            self.symmetrize_filter()

        # Return new state.
        corner = self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2
        new_state = torch.cat((corner, self.target_sz[[1, 0]]))
        return {'target_bbox': new_state.tolist()}
Ejemplo n.º 13
0
    def track(self, image):
        """Localize the target in ``image`` and update classifier and memory.

        Classification features are passed through ``transformer_decoder``
        before scoring. When the classifier is updated and
        ``(frame_num - 1) % params.transformer_skipping == 0``, the current
        features and a Gaussian label are pushed to the front of the
        transformer memory (dropping the last entry once
        ``params.transformer_memory_size`` is reached) and the memory is
        re-encoded.

        Args:
            image: Current frame; converted with ``numpy_to_torch``.

        Returns:
            list: the state derived from ``self.pos`` and ``self.target_sz``,
            or ``[-1, -1, -1, -1]`` when ``output_not_found_box`` is enabled
            and the flag is ``'not_found'``. (Previously the not-found box was
            computed but never returned.)
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Extract backbone features
        backbone_feat, sample_coords, im_patches = self.extract_backbone_features(
            im, self.get_centered_sample_pos(),
            self.target_scale * self.params.scale_factors, self.img_sample_sz)
        # Extract classification features
        x_clf = self.get_classification_features(backbone_feat)
        decoded_x, test_x = self.transformer_decoder(x_clf)

        # Location of sample
        sample_pos, sample_scales = self.get_sample_location(sample_coords)

        # Compute classification scores
        scores_raw = self.classify_target(test_x)

        # Localize the target
        translation_vec, scale_ind, s, flag = self.localize_target(
            scores_raw, sample_pos, sample_scales)
        new_pos = sample_pos[scale_ind, :] + translation_vec

        # Update position and scale
        if flag != 'not_found':
            if self.params.get('use_iou_net', True):
                update_scale_flag = self.params.get(
                    'update_scale_when_uncertain', True) or flag != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                       sample_scales[scale_ind], scale_ind,
                                       update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #
        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.get('hard_negative_learning_rate',
                                        None) if hard_negative else None

        if update_flag and self.params.get('update_classifier', False):
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos[scale_ind, :],
                                             sample_scales[scale_ind])

            # Update the classifier model
            self.update_classifier(train_x, target_box, learning_rate,
                                   s[scale_ind, ...])

            # Update Transformer memory
            if (self.frame_num - 1) % self.params.transformer_skipping == 0:
                cur_tf_label = prutils.gaussian_label_function(
                    target_box.cpu().view(-1, 4),
                    0.1,
                    self.net.classifier.filter_size,
                    self.feature_sz,
                    self.img_sample_sz,
                    end_pad_if_even=False)
                new_label = cur_tf_label.unsqueeze(1).cuda()

                # Newest entry goes first; once the memory is full the last
                # entry along dim 0 is dropped to make room.
                if self.x_clf.shape[0] < self.params.transformer_memory_size:
                    kept_labels = self.transformer_label
                    kept_feats = self.x_clf
                else:
                    kept_labels = self.transformer_label[:-1, ...]
                    kept_feats = self.x_clf[:-1, ...]
                self.transformer_label = torch.cat([new_label, kept_labels],
                                                   dim=0)
                self.x_clf = torch.cat([x_clf, kept_feats], dim=0)

                self.transformer_memory, _ = self.net.classifier.transformer.encoder(
                    self.x_clf.unsqueeze(1), pos=None)

        # Set the pos of the tracker to iounet pos
        if self.params.get('use_iou_net',
                           True) and flag != 'not_found' and hasattr(
                               self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()

        # Visualize and set debug info
        self.search_area_box = torch.cat(
            (sample_coords[scale_ind,
                           [1, 0]], sample_coords[scale_ind, [3, 2]] -
             sample_coords[scale_ind, [1, 0]] - 1))
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2,
                                 'Score Map' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map,
                        5,
                        title='Max score = {:.2f}'.format(max_score))

        # Compute output bounding box
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        if self.params.get('output_not_found_box',
                           False) and flag == 'not_found':
            output_state = [-1, -1, -1, -1]
        else:
            output_state = new_state.tolist()

        # Return the bare list (not a dict), but honor output_not_found_box;
        # with the option off this equals new_state.tolist().
        return output_state
Ejemplo n.º 14
0
    def track_updater(self, image) -> tuple:
        """Localize the target and return the data needed for a later update.

        Only the localization half of tracking is done here: the state is
        updated and the box refined, but the classifier is not trained. The
        intermediate results are returned so the caller can decide how to
        update.

        Args:
            image: Current frame; converted with ``numpy_to_torch``.

        Returns:
            An 8-tuple ``(bbox, score_map, test_x, scale_ind, sample_pos,
            sample_scales, flag, s)`` where ``bbox`` is the state derived from
            ``self.pos`` / ``self.target_sz`` as a list and ``score_map`` is
            ``s[scale_ind]`` converted to a NumPy array. The remaining items
            are the values produced during localization, unchanged.
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_x = self.extract_processed_sample(im, self.pos, sample_scales,
                                               self.img_sample_sz)

        # Compute scores
        scores_raw = self.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

        # Update position and scale
        if flag != 'not_found':
            if self.use_iou_net:
                update_scale_flag = getattr(self.params,
                                            'update_scale_when_uncertain',
                                            True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                       scale_ind, update_scale_flag)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec,
                                  sample_scales[scale_ind])

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score
        self.debug_info['flag'] = flag

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map,
                        5,
                        title='Max score = {:.2f}'.format(max_score))

        # ------- UPDATE ------- #

        # Set the pos of the tracker to iounet pos. The hasattr guard avoids
        # an AttributeError when pos_iounet has not been set yet.
        if self.use_iou_net and flag != 'not_found' and hasattr(
                self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        # Return new state
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        return (new_state.tolist(), score_map.cpu().data.numpy(), test_x,
                scale_ind, sample_pos, sample_scales, flag, s)
Ejemplo n.º 15
0
    def track_updater(self, image) -> tuple:
        """Localize the target and return the data needed for a later update.

        Backbone and classification features are extracted, scored and passed
        to ``localize_target``; the state is updated and the box refined
        according to ``self.params``. The classifier itself is not trained
        here.

        Args:
            image: Current frame; converted with ``numpy_to_torch``.

        Returns:
            An 8-tuple ``(bbox, score_map, test_x, scale_ind, sample_pos,
            sample_scales, flag, s)`` where ``bbox`` is the state derived from
            ``self.pos`` / ``self.target_sz`` as a list and ``score_map`` is
            ``s[scale_ind]`` converted to a NumPy array. The remaining items
            are the values produced during localization, unchanged.
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Extract backbone features
        backbone_feat, sample_coords = self.extract_backbone_features(
            im, self.get_centered_sample_pos(),
            self.target_scale * self.params.scale_factors,
            self.img_sample_sz)
        # Extract classification features
        test_x = self.get_classification_features(backbone_feat)

        # Location of sample
        sample_pos, sample_scales = self.get_sample_location(sample_coords)

        # Compute classification scores
        scores_raw = self.classify_target(test_x)

        # Localize the target
        translation_vec, scale_ind, s, flag = self.localize_target(
            scores_raw, sample_scales)
        new_pos = sample_pos[scale_ind, :] + translation_vec

        self.debug_info['flag'] = flag

        # Update position and scale
        if flag != 'not_found':
            if getattr(self.params, 'use_iou_net', True):
                update_scale_flag = getattr(
                    self.params, 'update_scale_when_uncertain',
                    True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat,
                                       sample_pos[scale_ind, :],
                                       sample_scales[scale_ind], scale_ind,
                                       update_scale_flag)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #

        # Set the pos of the tracker to iounet pos
        if getattr(self.params, 'use_iou_net',
                   True) and flag != 'not_found' and hasattr(
                       self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map,
                        5,
                        title='Max score = {:.2f}'.format(max_score))

        # Compute output bounding box
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        return (new_state.tolist(), score_map.cpu().data.numpy(), test_x,
                scale_ind, sample_pos, sample_scales, flag, s)