コード例 #1
0
    def _generate_label_density(self, target_bb):
        """ Builds the gaussian label density map for every box in target_bb
        args:
            target_bb - target bounding box (num_images, 4)

        returns:
            torch.Tensor - Tensor of shape (num_images, label_sz, label_sz) containing the label for each sample
        """
        cfg = self.label_density_params

        # Resolution of the density map: base feature size, optionally upscaled.
        map_sz = cfg['feature_sz'] * cfg.get('interp_factor', 1)

        density = prutils.gaussian_label_function(
            target_bb.view(-1, 4),
            cfg['sigma_factor'],
            cfg['kernel_sz'],
            map_sz,
            self.output_sz,
            end_pad_if_even=cfg.get('end_pad_if_even', True),
            density=True,
            uni_bias=cfg.get('uni_weight', 0.0))

        # Zero every entry that does not exceed the configured threshold.
        above_threshold = density > cfg.get('threshold', 0.0)
        density *= above_threshold.float()

        if cfg.get('normalize', False):
            # Per-sample mass over the two spatial dimensions.
            mass = density.sum(dim=(-2, -1))
            has_mass = mass > 0.01

            # Samples with enough mass are rescaled to sum to one ...
            density[has_mass, :, :] /= mass[has_mass].view(-1, 1, 1)
            # ... the remaining ones fall back to a uniform map.
            uniform_value = 1.0 / (density.shape[-2] * density.shape[-1])
            density[~has_mass, :, :] = uniform_value

        # Optionally scale the whole map down by the 'shrink' fraction.
        density *= 1.0 - cfg.get('shrink', 0.0)

        return density
コード例 #2
0
 def _generate_label_function(self, target_bb, sigma, kernel, feature, output_sz, end_pad_if_even, target_absent=None):
     """ Builds the gaussian label for every box in target_bb from explicitly passed settings
     args:
         target_bb - target bounding boxes, flattened to (-1, 4) before use
         sigma, kernel, feature, output_sz - forwarded positionally, in this order,
             to prutils.gaussian_label_function
         end_pad_if_even - forwarded as the keyword argument of the same name
         target_absent - optional tensor; the labels are multiplied by
             (1 - target_absent), reshaped to (-1, 1, 1)

     returns:
         The label tensor from prutils.gaussian_label_function (scaled in place
         when target_absent is given)
     """
     label = prutils.gaussian_label_function(
         target_bb.view(-1, 4), sigma, kernel, feature, output_sz,
         end_pad_if_even=end_pad_if_even)

     if target_absent is None:
         return label

     # Broadcast one factor per sample over the two trailing label dimensions.
     presence = (1 - target_absent).view(-1, 1, 1).float()
     label *= presence
     return label
コード例 #3
0
    def _generate_label_function(self, target_bb, is_distractor=None):
        """ Builds the gaussian label for every box in target_bb
        args:
            target_bb - target bounding boxes, flattened to (-1, 4) before use
            is_distractor - optional tensor; the labels are multiplied by
                (1 - is_distractor), reshaped to (-1, 1, 1)

        returns:
            The label tensor from prutils.gaussian_label_function (scaled in
            place when is_distractor is given)
        """
        cfg = self.label_function_params

        label = prutils.gaussian_label_function(
            target_bb.view(-1, 4),
            cfg['sigma_factor'],
            cfg['kernel_sz'],
            cfg['feature_sz'],
            self.output_sz,
            end_pad_if_even=cfg.get('end_pad_if_even', True))

        if is_distractor is None:
            return label

        # Broadcast one factor per sample over the two trailing label dimensions.
        keep = (1 - is_distractor).view(-1, 1, 1).float()
        label *= keep
        return label
コード例 #4
0
ファイル: processing.py プロジェクト: yjybuaa/pytracking
    def _generate_label_function(self, target_bb):
        """ Builds the gaussian label function for every box in target_bb
        args:
            target_bb - target bounding box (num_images, 4)

        returns:
            torch.Tensor - Tensor of shape (num_images, label_sz, label_sz) containing the label for each sample
        """
        cfg = self.label_function_params
        boxes = target_bb.view(-1, 4)

        return prutils.gaussian_label_function(
            boxes,
            cfg['sigma_factor'],
            cfg['kernel_sz'],
            cfg['feature_sz'],
            self.output_sz,
            end_pad_if_even=cfg.get('end_pad_if_even', True))
コード例 #5
0
    def init_classifier(self, init_backbone_feat):
        """ Initialises the classifier, the transformer memory and the target filter
        args:
            init_backbone_feat - backbone features of the initial samples, passed
                on to self.get_classification_features

        Sets self.feature_sz, self.kernel_size, self.output_sz,
        self.output_window, self.transformer_label, self.x_clf,
        self.transformer_memory and self.target_filter. When dropout
        augmentation is enabled, self.transforms is extended as well.
        """
        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Overwrite some parameters in the classifier. (These are not generally changed)
        self._overwrite_classifier_params(feature_dim=x.shape[-3])

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and self.params.get(
                'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            # One extra transform entry per dropout copy, mirroring the first sample.
            self.transforms.extend(self.transforms[:1] * num)
            x = torch.cat([
                x,
                F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1),
                            p=prob,
                            training=True)
            ])

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))
        ksz = self.net.classifier.filter_size
        self.kernel_size = torch.Tensor(
            [ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        # Output is one larger than the feature map along any axis with an even kernel.
        self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

        # Construct output window
        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long(),
                    (self.output_sz * self.params.effective_search_area /
                     self.params.search_area_scale).long(),
                    centered=True).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(),
                                                centered=True).to(
                                                    self.params.device)
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = self.params.get('net_opt_iter', None)

        # Gaussian label passed to the transformer decoder as `pos`.
        self.transformer_label = prutils.gaussian_label_function(
            target_boxes.cpu().view(-1, 4),
            0.1,
            ksz,
            self.feature_sz,
            self.img_sample_sz,
            end_pad_if_even=False)

        # Use the configured device (as for the output window above) instead of
        # a hard-coded .cuda(), so CPU-only configurations keep working.
        self.transformer_label = self.transformer_label.unsqueeze(1).to(
            self.params.device)
        self.x_clf = x

        self.transformer_memory, _ = self.net.classifier.transformer.encoder(
            self.x_clf.unsqueeze(1), pos=None)

        # Decode every training sample against the memory; gather the results
        # and concatenate once instead of growing a tensor inside the loop.
        decoder = self.net.classifier.transformer.decoder
        encoded_feats = []
        for sample in x:
            _, cur_encoded_feat = decoder(
                sample.unsqueeze(0).unsqueeze(0),
                memory=self.transformer_memory,
                pos=self.transformer_label,
                query_pos=None)
            encoded_feats.append(cur_encoded_feat)
        x = torch.cat(encoded_feats, 0).contiguous()

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(
                x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

        # Init memory
        if self.params.get('update_classifier', True):
            self.init_memory(TensorList([x]))
        '''
コード例 #6
0
    def track(self, image, info: dict = None) -> dict:
        """ Processes one frame: localizes the target, optionally updates the models, and reports the box
        args:
            image - current frame, converted with numpy_to_torch
            info - optional dict; not read by this method

        returns:
            dict - {'target_bbox': box}. box is a 4-element list built from
                self.pos and self.target_sz: pos - (target_sz - 1) / 2 followed
                by target_sz, each with its two components swapped. It is
                [-1, -1, -1, -1] when the 'output_not_found_box' parameter is
                set and the target was flagged 'not_found'.
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Extract backbone features
        backbone_feat, sample_coords, im_patches = self.extract_backbone_features(
            im, self.get_centered_sample_pos(),
            self.target_scale * self.params.scale_factors, self.img_sample_sz)
        # Extract classification features
        x_clf = self.get_classification_features(backbone_feat)
        decoded_x, test_x = self.transformer_decoder(x_clf)

        # Location of sample
        sample_pos, sample_scales = self.get_sample_location(sample_coords)

        # Compute classification scores
        scores_raw = self.classify_target(test_x)

        # Localize the target
        translation_vec, scale_ind, s, flag = self.localize_target(
            scores_raw, sample_pos, sample_scales)
        new_pos = sample_pos[scale_ind, :] + translation_vec

        # Update position and scale
        if flag != 'not_found':
            if self.params.get('use_iou_net', True):
                update_scale_flag = self.params.get(
                    'update_scale_when_uncertain', True) or flag != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(new_pos)
                self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                       sample_scales[scale_ind], scale_ind,
                                       update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        # ------- UPDATE ------- #
        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.get('hard_negative_learning_rate',
                                        None) if hard_negative else None

        if update_flag and self.params.get('update_classifier', False):
            # Get train sample
            train_x = test_x[scale_ind:scale_ind + 1, ...]

            # Create target_box and label for spatial sample
            target_box = self.get_iounet_box(self.pos, self.target_sz,
                                             sample_pos[scale_ind, :],
                                             sample_scales[scale_ind])

            # Update the classifier model
            self.update_classifier(train_x, target_box, learning_rate,
                                   s[scale_ind, ...])

            # Refresh the transformer memory only on every
            # `transformer_skipping`-th frame.
            if (self.frame_num - 1) % self.params.transformer_skipping == 0:
                cur_tf_label = prutils.gaussian_label_function(
                    target_box.cpu().view(-1, 4),
                    0.1,
                    self.net.classifier.filter_size,
                    self.feature_sz,
                    self.img_sample_sz,
                    end_pad_if_even=False)
                # Use the configured device rather than a hard-coded .cuda()
                # so CPU-only configurations keep working.
                cur_tf_label = cur_tf_label.unsqueeze(1).to(self.params.device)

                # Newest entry goes first; once the memory is full the last
                # (oldest) entry is dropped to keep its size bounded.
                if self.x_clf.shape[0] < self.params.transformer_memory_size:
                    kept_labels = self.transformer_label
                    kept_feats = self.x_clf
                else:
                    kept_labels = self.transformer_label[:-1, ...]
                    kept_feats = self.x_clf[:-1, ...]

                self.transformer_label = torch.cat([cur_tf_label, kept_labels],
                                                   dim=0)
                self.x_clf = torch.cat([x_clf, kept_feats], dim=0)

                # Re-encode the whole memory with the updated feature set.
                self.transformer_memory, _ = self.net.classifier.transformer.encoder(
                    self.x_clf.unsqueeze(1), pos=None)

        # Set the pos of the tracker to iounet pos
        if self.params.get('use_iou_net',
                           True) and flag != 'not_found' and hasattr(
                               self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()

        # Visualize and set debug info
        self.search_area_box = torch.cat(
            (sample_coords[scale_ind,
                           [1, 0]], sample_coords[scale_ind, [3, 2]] -
             sample_coords[scale_ind, [1, 0]] - 1))
        self.debug_info['flag' + self.id_str] = flag
        self.debug_info['max_score' + self.id_str] = max_score
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2,
                                 'Score Map' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map,
                        5,
                        title='Max score = {:.2f}'.format(max_score))

        # Compute output bounding box
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        if self.params.get('output_not_found_box',
                           False) and flag == 'not_found':
            output_state = [-1, -1, -1, -1]
        else:
            output_state = new_state.tolist()

        out = {'target_bbox': output_state}
        return out