# Code example #1
class DRNet(BaseTracker):
    def initialize_features(self):
        """Initialize the feature extractor once per tracker instance.

        The ``features_initialized`` flag is set after the first call, so
        subsequent calls skip the (potentially expensive) initialization.
        """
        if not getattr(self, 'features_initialized', False):
            # NOTE(review): the guarded statement was absent (the ``if`` had
            # no body); restored as the feature-extractor initialisation used
            # by ATOM-style trackers -- confirm against the project source.
            self.params.features.initialize()
        self.features_initialized = True

    def initialize(self, image, state, *args, **kwargs):
        # Set up the tracker from the first frame.
        #
        # image: first frame; image.shape[2] is compared with 3 to decide
        #     whether it is a colour image, and it is converted with
        #     numpy_to_torch further down.
        # state: initial target box, indexed 0..3. state[0]/state[1] are
        #     offsets and state[2]/state[3] sizes; pos and target_sz are stored
        #     with the index-1/index-3 component first (presumably (y, x)
        #     order -- confirm against the caller).
        #
        # NOTE(review): this method is syntactically incomplete. Several
        # statements end with an open parenthesis, some branches have lost
        # their `else:` line, and a few "section" comments are not followed by
        # any statement. Every gap is flagged below and must be restored from
        # the project source before this code can run.

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        # NOTE(review): no statement follows this comment; presumably a call
        # to self.initialize_features() belongs here -- confirm.

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Accumulated processing time; `tic` marks the start of initialization.
        self.time = 0
        tic = time.time()

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
        # NOTE(review): the expression above is cut off (unclosed parenthesis).
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
        # NOTE(review): divisor missing above (unclosed parenthesis).
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /
        # NOTE(review): divisor missing above (unclosed parenthesis).
        # print("self.target_scale",self.target_scale)
        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)
        # alpha / beta are optional params (default 0); alpha is later passed
        # to ConvProblem in init_optimization.
        self.alpha = getattr(self.params, 'alpha', 0)
        self.beta = getattr(self.params, 'beta', 0)
        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if getattr(self.params, 'search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                # NOTE(review): parentheses are unbalanced here -- a wrapping
                # call (presumably torch.sqrt) seems to be missing.
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
            # NOTE(review): expression above is cut off, and the `else:` that
            # should guard the raise below appears to be missing.
            raise ValueError('Unknown search area shape')
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
            # NOTE(review): the second adjustment below looks like the body of
            # a missing `else:` branch (even feature size) -- confirm.
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')[0]

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
        # NOTE(review): attribute name missing above (unclosed parenthesis).
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            # NOTE(review): an `else:` appears to be missing before the
            # assignment below, and that expression is itself cut off.
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)

        # Optional window applied to the score map (None disables it).
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long() * self.params.effective_search_area /
                # NOTE(review): call above is cut off; the hann2d call below
                # presumably belongs to a missing `else:` and is also cut off.
                self.output_window = dcf.hann2d(self.output_sz.long(),

        # Initialize some learning things
        # NOTE(review): no statement follows this comment; presumably
        # self.init_learning() belongs here -- confirm.

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        ## Initialize the regression branch
        # NOTE(review): no statement follows this comment; the intended call
        # cannot be determined from this file.

        # Initialize projection matrix
        # NOTE(review): no statement follows this comment; presumably
        # self.init_projection_matrix(x) belongs here -- confirm.

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        # NOTE(review): no statement follows this comment; presumably
        # self.init_memory(train_x) belongs here -- confirm.

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        # Copy of the position; track() copies it back into self.pos when
        # use_iou_net is set.
        self.pos_drnet = self.pos.clone()

        self.time += time.time() - tic

    def init_optimization(self, train_x, init_y):
        # Create the correlation filter, optionally run the joint
        # filter/projection optimization, then build the filter optimizer that
        # is kept for later use.
        #
        # train_x: initial training samples (iterated per feature block).
        # init_y: label functions for the initial samples.
        #
        # NOTE(review): this method is syntactically incomplete -- many calls
        # end with an open parenthesis and several `else:`/`pass`/`.run(...)`
        # lines appear to have been dropped. Gaps are flagged below; restore
        # them from the project source before use.

        # Initialize filter
        filter_init_method = getattr(self.params, 'filter_init_method',
        # NOTE(review): default value missing above (unclosed parenthesis).

        self.filter = TensorList([
            x.new_zeros(1, cdim, sz[0], sz[1]) for x, cdim, sz in zip(
                train_x, self.compressed_dim, self.kernel_size)
        # NOTE(review): closing `])` missing above.

        if filter_init_method == 'zeros':
        # NOTE(review): branch body missing (presumably `pass`).
        elif filter_init_method == 'randn':
            for f in self.filter:
                f.normal_(0, 1 / f.numel())
        elif filter_init_method == 'msra':
            for f in self.filter:
                nn.init.kaiming_normal_(f, a=1)

            # NOTE(review): the raise below presumably belongs to a missing
            # `else:` branch.
            raise ValueError('Unknown "filter_init_method"')

        # Get parameters
        self.params.update_projection_matrix = getattr(
            self.params, 'update_projection_matrix',
            True) and self.params.use_projection_matrix
        optimizer = getattr(self.params, 'optimizer', 'GaussNewtonCG')

        # Setup factorized joint optimization
        if self.params.update_projection_matrix:
            # self.joint_problem = FactorizedConvProblem(self.init_training_samples, init_y, self.filter_reg,
            #                                            self.fparams.attribute('projection_reg'), self.params, self.init_sample_weights,
            #                                            self.projection_activation, self.response_activation)
            self.joint_problem = FactorizedConvProblem(
                self.init_training_samples, init_y, self.filter_reg,
                self.fparams.attribute('projection_reg'), self.params,
                self.init_sample_weights, self.projection_activation,
            # NOTE(review): trailing arguments and `)` missing above.

            # Variable containing both filter and projection matrix

            # joint_var = TensorList([TensorList([tmp_filter,tmp_p]) for tmp_filter,tmp_p in zip(self.filter,self.projection_matrix)])
            joint_var = self.filter.concat(self.projection_matrix)
            # print(len(joint_var),len(self.init_training_samples),len(init_y))
            # Initialize optimizer
            analyze_convergence = getattr(self.params, 'analyze_convergence',
            # NOTE(review): default value missing above (unclosed parenthesis).
            if optimizer == 'GaussNewtonCG':
                # self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var, plotting=(self.params.debug >= 3), analyze=analyze_convergence, fig_num=(12, 13, 14))
                # NOTE(review): compared with the commented-out call above,
                # the problem/variable/analyze arguments are absent here.
                self.joint_optimizer = GaussNewtonCG(
                    plotting=(self.params.debug >= 3),
                    fig_num=(12, 13, 14))
            elif optimizer == 'GradientDescentL2':
                # NOTE(review): leading arguments appear to be missing here
                # as well.
                self.joint_optimizer = GradientDescentL2(
                    plotting=(self.params.debug >= 3),
                    fig_num=(12, 13))

            # Do joint optimization
            if isinstance(self.params.init_CG_iter, (list, tuple)):
                    # NOTE(review): the `.run(...)` calls and the `else:`
                    # around the fragment below are missing.
                    self.params.init_CG_iter // self.params.init_GN_iter,
                # for tmp_joint_optimizer in self.joint_optimizer:
                #     tmp_joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter, self.params.init_GN_iter)

            # When convergence analysis is enabled, values are appended to
            # '<opt_name>_<val_name>.txt' and execution is aborted on purpose.
            if analyze_convergence:
                opt_name = 'CG' if getattr(self.params, 'CG_optimizer',
                                           True) else 'GD'
                for val_name, values in zip(['loss', 'gradient'], [
                # NOTE(review): the list of value sequences is cut off above.
                    val_str = ' '.join(
                        ['{:.8e}'.format(v.item()) for v in values])
                    file_name = '{}_{}.txt'.format(opt_name, val_name)
                    with open(file_name, 'a') as f:
                        f.write(val_str + '\n')
                raise RuntimeError('Exiting')

        # Re-project samples with the new projection matrix
        compressed_samples = self.project_sample(self.init_training_samples,
        # NOTE(review): second argument missing above (unclosed parenthesis).
        for train_samp, init_samp in zip(self.training_samples,
        # NOTE(review): second iterable missing above (unclosed parenthesis).
            train_samp[:init_samp.shape[0], ...] = init_samp

        self.hinge_mask = None

        # Initialize optimizer
        self.conv_problem = ConvProblem(self.training_samples, self.y,
                                        self.filter_reg, self.sample_weights,
                                        self.response_activation, self.alpha,
        # NOTE(review): trailing arguments and `)` missing above.
        # self.conv_problem = [ConvProblem(TensorList([tmp_train]), TensorList([tmp_y]), self.filter_reg, TensorList([tmp_weight]), self.response_activation) for tmp_train,tmp_y,tmp_weight in zip(self.training_samples,self.y,self.sample_weights)]
        if optimizer == 'GaussNewtonCG':
            # NOTE(review): leading arguments appear to be missing; compare
            # the commented-out variant below.
            self.filter_optimizer = ConjugateGradient(
                debug=(self.params.debug >= 3),
                fig_num=(12, 13))
            # self.filter_optimizer = [ConjugateGradient(tmp_problem, TensorList([tmp_filter]), fletcher_reeves=self.params.fletcher_reeves,
            # direction_forget_factor=self.params.direction_forget_factor, debug=(self.params.debug>=3), fig_num=(12,13)) for tmp_problem,tmp_filter in zip(self.conv_problem,self.filter)]
        elif optimizer == 'GradientDescentL2':
            self.filter_optimizer = GradientDescentL2(
                debug=(self.params.debug >= 3),
            # NOTE(review): call above is cut off (unclosed parenthesis).

        # Transfer losses from previous optimization
        if self.params.update_projection_matrix:
            self.filter_optimizer.residuals = self.joint_optimizer.residuals
            self.filter_optimizer.losses = self.joint_optimizer.losses
            # for tmp_filter_optimizer,tmp_joint_optimizer in zip(self.filter_optimizer,self.joint_optimizer):
            #     tmp_filter_optimizer.residuals = tmp_joint_optimizer.residuals
            #     tmp_filter_optimizer.losses = tmp_joint_optimizer.losses

        if not self.params.update_projection_matrix:
            # NOTE(review): branch body missing; presumably the filter
            # optimizer is run for params.init_CG_iter iterations -- confirm.
            # for tmp_filter_optimizer in self.filter_optimizer:
            #     tmp_filter_optimizer.run(self.params.init_CG_iter)

        # Post optimization
        # NOTE(review): no statement follows; presumably the filter optimizer
        # is run for params.post_init_CG_iter iterations -- confirm.
        # for tmp_filter_optimizer in self.filter_optimizer:
        #     tmp_filter_optimizer.run(self.params.post_init_CG_iter)

        # Free memory
        del self.init_training_samples
        if self.params.use_projection_matrix:
            # for tmp_joint_problem,tmp_joint_optimizer in zip(self.joint_problem,self.joint_optimizer):
            del self.joint_problem, self.joint_optimizer

    def track(self, image):
        """Localize the target in a new frame and update the model.

        Args:
            image: the new frame; converted with ``numpy_to_torch``.

        Returns:
            list: the new target state as four numbers, in the same layout as
            the ``state`` consumed by ``initialize`` (two offsets followed by
            two sizes).
        """
        self.frame_num += 1

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        # NOTE(review): the sample size argument was truncated; restored as
        # self.img_sample_sz (the size the features were configured for).
        test_x = self.extract_processed_sample(im, self.pos, sample_scales,
                                               self.img_sample_sz)

        # Compute scores
        scores_raw = self.apply_filter(test_x)

        translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

        # Update position and scale
        if flag != 'not_found':
            if self.use_iou_net:
                update_scale_flag = getattr(
                    self.params, 'update_scale_when_uncertain',
                    True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.predict_target_box(sample_pos, sample_scales[scale_ind],
                                        scale_ind, update_scale_flag)

            elif getattr(self.params, 'use_classifier', True):
                # Without the box-regression branch the scale comes from the
                # best-scoring sample scale.
                self.update_state(sample_pos + translation_vec,
                                  sample_scales[scale_ind])

        if self.params.debug >= 2:
            show_tensor(s[scale_ind, ...],
                        title='Max score = {:.2f}'.format(
                            torch.max(s[scale_ind, ...]).item()))

        # ------- UPDATE ------- #

        # Check flags and set learning rate if hard negative
        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])

            # Create label for sample
            train_y = self.get_label_function(sample_pos,
                                              sample_scales[scale_ind])

            # Update memory
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        # NOTE(review): both branch bodies were missing; restored as runs of
        # the single filter optimizer with the iteration counts named in the
        # previously commented-out per-feature variant -- confirm.
        if hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num - 1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and flag != 'not_found':
            self.pos = self.pos_drnet.clone()

        # Return new state
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        return new_state.tolist()

    def apply_filter(self, sample_x: TensorList):
        """Convolve every feature block with its corresponding filter.

        Args:
            sample_x: feature blocks, paired element-wise with ``self.filter``.

        Returns:
            TensorList: one ``operation.conv2d(..., mode='same')`` result per
            (feature block, filter) pair.
        """
        return TensorList([
            operation.conv2d(tmp_x, tmp_filter, mode='same')
            for tmp_x, tmp_filter in zip(sample_x, self.filter)
        ])

    def localize_target(self, scores_raw):
        """Find the target displacement and best scale from raw filter scores.

        Args:
            scores_raw: per-feature score maps produced by ``apply_filter``.

        Returns:
            tuple: ``(translation_vec, scale_ind, scores, flag)``. When
            ``params.advanced_localization`` is set the result of
            ``localize_advanced`` is returned; otherwise ``flag`` is ``None``.
        """
        # Weighted sum (if multiple features) with interpolation in fourier domain
        # The per-feature weight is read but the weighting itself is currently
        # disabled, so the raw scores are used unweighted.
        weight = self.fparams.attribute('translation_weight', 1.0)
        # Normalise the transform by the spatial size of the score maps.
        sf_weighted = fourier.cfft2(scores_raw) / (scores_raw.size(2) *
                                                   scores_raw.size(3))

        # Shift each block by a kernel-parity dependent amount.
        for i, (sz, ksz) in enumerate(zip(self.feature_sz, self.kernel_size)):
            sf_weighted[i] = fourier.shift_fs(
                sf_weighted[i],
                math.pi * (1 - torch.Tensor([ksz[0] % 2, ksz[1] % 2]) / sz))

        scores_fs = fourier.sum_fs(sf_weighted)
        scores = fourier.sample_fs(scores_fs, self.output_sz)

        if self.output_window is not None and not getattr(
                self.params, 'perform_hn_without_windowing', False):
            scores *= self.output_window

        if getattr(self.params, 'advanced_localization', False):
            return self.localize_advanced(scores)

        # Get maximum
        max_score, max_disp = dcf.max2d(scores)
        _, scale_ind = torch.max(max_score, dim=0)
        max_disp = max_disp.float().cpu()

        # Convert to displacements in the base scale
        disp = (max_disp +
                self.output_sz / 2) % self.output_sz - self.output_sz / 2

        # Compute translation vector and scale change factor
        translation_vec = disp[scale_ind, ...].view(-1) * (
            self.img_support_sz / self.output_sz) * self.target_scale
        translation_vec *= self.params.scale_factors[scale_ind]

        # Shift the score output for visualization purposes
        if self.params.debug >= 2:
            sz = scores.shape[-2:]
            scores = torch.cat(
                [scores[..., sz[0] // 2:, :], scores[..., :sz[0] // 2, :]], -2)
            scores = torch.cat(
                [scores[..., :, sz[1] // 2:], scores[..., :, :sz[1] // 2]], -1)

        return translation_vec, scale_ind, scores, None

    def localize_advanced(self, scores):
        """Does the advanced localization with hard negative detection and target not found.

        Args:
            scores: score maps, one per scale, with the spatial dimensions
                last.

        Returns:
            tuple: ``(translation_vec, scale_ind, scores, flag)`` where
            ``flag`` is ``'not_found'``, ``'hard_negative'``, ``'uncertain'``
            or ``None``.
        """

        sz = scores.shape[-2:]

        if self.output_window is not None and getattr(
                self.params, 'perform_hn_without_windowing', False):
            # Keep an un-windowed, shifted copy for the distractor search.
            scores_orig = scores.clone()

            scores_orig = torch.cat([
                scores_orig[..., (sz[0] + 1) // 2:, :],
                scores_orig[..., :(sz[0] + 1) // 2, :]
            ], -2)
            scores_orig = torch.cat([
                scores_orig[..., :, (sz[1] + 1) // 2:],
                scores_orig[..., :, :(sz[1] + 1) // 2]
            ], -1)

            scores *= self.output_window

        # Shift scores back
        scores = torch.cat([
            scores[..., (sz[0] + 1) // 2:, :],
            scores[..., :(sz[0] + 1) // 2, :]
        ], -2)
        scores = torch.cat([
            scores[..., :, (sz[1] + 1) // 2:],
            scores[..., :, :(sz[1] + 1) // 2]
        ], -1)

        # Find maximum
        max_score1, max_disp1 = dcf.max2d(scores)
        _, scale_ind = torch.max(max_score1, dim=0)
        max_score1 = max_score1[scale_ind]
        max_disp1 = max_disp1[scale_ind, ...].float().cpu().view(-1)
        target_disp1 = max_disp1 - self.output_sz // 2
        translation_vec1 = target_disp1 * (self.img_support_sz /
                                           self.output_sz) * self.target_scale

        if max_score1.item() < self.params.target_not_found_threshold:
            return translation_vec1, scale_ind, scores, 'not_found'

        if self.output_window is not None and getattr(
                self.params, 'perform_hn_without_windowing', False):
            scores = scores_orig

        # Mask out target neighborhood
        # The neighbourhood is clipped to the score map: lower bounds at 0,
        # upper bounds at the map height/width.
        target_neigh_sz = self.params.target_neighborhood_scale * self.target_sz / self.target_scale
        tneigh_top = max(
            round(max_disp1[0].item() - target_neigh_sz[0].item() / 2), 0)
        tneigh_bottom = min(
            round(max_disp1[0].item() + target_neigh_sz[0].item() / 2 + 1),
            sz[0])
        tneigh_left = max(
            round(max_disp1[1].item() - target_neigh_sz[1].item() / 2), 0)
        tneigh_right = min(
            round(max_disp1[1].item() + target_neigh_sz[1].item() / 2 + 1),
            sz[1])
        scores_masked = scores[scale_ind:scale_ind + 1, ...].clone()
        scores_masked[..., tneigh_top:tneigh_bottom,
                      tneigh_left:tneigh_right] = 0

        # Find new maximum
        max_score2, max_disp2 = dcf.max2d(scores_masked)
        max_disp2 = max_disp2.float().cpu().view(-1)
        target_disp2 = max_disp2 - self.output_sz // 2
        translation_vec2 = target_disp2 * (self.img_support_sz /
                                           self.output_sz) * self.target_scale

        # Handle the different cases
        if max_score2 > self.params.distractor_threshold * max_score1:
            disp_norm1 = torch.sqrt(torch.sum(target_disp1**2))
            disp_norm2 = torch.sqrt(torch.sum(target_disp2**2))
            # NOTE: 'dispalcement_scale' is the parameter name as spelled in
            # the params object; do not "fix" the spelling here.
            disp_threshold = self.params.dispalcement_scale * math.sqrt(
                sz[0] * sz[1]) / 2

            if disp_norm2 > disp_threshold and disp_norm1 < disp_threshold:
                return translation_vec1, scale_ind, scores, 'hard_negative'
            if disp_norm2 < disp_threshold and disp_norm1 > disp_threshold:
                return translation_vec2, scale_ind, scores, 'hard_negative'
            if disp_norm2 > disp_threshold and disp_norm1 > disp_threshold:
                return translation_vec1, scale_ind, scores, 'uncertain'

            # If also the distractor is close, return with highest score
            return translation_vec1, scale_ind, scores, 'uncertain'

        if max_score2 > self.params.hard_negative_threshold * max_score1 and max_score2 > self.params.target_not_found_threshold:
            return translation_vec1, scale_ind, scores, 'hard_negative'

        return translation_vec1, scale_ind, scores, None

    def extract_sample(self, im: torch.Tensor, pos: torch.Tensor, scales,
                       sz: torch.Tensor):
        """Delegate feature extraction to the configured feature object.

        All four arguments are forwarded unchanged to
        ``self.params.features.extract`` and its result is returned as is.
        """
        feature_extractor = self.params.features
        extracted = feature_extractor.extract(im, pos, scales, sz)
        return extracted

    def get_iou_features(self):
        """Return the feature set's unique ``'iounet_features'`` attribute."""
        attribute_name = 'iounet_features'
        feature_set = self.params.features
        return feature_set.get_unique_attribute(attribute_name)

    def get_iou_backbone_features(self):
        """Return the feature set's unique backbone-feature attribute.

        NOTE(review): the attribute name was truncated from the call; restored
        as ``'iounet_backbone_features'`` (the name used by ATOM-style
        trackers) -- confirm against the feature definition.
        """
        return self.params.features.get_unique_attribute(
            'iounet_backbone_features')

    def extract_processed_sample(self, im: torch.Tensor, pos: torch.Tensor,
                                 scales,
                                 sz: torch.Tensor) -> (TensorList, TensorList):
        """Extract features and run them through projection and preprocessing.

        Args:
            im: image tensor to sample from.
            pos: sampling position.
            scales: sample scales, forwarded to ``extract_sample``. This
                parameter was missing from the signature although the body
                and the caller in ``track`` both use it.
            sz: sample size.

        Returns:
            The result of ``preprocess_sample(project_sample(x))`` for the
            extracted features ``x``.
        """
        x = self.extract_sample(im, pos, scales, sz)
        return self.preprocess_sample(self.project_sample(x))

    def preprocess_sample(self, x: TensorList) -> (TensorList, TensorList):
        """Optionally multiply the sample by the feature window.

        The window (``self.feature_window``) is applied only when the
        ``_feature_window`` parameter is present and truthy; otherwise ``x``
        is returned unchanged.
        """
        apply_window = getattr(self.params, '_feature_window', False)
        return x * self.feature_window if apply_window else x

    def project_sample(self, x: TensorList, proj_matrix=None):
        """Apply the projection matrix and projection activation to ``x``.

        Args:
            x: feature blocks to project.
            proj_matrix: projection matrices, one per feature block. Defaults
                to ``self.projection_matrix`` when ``None``.

        Returns:
            TensorList: ``projection_activation(conv2d(block, matrix))`` for
            every (block, matrix) pair.
        """
        # Apply projection matrix
        if proj_matrix is None:
            proj_matrix = self.projection_matrix
        return TensorList([
            self.projection_activation(operation.conv2d(tmp_x, tmp_p))
            for tmp_x, tmp_p in zip(x, proj_matrix)
        ])

    def init_learning(self):
        """Set up the feature windows, filter regularization and activations.

        Sets ``self.feature_window``, ``self.filter_reg``,
        ``self.projection_activation`` and ``self.response_activation``.

        Raises:
            ValueError: if an activation name is unknown, or ``'mlu'`` is
                requested without its parameter.
        """
        # Get window function
        self.feature_window = TensorList(
            [dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Filter regularization
        self.filter_reg = self.fparams.attribute('filter_reg')

        # NOTE(review): the getattr defaults were truncated; restored as
        # 'none' (identity activation) -- confirm against the project source.

        # Activation function after the projection matrix (phi_1 in the paper)
        self.projection_activation = self._build_activation(
            getattr(self.params, 'projection_activation', 'none'))

        # Activation function after the output scores (phi_2 in the paper)
        self.response_activation = self._build_activation(
            getattr(self.params, 'response_activation', 'none'))

    @staticmethod
    def _build_activation(spec):
        """Return the callable for an activation spec (name or (name, param)).

        Building each activation in its own scope keeps the ``'mlu'``
        parameter private to that activation; sharing one local variable
        between the two lambdas would make both use the last assigned value.
        """
        act_param = None
        if isinstance(spec, tuple):
            spec, act_param = spec

        if spec == 'none':
            return lambda x: x
        if spec == 'relu':
            return torch.nn.ReLU(inplace=True)
        if spec == 'elu':
            return torch.nn.ELU(inplace=True)
        if spec == 'mlu':
            if act_param is None:
                raise ValueError(
                    "Activation 'mlu' requires a parameter: ('mlu', value)")
            return lambda x: F.elu(F.leaky_relu(x, 1 / act_param), act_param)
        raise ValueError('Unknown activation')

    def generate_init_samples(self, im: torch.Tensor) -> TensorList:
        """Generate augmented initial samples."""
        # Builds self.transforms from the entries of params.augmentation and
        # extracts one sample per transform via features.extract_transformed.
        #
        # NOTE(review): this method is syntactically incomplete -- the lines
        # that add transforms to self.transforms, several closing brackets and
        # two loop iterables appear to have been dropped. Gaps are flagged
        # below; restore them from the project source before use.

        # Compute augmentation size
        aug_expansion_factor = getattr(self.params,
                                       'augmentation_expansion_factor', None)
        aug_expansion_sz = self.img_sample_sz.clone()
        aug_output_sz = None
        if aug_expansion_factor is not None and aug_expansion_factor != 1:
            aug_expansion_sz = (self.img_sample_sz *
            # NOTE(review): expression above is cut off (unclosed parenthesis).
            # The next line adjusts the expanded size by the parity of its
            # difference to the (integer) sample size.
            aug_expansion_sz += (aug_expansion_sz -
                                 self.img_sample_sz.long()) % 2
            aug_expansion_sz = aug_expansion_sz.float()
            aug_output_sz = self.img_sample_sz.long().tolist()

        # Random shift operator
        # Returns None unless params.random_shift_factor > 0, in which case a
        # random integer shift proportional to the sample size is drawn.
        get_rand_shift = lambda: None
        random_shift_factor = getattr(self.params, 'random_shift_factor', 0)
        if random_shift_factor > 0:
            get_rand_shift = lambda: (
                (torch.rand(2) - 0.5
                 ) * self.img_sample_sz * random_shift_factor).long().tolist()

        # Create transformations
        # NOTE(review): in each branch below the statement that wraps the
        # generator/constructor (presumably self.transforms.extend([...]) or
        # self.transforms.append(...)) is missing.
        self.transforms = [augmentation.Identity(aug_output_sz)]
        if 'shift' in self.params.augmentation:
                augmentation.Translation(shift, aug_output_sz)
                for shift in self.params.augmentation['shift']
        if 'relativeshift' in self.params.augmentation:
            # Converts a relative shift into pixels (fraction of half the
            # sample size).
            get_absolute = lambda shift: (torch.Tensor(shift) * self.
                                          img_sample_sz / 2).long().tolist()
                augmentation.Translation(get_absolute(shift), aug_output_sz)
                for shift in self.params.augmentation['relativeshift']
        if 'fliplr' in self.params.augmentation and self.params.augmentation[
        # NOTE(review): condition above is cut off (unclosed bracket).
                augmentation.FlipHorizontal(aug_output_sz, get_rand_shift()))
        if 'blur' in self.params.augmentation:
                augmentation.Blur(sigma, aug_output_sz, get_rand_shift())
                for sigma in self.params.augmentation['blur']
        if 'scale' in self.params.augmentation:
                augmentation.Scale(scale_factor, aug_output_sz,
                # NOTE(review): call above is cut off (unclosed parenthesis).
                for scale_factor in self.params.augmentation['scale']
        if 'rotate' in self.params.augmentation:
                augmentation.Rotate(angle, aug_output_sz, get_rand_shift())
                for angle in self.params.augmentation['rotate']

        # Generate initial samples
        init_samples = self.params.features.extract_transformed(
            im, self.pos, self.target_scale, aug_expansion_sz, self.transforms)

        # Remove augmented samples for those that shall not have
        for i, use_aug in enumerate(
        # NOTE(review): the iterable is missing above; it cannot be determined
        # from this file.
            if not use_aug:
                init_samples[i] = init_samples[i][0:1, ...]

        # Add dropout samples
        if 'dropout' in self.params.augmentation:
            num, prob = self.params.augmentation['dropout']
            # One extra copy of the first transform per dropout sample.
            self.transforms.extend(self.transforms[:1] * num)
            for i, use_aug in enumerate(
            # NOTE(review): the iterable is missing above.
                if use_aug:
                    init_samples[i] = torch.cat([
                        F.dropout2d(init_samples[i][0:1, ...].expand(
                            num, -1, -1, -1),
                    # NOTE(review): the remaining dropout2d arguments and the
                    # closing brackets are missing above.

        return init_samples

    def init_projection_matrix(self, x):
        """Initialize the projection matrix used to compress the features.

        Sets ``self.compressed_dim`` and ``self.projection_matrix``. When the
        projection is disabled, the matrices are ``None`` and the compressed
        dimension is taken from ``x.size(1)``.

        Args:
            x: initial feature samples, one entry per feature block.
        """
        # Set if using projection matrix
        # NOTE(review): the getattr arguments were truncated; restored with a
        # default of True (as in ATOM-style trackers) -- confirm.
        self.params.use_projection_matrix = getattr(self.params,
                                                    'use_projection_matrix',
                                                    True)

        if self.params.use_projection_matrix:
            self.compressed_dim = self.fparams.attribute(
                'compressed_dim', None)[0]

            proj_init_method = getattr(self.params, 'proj_init_method', 'pca')
            if proj_init_method == 'pca':
                # Flatten each block to (channels, everything else) and centre
                # it before forming the covariance.
                x_mat = TensorList([
                    e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone()
                    for e in x
                ])
                x_mat -= x_mat.mean(dim=1, keepdim=True)
                cov_x = x_mat @ x_mat.t()
                # Keep the first `cdim` left singular vectors as 1x1 kernels.
                self.projection_matrix = TensorList([
                    None if cdim is None else torch.svd(C)[0]
                    [:, :cdim].t().unsqueeze(-1).unsqueeze(-1).clone()
                    for C, cdim in zip(cov_x, self.compressed_dim)
                ])
            elif proj_init_method == 'randn':
                self.projection_matrix = TensorList([
                    None if cdim is None else ex.new_zeros(
                        cdim, ex.shape[1], 1, 1).normal_(
                            0, 1 / math.sqrt(ex.shape[1]))
                    for ex, cdim in zip(x, self.compressed_dim)
                ])
            elif proj_init_method == 'msra':
                self.projection_matrix = TensorList([
                    None if cdim is None else nn.init.kaiming_normal_(
                        ex.new_zeros(cdim, ex.shape[1], 1, 1), a=1)
                    for ex, cdim in zip(x, self.compressed_dim)
                ])
        else:
            self.compressed_dim = x.size(1)
            self.projection_matrix = TensorList([None] * len(x))

    def init_label_function(self, train_x):
        """Allocate the label memory and fill in labels for the initial samples.

        Sets ``self.y`` (label memory with ``params.sample_memory_size``
        slots per feature block) and ``self.sigma``.

        Args:
            train_x: initial training samples, one entry per feature block.

        Returns:
            TensorList: for each feature block, the first ``x.shape[0]``
            label maps (those belonging to the initial samples).
        """
        # Allocate label function
        self.y = TensorList([
            x.new_zeros(self.params.sample_memory_size, 1, x.shape[2],
                        x.shape[3]) for x in train_x
        ])

        # Output sigma factor
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        self.sigma = TensorList([
            (f / self.img_support_sz * self.base_target_sz).prod().sqrt() *
            output_sigma_factor[0][num] * torch.ones(2)
            for num, f in enumerate(self.feature_sz)
        ])

        # Center pos in normalized coords
        target_center_norm = (self.pos - self.pos.round()) / (
            self.target_scale * self.img_support_sz)

        # Generate label functions
        for y, sig, sz, ksz, x in zip(self.y, self.sigma, self.feature_sz,
                                      self.kernel_size, train_x):
            # Half-cell offset for even kernel sizes.
            center_pos = sz * target_center_norm + 0.5 * torch.Tensor(
                [(ksz[0] + 1) % 2, (ksz[1] + 1) % 2])
            for i, T in enumerate(self.transforms[:x.shape[0]]):
                # Move the label centre by the transform's shift, rescaled
                # from image-sample to feature resolution.
                sample_center = center_pos + torch.Tensor(
                    T.shift) / self.img_support_sz * sz
                y[i, 0,
                  ...] = dcf.label_function_spatial(sz, sig, sample_center)

        # Return only the ones to use for initial training
        return TensorList(
            [y[:x.shape[0], ...] for y, x in zip(self.y, train_x)])

    def init_memory(self, train_x):
        """Allocate the sample memory and the per-sample weights.

        Sets ``num_init_samples``, ``init_sample_weights``,
        ``init_training_samples``, ``num_stored_samples``,
        ``previous_replace_ind``, ``sample_weights`` and
        ``training_samples``.

        Args:
            train_x: initial training samples, one entry per feature block.
        """
        # Initialize first-frame training samples
        self.num_init_samples = train_x.size(0)
        # Uniform weight 1/N for the N initial samples of each block.
        self.init_sample_weights = TensorList(
            [x.new_ones(1) / x.shape[0] for x in train_x])
        self.init_training_samples = train_x

        # Sample counters and weights
        self.num_stored_samples = self.num_init_samples.copy()
        self.previous_replace_ind = [None] * len(self.num_stored_samples)
        self.sample_weights = TensorList(
            [x.new_zeros(self.params.sample_memory_size) for x in train_x])
        for sw, init_sw, num in zip(self.sample_weights,
                                    self.init_sample_weights,
                                    self.num_init_samples):
            sw[:num] = init_sw

        # Initialize memory
        self.training_samples = TensorList([
            x.new_zeros(self.params.sample_memory_size, cdim, x.shape[2],
                        x.shape[3])
            for x, cdim in zip(train_x, self.compressed_dim)
        ])

    def update_memory(self,
                      sample_x: TensorList,
                      sample_y: TensorList,
                      learning_rate=None):
        """Store a new training sample and its label in the memory.

        Args:
            sample_x: new sample, one entry per feature block.
            sample_y: label for the new sample, one entry per feature block.
            learning_rate: optional learning rate forwarded to
                ``update_sample_weights``; ``None`` lets it use the
                per-feature value.
        """
        # Update the weights and find out which slot of each block to replace.
        replace_ind = self.update_sample_weights(self.sample_weights,
                                                 self.previous_replace_ind,
                                                 self.num_stored_samples,
                                                 self.num_init_samples,
                                                 self.fparams, learning_rate)
        self.previous_replace_ind = replace_ind
        for train_samp, x, ind in zip(self.training_samples, sample_x,
                                      replace_ind):
            train_samp[ind:ind + 1, ...] = x
        for y_memory, y, ind in zip(self.y, sample_y, replace_ind):
            y_memory[ind:ind + 1, ...] = y
        if self.hinge_mask is not None:
            for m, y, ind in zip(self.hinge_mask, sample_y, replace_ind):
                m[ind:ind + 1,
                  ...] = (y >= self.params.hinge_threshold).float()
        self.num_stored_samples += 1

    def update_sample_weights(self,
                              sample_weights,
                              previous_replace_ind,
                              num_stored_samples,
                              num_init_samples,
                              fparams,
                              learning_rate=None):
        """Update the sample weights in place and pick the slots to replace.

        Args:
            sample_weights: per-block 1-D weight tensors (modified in place).
            previous_replace_ind: per-block index replaced last time, or
                ``None`` if nothing has been replaced yet.
            num_stored_samples: per-block number of samples stored so far.
            num_init_samples: per-block number of initial samples.
            fparams: per-block parameter objects providing ``learning_rate``
                and optionally ``init_samples_minimum_weight``.
            learning_rate: overrides the per-block learning rate when given.

        Returns:
            list: for each block, the memory index the new sample goes to.
        """
        # Update weights and get index to replace in memory
        replace_ind = []
        for sw, prev_ind, num_samp, num_init, fpar in zip(
                sample_weights, previous_replace_ind, num_stored_samples,
                num_init_samples, fparams):
            lr = learning_rate
            if lr is None:
                lr = fpar.learning_rate

            # A minimum weight of 0 is treated the same as "not set".
            init_samp_weight = getattr(fpar, 'init_samples_minimum_weight',
                                       None)
            if init_samp_weight == 0:
                init_samp_weight = None
            # When a minimum weight is kept for the initial samples they are
            # excluded from replacement.
            s_ind = 0 if init_samp_weight is None else num_init

            if num_samp == 0 or lr == 1:
                sw[:] = 0
                sw[0] = 1
                r_ind = 0
            else:
                # Get index to replace
                _, r_ind = torch.min(sw[s_ind:], 0)
                r_ind = r_ind.item() + s_ind

                # Update weights
                if prev_ind is None:
                    sw /= 1 - lr
                    sw[r_ind] = lr
                else:
                    sw[r_ind] = sw[prev_ind] / (1 - lr)

            sw /= sw.sum()
            if init_samp_weight is not None and sw[:num_init].sum(
            ) < init_samp_weight:
                sw /= init_samp_weight + sw[num_init:].sum()
                sw[:num_init] = init_samp_weight / num_init

            replace_ind.append(r_ind)

        return replace_ind

    def get_label_function(self, sample_pos, sample_scale):
        """Build the training label for a sample, one entry per feature block.

        Args:
            sample_pos: Position the sample was extracted at.
            sample_scale: Scale the sample was extracted at.

        Returns:
            TensorList: Output of ``dcf.label_function_spatial`` for every
            (sigma, feature size, kernel size) triple.
        """
        # Generate label function
        train_y = TensorList()
        # Target offset from the sample centre, relative to the sample extent.
        # NOTE(review): the divisor was truncated in the original; it is
        # restored as `sample_scale * self.img_support_sz` — confirm.
        target_center_norm = (self.pos - sample_pos) / (sample_scale *
                                                        self.img_support_sz)
        for sig, sz, ksz in zip(self.sigma, self.feature_sz, self.kernel_size):
            # The extra half-cell term is non-zero only for even kernel sizes.
            center = sz * target_center_norm + 0.5 * torch.Tensor(
                [(ksz[0] + 1) % 2, (ksz[1] + 1) % 2])
            train_y.append(dcf.label_function_spatial(sz, sig, center))
        return train_y

    def update_state(self, new_pos, new_scale=None):
        """Update the tracked position and, optionally, the target scale.

        Args:
            new_pos: Proposed target position.
            new_scale: Proposed target scale; when ``None`` the scale and
                target size are left unchanged.
        """
        # NOTE(review): the tails of the `clamp` and `torch.max` calls were
        # truncated in the original; they are restored from the surrounding
        # names (`min_scale_factor`/`max_scale_factor`, `inside_offset`).
        # Update scale
        if new_scale is not None:
            self.target_scale = new_scale.clamp(self.min_scale_factor,
                                                self.max_scale_factor)
            self.target_sz = self.base_target_sz * self.target_scale

        # Update pos: clamp so that the position stays within
        # [inside_offset, image_sz - inside_offset]; inside_offset is
        # negative, so the centre may leave the image by 0.3 * target_sz.
        inside_ratio = 0.2
        inside_offset = (inside_ratio - 0.5) * self.target_sz
        self.pos = torch.max(torch.min(new_pos, self.image_sz - inside_offset),
                             inside_offset)

    def get_iounet_box(self, pos, sz, sample_pos, sample_scale):
        """Express a target box in the coordinates of an extracted sample.

        All inputs are in original image coordinates.

        Returns:
            A 1-D tensor: the box's upper-left corner followed by its size,
            each with its two components flipped relative to the inputs.
        """
        patch_center = (self.iou_img_sample_sz - 1) / 2
        size_in_patch = sz / sample_scale
        center_in_patch = (pos - sample_pos) / sample_scale + patch_center
        upper_left = center_in_patch - (size_in_patch - 1) / 2
        return torch.cat(
            [upper_left.flip((0, )), size_in_patch.flip((0, ))])

    def init_dr_net(self):
        # Sets up `self.box_predictor` with gradients disabled and stores the
        # batch-averaged target features in `self.target_feat`.
        # NOTE(review): several statements in this method end in an unclosed
        # call or bracket — their continuation lines are missing, so the
        # method does not parse as shown.
        # Setup IoU net
        # NOTE(review): the argument of get_unique_attribute(...) is missing.
        self.box_predictor = self.params.features.get_unique_attribute(
        for p in self.box_predictor.parameters():
            p.requires_grad = False

        # Get target boxes for the different augmentations
        # NOTE(review): the last arguments of get_iounet_box(...) are missing,
        # and nothing visible ever appends to `target_boxes` before it is
        # concatenated.
        self.iou_target_box = self.get_iounet_box(self.pos, self.target_sz,
        target_boxes = TensorList()
        target_boxes = torch.cat(target_boxes.view(1, 4),

        # Get iou features
        iou_backbone_features = self.get_iou_backbone_features()

        # Remove other augmentations such as rotation
        # (keeps only the first `target_boxes.shape[0]` entries per tensor)
        iou_backbone_features = TensorList(
            [x[:target_boxes.shape[0], ...] for x in iou_backbone_features])

        # Extract target feat
        with torch.no_grad():
            target_feat = self.box_predictor.get_filter(
                iou_backbone_features, target_boxes)
        # Detach and average over dimension 0.
        self.target_feat = TensorList(
            [x.detach().mean(0) for x in target_feat])

        # NOTE(review): the call that wraps `tf.norm() / tf.numel()` below is
        # missing, as are the closing brackets of the comprehension.
        if getattr(self.params, 'iounet_not_use_reference', False):
            self.target_feat = TensorList([
                                tf.norm() / tf.numel())
                for tf in self.target_feat

    def offset2box(self, init_box, offset):
        """Decode regression offsets relative to reference boxes.

        Columns 0-1 of ``init_box`` are read as a box corner and columns 2-3
        as width and height. ``offset`` holds groups of four values
        (dx, dy, dw, dh) per row.

        Returns:
            A tensor shaped like ``offset`` whose groups of four columns hold
            (centre x, centre y, width, height) of the decoded boxes.
        """
        box_w = init_box[:, 2]
        box_h = init_box[:, 3]
        center_x = init_box[:, 0] + 0.5 * box_w
        center_y = init_box[:, 1] + 0.5 * box_h

        # Unit weights; the division is kept so the arithmetic is unchanged.
        weights = (1, 1, 1, 1)
        delta_x, delta_y, delta_w, delta_h = (
            offset[:, k::4] / w for k, w in enumerate(weights))

        # Cap the log-scale deltas so torch.exp() cannot blow up.
        log_cap = np.log(1000. / 16.)
        delta_w = torch.clamp(delta_w, max=log_cap)
        delta_h = torch.clamp(delta_h, max=log_cap)

        decoded = (
            delta_x * box_w[:, None] + center_x[:, None],
            delta_y * box_h[:, None] + center_y[:, None],
            torch.exp(delta_w) * box_w[:, None],
            torch.exp(delta_h) * box_h[:, None],
        )

        pred_boxes = offset.new_zeros(offset.shape)
        for k, values in enumerate(decoded):
            pred_boxes[:, k::4] = values
        return pred_boxes

    def predict_target_box(self,
        # Refines the target box with `self.box_predictor` and writes the
        # result back to `self.pos_drnet`, `self.target_sz` and, depending on
        # flags, `self.pos` and `self.target_scale`.
        # NOTE(review): the rest of the signature is missing; the body reads
        # `sample_pos`, `sample_scale`, `scale_ind` and `update_scale`.
        # print(self.pos,sample_pos,self.target_sz)
        # NOTE(review): the final argument(s) of get_iounet_box(...) are missing.
        init_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos,
        # Add two leading singleton dimensions.
        init_box = init_box.unsqueeze(0)
        init_box = init_box.unsqueeze(0)
        # NOTE(review): hard-coded .cuda() ignores `self.params.device`.
        init_box = init_box.cuda()
        # print(init_box.shape)
        iou_features = self.get_iou_features()
        # Keep only the features of the selected scale.
        iou_features = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in iou_features])
        # NOTE(review): the final argument(s) of predict_box(...) are missing.
        reg = self.box_predictor.predict_box(self.target_feat, iou_features,
        # print('reg',reg)
        init_box = init_box.view(-1, 4)
        reg = reg.view(-1, 4)

        predicted_box = self.offset2box(init_box, reg)

        # print(predicted_box.shape)
        # Only the first predicted box is used.
        predicted_box = predicted_box[0, :].cpu()
        # print(predicted_box.shape,self.iou_img_sample_sz.shape)
        # Map the prediction from sample coordinates back to image
        # coordinates (inverse of the mapping in get_iounet_box).
        new_pos = predicted_box[:2] - (self.iou_img_sample_sz - 1) / 2
        new_pos = new_pos.flip((0, )) * sample_scale + sample_pos
        new_target_sz = predicted_box[2:].flip((0, )) * sample_scale
        # NOTE(review): the divisor is missing; the commented-out variant
        # below divides by self.base_target_sz.prod().
        new_scale = torch.sqrt(new_target_sz.prod() /

        # Update position
        # new_pos = predicted_box[:2] + predicted_box[2:]/2 - (self.iou_img_sample_sz - 1) / 2
        # new_pos = new_pos.flip((0,)) * sample_scale + sample_pos
        # new_target_sz = predicted_box[2:].flip((0,)) * sample_scale
        # new_scale = torch.sqrt(new_target_sz.prod() / self.base_target_sz.prod())

        self.pos_drnet = new_pos.clone()
        # print('pos',self.pos,new_pos)
        # The refined position replaces self.pos unless the flag is disabled.
        if getattr(self.params, 'use_iounet_pos_for_learning', True):
            self.pos = new_pos.clone()
        # print('target_sz',self.target_sz,new_target_sz)
        self.target_sz = new_target_sz

        if update_scale:
            self.target_scale = new_scale
コード例 #2
    def initialize(self, image1, image2, state, *args, **kwargs):
        # Initializes the tracker from two input images and an initial state;
        # the features extracted from both images are concatenated along
        # dimension 1.
        # NOTE(review): this excerpt has lost lines (calls after some
        # comments, `else:` headers, the body of the final `if`), so it does
        # not parse as shown.

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        # NOTE(review): no statement follows the comment above.

        # Check if image is color
        self.params.features.set_is_color(image1.shape[2] == 3)
        self.params.features.set_is_color(image2.shape[2] == 3)
        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        self.time = 0
        tic = time.time()

        # Get position and size
        # pos and target_sz take state[1]/state[3] first and
        # state[0]/state[2] second, i.e. the component order is swapped
        # relative to `state`.
        self.pos = torch.Tensor([state[1] + (state[3] - 1)/2, state[0] + (state[2] - 1)/2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
        self.target_scale =  math.sqrt(search_area) / self.params.image_sample_size

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Set sizes
        self.img_sample_sz = torch.Tensor([self.params.image_sample_size, self.params.image_sample_size])
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        # NOTE(review): an `else:` appears to be missing between the two
        # assignments below; as written the second always overrides the first.
        if getattr(self.params, 'score_upsample_factor', None) is None:
            self.output_sz = self.feature_sz[0]
            self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        self.params.score_fusion_strategy = getattr(self.params, 'score_fusion_strategy', 'default')
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            # NOTE(review): an `else:` appears to be missing between the
            # clipped and the plain Hann window assignments.
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(self.output_sz.long(), self.output_sz.long()*self.params.effective_search_area / self.params.search_area_scale, centered=False).to(self.params.device)
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)

            self.output_window = self.output_window.squeeze(0)
        # Convert image
        im1 = numpy_to_torch(image1)
        im2 = numpy_to_torch(image2)
        #self.im = im

        # Setup bounds
        # Image size is taken from im1 only.
        self.image_sz = torch.Tensor([im1.shape[2], im1.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x1 = self.generate_init_samples(im1)
        x2 = self.generate_init_samples(im2)

        # Concatenate the two sample sets along dimension 1.
        x = TensorList([torch.cat((v,i),1) for v, i in zip(x1, x2)])


        # NOTE(review): the body of this `if` is missing.
        if self.use_iou_net:

        # Init memory
        # self.init_memory(x)

        self.time += time.time() - tic
コード例 #3
ファイル: deep.py プロジェクト: danielism97/CFKD
 def stride(self):
     """Return the stride of every output layer.

     Each entry is the layer's pooling stride multiplied by the stride
     registered for that layer in ``self.layer_stride``.
     """
     # The closing brackets of this comprehension were missing.
     return TensorList([
         s * self.layer_stride[l]
         for l, s in zip(self.output_layers, self.pool_stride)
     ])
コード例 #4
ファイル: eco.py プロジェクト: yaolinhua/pytracking-master
class ECO(BaseTracker):

    def initialize_features(self):
        # Marks the features as initialized via `self.features_initialized`.
        # NOTE(review): the body of the `if` below is missing, so the method
        # does not parse as shown; presumably the one-time feature setup call
        # belonged there — confirm against the upstream source.
        if not getattr(self, 'features_initialized', False):
        self.features_initialized = True

    def initialize(self, image, state, *args, **kwargs):
        # Initializes the tracker on the first frame: derives sizes from
        # `state`, builds windows / interpolation / regularization / label
        # tensors, extracts the initial samples, and sets up the sample
        # memory, the filter and both optimizers.
        # NOTE(review): this excerpt has lost lines (calls after some
        # comments, `else:` headers, bodies of some `if` statements), so it
        # does not parse as shown.

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        # NOTE(review): no statement follows the comment above.

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        # pos and target_sz take state[1]/state[3] first and
        # state[0]/state[2] second, i.e. the component order is swapped
        # relative to `state`.
        self.pos = torch.Tensor([state[1] + (state[3] - 1)/2, state[0] + (state[2] - 1)/2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        # target_scale only departs from 1.0 when the search area falls
        # outside [min_image_sample_size, max_image_sample_size].
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale =  math.sqrt(search_area / self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale =  math.sqrt(search_area / self.params.min_image_sample_size)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = torch.round(torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        # Adds 1 to every even feature size, so filter sizes are odd.
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz    # Interpolated size of the output
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        self.interp_fs = TensorList([dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                                self.params.interpolation_bicubic_a, self.params.interpolation_centering,
                                                self.params.interpolation_windowing, self.params.device) for sz in self.filter_sz])

        # Get regularization filter
        self.reg_filter = TensorList([dcf.get_reg_filter(self.img_support_sz, self.base_target_sz, fparams).to(self.params.device)
                                      for fparams in self.fparams])
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(self.base_target_sz.prod()) * output_sigma_factor
        self.yf = TensorList([dcf.label_function(sz, sig).to(self.params.device) for sz, sig in zip(self.filter_sz, sigma)])

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
        # NOTE(review): an `else:` appears to be missing between the two
        # assignments below; as written the second always overrides the first.
        if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            self.params.direction_forget_factor = (1 - max(self.params.precond_learning_rate))**self.params.CG_forgetting_rate

        # Convert image
        im = numpy_to_torch(image)

        # Setup bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize projection matrix
        # Uses the leading `cdim` left singular vectors of the mean-centred
        # sample covariance of each feature block.
        x_mat = TensorList([e.permute(1,0,2,3).reshape(e.shape[1], -1).clone() for e in x])
        x_mat -= x_mat.mean(dim=1, keepdim=True)
        cov_x = x_mat @ x_mat.t()
        self.projection_matrix = TensorList([torch.svd(C)[0][:,:cdim].clone() for C, cdim in zip(cov_x, self.compressed_dim)])

        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                # NOTE(review): the body of the `if` below is missing.
                if xf.shape[0] == 1:
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(shift) / self.img_support_sz
                    xf[1+i:2+i,...] = fourier.shift_fs(xf[1+i:2+i,...], shift=shift_samp)

        # Shift sample
        # Compensates for the sub-pixel part of the position
        # (pos - pos.round()).
        shift_samp = 2*math.pi * (self.pos - self.pos.round()) / (self.target_scale * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Initialize first-frame training samples
        # Every initial sample gets the same weight, 1 / (number of samples).
        num_init_samples = train_xf.size(0)
        self.init_sample_weights = TensorList([xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None]*len(self.num_stored_samples)
        self.sample_weights = TensorList([xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        for sw, init_sw, num in zip(self.sample_weights, self.init_sample_weights, num_init_samples):
            sw[:num] = init_sw

        # Initialize memory
        self.training_samples = TensorList(
            [xf.new_zeros(xf.shape[2], xf.shape[3], self.params.sample_memory_size, cdim, 2) for xf, cdim in zip(train_xf, self.compressed_dim)])

        # Initialize filter
        self.filter = TensorList(
            [xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2) for xf, cdim in zip(train_xf, self.compressed_dim)])

        # Do joint optimization
        self.joint_problem = FactorizedConvProblem(self.init_training_samples, self.yf, self.reg_filter, self.projection_matrix, self.params, self.init_sample_weights)
        joint_var = self.filter.concat(self.projection_matrix)
        self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var, debug=(self.params.debug>=3))

        if self.params.update_projection_matrix:
            self.joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter, self.params.init_GN_iter)

        # Re-project samples with the new projection matrix
        compressed_samples = complex.mtimes(self.init_training_samples, self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples, compressed_samples):
            train_samp[:,:,:init_samp.shape[2],:,:] = init_samp

        # Initialize optimizer
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(self.filter, self.training_samples, self.yf, self.sample_weights, self.reg_filter)
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone()

        # NOTE(review): the body of the `if` below is missing, and no
        # statement follows the "Post optimization" comment.
        if not self.params.update_projection_matrix:

        # Post optimization


    def track(self, image):
        """Localize the target in a new frame and update the model.

        Args:
            image: Input frame, converted with ``numpy_to_torch``.

        Returns:
            list: The new state — corner position followed by size, with the
            components of ``self.pos`` / ``self.target_sz`` swapped back
            (index order ``[1, 0]``).
        """
        self.frame_num += 1

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_xf = self.extract_fourier_sample(im, sample_pos, sample_scales, self.img_sample_sz)

        # Compute scores
        sf = self.apply_filter(test_xf)
        translation_vec, scale_ind, s = self.localize_target(sf)
        scale_change_factor = self.params.scale_factors[scale_ind]

        # Update position and scale
        self.update_state(sample_pos + translation_vec, self.target_scale * scale_change_factor)

        if self.params.debug >= 2:
            show_tensor(s[scale_ind,...], 5)
        if self.params.debug >= 3:
            for i, hf in enumerate(self.filter):
                show_tensor(fourier.sample_fs(hf).abs().mean(1), 6+i)

        # ------- UPDATE ------- #

        # Get train sample
        train_xf = TensorList([xf[scale_ind:scale_ind+1, ...] for xf in test_xf])

        # Shift the sample so it is centred on the updated position
        shift_samp = 2*math.pi * (self.pos - sample_pos) / (sample_scales[scale_ind] * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Update memory
        # This call was missing in the original, which left the new sample
        # out of the memory that the optimizer trains on.
        self.update_memory(train_xf)

        # Train filter (only every `train_skipping`-th frame)
        if self.frame_num % self.params.train_skipping == 1:
            self.filter_optimizer.run(self.params.CG_iter, train_xf)

        # Return new state
        new_state = torch.cat((self.pos[[1,0]] - (self.target_sz[[1,0]]-1)/2, self.target_sz[[1,0]]))

        return new_state.tolist()

    def apply_filter(self, sample_xf: TensorList) -> torch.Tensor:
        """Multiply the sample by the filter and sum over dimension 1."""
        filtered = complex.mult(self.filter, sample_xf)
        return filtered.sum(1, keepdim=True)

    def localize_target(self, sf: TensorList):
        """Find the target translation and best scale from the filter scores.

        Args:
            sf: Filter responses in the Fourier domain, one per feature block.

        Returns:
            tuple: ``(translation_vec, scale_ind, scores)`` — the translation
            scaled by the current target scale, the index of the best scale,
            and the sampled score maps.

        Raises:
            ValueError: If ``params.score_fusion_strategy`` is not one of
                ``'sum'``, ``'weightedsum'`` or ``'transcale'``.
        """
        if self.params.score_fusion_strategy == 'sum':
            scores = fourier.sample_fs(fourier.sum_fs(sf), self.output_sz)
        elif self.params.score_fusion_strategy == 'weightedsum':
            weight = self.fparams.attribute('translation_weight')
            scores = fourier.sample_fs(fourier.sum_fs(weight * sf), self.output_sz)
        elif self.params.score_fusion_strategy == 'transcale':
            alpha = self.fparams.attribute('scale_weight')
            beta = self.fparams.attribute('translation_weight')
            sample_sz = torch.round(self.output_sz.view(1,-1) * self.params.scale_factors.view(-1,1))
            scores = 0
            for sfe, a, b in zip(sf, alpha, beta):
                sfe = fourier.shift_fs(sfe, math.pi*torch.ones(2))
                scores_scales = []
                for sind, sz in enumerate(sample_sz):
                    # Sample each scale at its own size, then pad back to the
                    # common output size.
                    pd = (self.output_sz-sz)/2
                    scores_scales.append(F.pad(fourier.sample_fs(sfe[sind:sind+1,...], sz),
                                        (math.floor(pd[1].item()), math.ceil(pd[1].item()),
                                         math.floor(pd[0].item()), math.ceil(pd[0].item()))))
                scores_cat = torch.cat(scores_scales)
                scores = scores + (b - a) * scores_cat.mean(dim=0, keepdim=True) + a * scores_cat
        else:
            # The `else:` was missing, so this raise ran unconditionally at
            # the end of the 'transcale' branch and never for unknown values.
            raise ValueError('Unknown score fusion strategy.')

        # Get maximum
        max_score, max_disp = dcf.max2d(scores)
        _, scale_ind = torch.max(max_score, dim=0)
        max_disp = max_disp.float().cpu()

        # Convert to displacements in the base scale
        if self.params.score_fusion_strategy in ['sum', 'weightedsum']:
            # Wrap displacements into [-output_sz/2, output_sz/2).
            disp = (max_disp + self.output_sz / 2) % self.output_sz - self.output_sz / 2
        elif self.params.score_fusion_strategy == 'transcale':
            disp = max_disp - self.output_sz / 2

        # Compute translation vector and scale change factor
        translation_vec = disp[scale_ind, ...].view(-1) * (self.img_support_sz / self.output_sz) * self.target_scale
        if self.params.score_fusion_strategy in ['sum', 'weightedsum']:
            translation_vec *= self.params.scale_factors[scale_ind]

        return translation_vec, scale_ind, scores

    def extract_sample(self, im: torch.Tensor, pos: torch.Tensor, scales, sz: torch.Tensor):
        """Delegate sample extraction to ``self.params.features.extract``."""
        extractor = self.params.features
        return extractor.extract(im, pos, scales, sz)

    def extract_fourier_sample(self, im: torch.Tensor, pos: torch.Tensor, scales, sz: torch.Tensor) -> TensorList:
        """Extract a sample, project it, then run ``preprocess_sample`` on it."""
        raw_sample = self.extract_sample(im, pos, scales, sz)
        projected = self.project_sample(raw_sample)
        return self.preprocess_sample(projected)

    def preprocess_sample(self, x: TensorList) -> TensorList:
        """Window the sample, transform it and interpolate each block.

        Note that ``x`` is multiplied by ``self.window`` in place.
        """
        x *= self.window
        spectrum = fourier.cfft2(x)
        interpolated = []
        for xf, bf in zip(spectrum, self.interp_fs):
            interpolated.append(dcf.interpolate_dft(xf, bf))
        return TensorList(interpolated)

    def project_sample(self, x: TensorList):
        """Apply ``self.projection_matrix`` along dimension 1 of ``x``.

        ``x`` is returned unchanged when no projection matrix is set.
        """
        def _apply_projection(feat: torch.Tensor, proj: torch.Tensor):
            if proj is not None:
                # Move dimension 1 last, multiply, then restore the layout.
                moved = feat.permute(2, 3, 0, 1)
                return torch.matmul(moved, proj).permute(2, 3, 0, 1)
            return feat

        return _apply_projection(x, self.projection_matrix)

    def generate_init_samples(self, im: torch.Tensor) -> TensorList:
        # Extracts the initial training samples from `im`, one per
        # augmentation transform, and appends dropout-augmented copies when
        # configured.
        # Do data augmentation
        # The first transform is always the identity.
        transforms = [augmentation.Identity()]
        if 'shift' in self.params.augmentation:
            transforms.extend([augmentation.Translation(shift) for shift in self.params.augmentation['shift']])
        # NOTE(review): the body of the 'fliplr' `if` below is missing, so
        # the method does not parse as shown.
        if 'fliplr' in self.params.augmentation and self.params.augmentation['fliplr']:
        if 'rotate' in self.params.augmentation:
            transforms.extend([augmentation.Rotate(angle) for angle in self.params.augmentation['rotate']])
        if 'blur' in self.params.augmentation:
            transforms.extend([augmentation.Blur(sigma) for sigma in self.params.augmentation['blur']])

        init_samples = self.params.features.extract_transformed(im, self.pos.round(), self.target_scale, self.img_sample_sz, transforms)

        # Remove augmented samples for those that shall not have
        # (only the first sample is kept for such feature blocks)
        for i, use_aug in enumerate(self.fparams.attribute('use_augmentation')):
            if not use_aug:
                init_samples[i] = init_samples[i][0:1, ...]

        # Append `num` dropout copies (probability `prob`) of the first sample.
        if 'dropout' in self.params.augmentation:
            num, prob = self.params.augmentation['dropout']
            for i, use_aug in enumerate(self.fparams.attribute('use_augmentation')):
                if use_aug:
                    init_samples[i] = torch.cat([init_samples[i], F.dropout2d(init_samples[i][0:1,...].expand(num,-1,-1,-1), p=prob, training=True)])

        return init_samples

    def update_memory(self, sample_xf: TensorList):
        """Store a new sample in the memory slot chosen per feature block."""
        # Refresh the weights; this also yields the slot to overwrite.
        slots = self.update_sample_weights()
        for memory, sample, slot in zip(self.training_samples, sample_xf, slots):
            memory[:, :, slot:slot + 1, :, :] = sample.permute(2, 3, 0, 1, 4)

    def update_sample_weights(self):
        """Update the memory weights and choose the slot to overwrite.

        For every feature block the lowest-weight slot is selected, its
        weight is set from the block's learning rate, and the weights are
        re-normalized to sum to one. The selection is remembered in
        ``self.previous_replace_ind`` and ``self.num_stored_samples`` is
        incremented.

        Returns:
            list: Index of the memory slot to replace, one per feature block.
        """
        # NOTE(review): the two `else:` headers and the `append` were missing
        # in the original, which made the method overwrite the weights on
        # every call and always return an empty list.
        replace_ind = []
        for sw, prev_ind, num_samp, fparams in zip(self.sample_weights, self.previous_replace_ind, self.num_stored_samples, self.fparams):
            if num_samp == 0 or fparams.learning_rate == 1:
                # Empty memory (or full replacement): all weight on slot 0.
                sw[:] = 0
                sw[0] = 1
                r_ind = 0
            else:
                # Get index to replace
                _, r_ind = torch.min(sw, 0)
                r_ind = r_ind.item()

                # Update weights
                if prev_ind is None:
                    sw /= 1 - fparams.learning_rate
                    sw[r_ind] = fparams.learning_rate
                else:
                    sw[r_ind] = sw[prev_ind] / (1 - fparams.learning_rate)

            sw /= sw.sum()
            replace_ind.append(r_ind)

        self.previous_replace_ind = replace_ind.copy()
        self.num_stored_samples += 1
        return replace_ind

    def update_state(self, new_pos, new_scale):
        """Set the target scale, size and position from new estimates.

        The scale is clamped to ``[min_scale_factor, max_scale_factor]`` and
        the position to ``[inside_offset, image_sz - inside_offset]``.
        """
        # Scale and the target size derived from it.
        self.target_scale = new_scale.clamp(self.min_scale_factor, self.max_scale_factor)
        self.target_sz = self.base_target_sz * self.target_scale

        # Position: the offset is negative, so the centre may leave the
        # image by up to 0.3 * target_sz.
        inside_ratio = 0.2
        inside_offset = (inside_ratio - 0.5) * self.target_sz
        upper_bound = self.image_sz - inside_offset
        bounded_above = torch.min(new_pos, upper_bound)
        self.pos = torch.max(bounded_above, inside_offset)

    def symmetrize_filter(self):
        """Symmetrize index 0 of the filter's fourth dimension in place.

        That slice is halved and then the conjugate of its copy flipped along
        dimension 2 is added to it.
        """
        for filt in self.filter:
            first_col = filt[:, :, :, 0, :]  # view into `filt`
            first_col /= 2
            first_col += complex.conj(first_col.flip((2,)))
コード例 #5
ファイル: eco.py プロジェクト: yaolinhua/pytracking-master
 def preprocess_sample(self, x: TensorList) -> TensorList:
     """Window the sample, transform it and interpolate each block.

     Note that ``x`` is multiplied by ``self.window`` in place.
     """
     x *= self.window
     spectrum = fourier.cfft2(x)
     interpolated = []
     for xf, bf in zip(spectrum, self.interp_fs):
         interpolated.append(dcf.interpolate_dft(xf, bf))
     return TensorList(interpolated)
コード例 #6
 def size(self, input_sz):
     """Collect ``f.size(input_sz)`` of every returned feature, unrolled."""
     sizes = []
     for f in self.features:
         if self._return_feature(f):
             sizes.append(f.size(input_sz))
     return TensorList(sizes).unroll()
コード例 #7
 def get_fparams(self, name: str = None):
     """Return feature parameters of every returned feature.

     With ``name=None`` the ``fparams`` objects themselves are returned as a
     plain list; otherwise the attribute ``name`` of each ``fparams`` is
     collected into an unrolled ``TensorList``.
     """
     if name is not None:
         values = []
         for f in self.features:
             if self._return_feature(f):
                 values.append(getattr(f.fparams, name))
         return TensorList(values).unroll()
     return [f.fparams for f in self.features if self._return_feature(f)]
コード例 #8
    def initialize(self, image, state, gt, *args, **kwargs):
        # Initializes the tracker on the first frame from `image`, the
        # initial `state` and a ground-truth annotation `gt` (8 values or
        # fewer; only its width/height are derived here).
        # NOTE(review): this excerpt has lost many lines (`else:` headers,
        # call continuations, calls after some comments), so it does not
        # parse as shown.
        # NOTE(review): an `else:` appears to be missing before the second
        # pair of ww/hh assignments.
        if len(gt) == 8:
            ww = gt[2] - gt[0]
            hh = gt[7] - gt[1]
            ww = gt[2]
            hh = gt[3]
        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'
        # NOTE(review): an `else:` appears to be missing between the 'layer2'
        # and 'layer3' settings; `self.feature_sz` is also reassigned further
        # down from `self.params.features.size(...)`.
        if ww < 25 and hh < 25:
            self.feature_sz = TensorList([torch.Tensor([28., 28.])])
            self.output_layer = TensorList(['layer2'])
            self.feature_sz = TensorList([torch.Tensor([14., 14.])])
            #     self.output_layer = TensorList(['layer3'])
            self.output_layer = TensorList(['layer3'])
        # Initialize some stuff
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        # NOTE(review): no statement follows the comment above.

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        self.time = 0
        tic = time.time()

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])
        # NOTE(review): an `else:` appears to be missing; as written the
        # shrunken size is immediately overwritten by the full size.
        if state[3] > 50 or state[2] > 50:

            self.target_sz = torch.Tensor(
                [state[3] - state[3] / 8, state[2] - state[2] / 4])
            self.target_sz = torch.Tensor([state[3], state[2]])
        # Set search area
        # NOTE(review): the three expressions below are cut off mid-line.
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /

        # Check if IoUNet is used
        self.use_iou_net = getattr(self.params, 'use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        # NOTE(review): the 'square' expression has lost a line, the
        # 'initrect' expression is cut off, and the `else:` before the raise
        # is missing.
        feat_max_stride = max(self.params.features.stride())
        if getattr(self.params, 'search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(
                    torch.prod(self.base_target_sz *
                               self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz *
            raise ValueError('Unknown search area shape')
        # NOTE(review): an `else:` appears to be missing between the two
        # size adjustments below.
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
                2 * feat_max_stride)
            self.img_sample_sz += feat_max_stride - (
                self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        # NOTE(review): the attribute(...) argument, the `else:` header and
        # the tail of the forget-factor expression are missing.
        self.params.precond_learning_rate = self.fparams.attribute(
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)

        # NOTE(review): both window constructions below are cut off.
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long() * self.params.effective_search_area /
                self.output_window = dcf.hann2d(self.output_sz.long(),

        # Initialize some learning things
        # NOTE(review): no statement follows the comment above.

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize iounet
        # NOTE(review): the body of this `if` is missing.
        if self.use_iou_net:

        # Initialize projection matrix
        # NOTE(review): no statement follows the comment above.

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        # NOTE(review): no statement follows the comment above.

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        self.time += time.time() - tic
        # Adaptive max-pooling layers with output sizes (1, 224) and (224, 1).
        self.pool1 = torch.nn.AdaptiveMaxPool2d((1, 224))
        self.pool2 = torch.nn.AdaptiveMaxPool2d((224, 1))
コード例 #9
    def init_classifier(self, init_backbone_feat):
        # Set up the classifier from the first-frame backbone features: extract
        # classification features, optionally append dropout-augmented copies,
        # derive feature/kernel/output sizes and the output window, pass the
        # samples through the classifier's transformer encoder/decoder, and
        # compute the initial target filter with self.net.classifier.get_filter.
        # NOTE(review): this listing is truncated in several places (unclosed
        # calls, missing `else:` branches, no body under the final `if`); the
        # missing lines must be restored from the upstream source before use.

        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Overwrite some parameters in the classifier. (These are not generally changed)

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and self.params.get(
                'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            # One extra copy of the first transform per dropout sample.
            self.transforms.extend(self.transforms[:1] * num)
            # NOTE(review): the remaining arguments of dropout2d/torch.cat are missing here.
            x = torch.cat([
                F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1),

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))
        ksz = self.net.classifier.filter_size
        # A scalar filter size is expanded to a two-element [ksz, ksz] tensor.
        self.kernel_size = torch.Tensor(
            [ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        # Adds 1 to each output dimension whose kernel size is even.
        self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

        # Construct output window
        self.output_window = None
        if self.params.get('window_output', False):
            # NOTE(review): both window calls below are cut off and the `else:`
            # between them appears to be missing.
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    (self.output_sz * self.params.effective_search_area /
                self.output_window = dcf.hann2d(self.output_sz.long(),
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = self.params.get('net_opt_iter', None)

        # mask in Transformer
        # NOTE(review): remaining arguments of gaussian_label_function are missing.
        self.transformer_label = prutils.gaussian_label_function(
            target_boxes.cpu().view(-1, 4),

        # Moved to the GPU unconditionally via .cuda().
        self.transformer_label = self.transformer_label.unsqueeze(1).cuda()
        self.x_clf = x

        self.transformer_memory, _ = self.net.classifier.transformer.encoder(
            self.x_clf.unsqueeze(1), pos=None)

        # Decode each sample separately and concatenate the results along dim 0.
        # NOTE(review): the decoder call is cut off and the `else:` before the
        # torch.cat line appears to be missing.
        for i in range(x.shape[0]):
            _, cur_encoded_feat = self.net.classifier.transformer.decoder(
                x[i, ...].unsqueeze(0).unsqueeze(0),
            if i == 0:
                encoded_feat = cur_encoded_feat
                encoded_feat = torch.cat((encoded_feat, cur_encoded_feat), 0)
        x = encoded_feat.contiguous()

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(
                x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

        # Init memory
        # NOTE(review): the body of this `if` is not part of the listing.
        if self.params.get('update_classifier', True):
コード例 #10
 def apply_filters(self, sample_xf: TensorList) -> torch.Tensor:
     """Apply every filter in ``self.filters`` to a Fourier-domain sample.

     Args:
         sample_xf: Sample features, passed to ``complex.mult`` together with
             each filter.

     Returns:
         A ``TensorList`` with one entry per filter: the product
         ``complex.mult(f, sample_xf)`` summed over dim 1 with ``keepdim=True``.
         NOTE(review): the ``-> torch.Tensor`` annotation is left unchanged
         for interface compatibility although a ``TensorList`` is built.
     """
     return TensorList([
         complex.mult(f, sample_xf).sum(1, keepdim=True)
         for f in self.filters
     ])
コード例 #11
 def extract_fourier_sample(self, im: torch.Tensor) -> TensorList:
     # Resize `im` to self.output_sz with bilinear interpolation, run each
     # feature in self.params.features.features on the resized image, and
     # return the result of self.preprocess_sample on those features.
     # NOTE(review): the closing of the TensorList([...]) expression is missing
     # from this listing; check upstream whether it is a plain "])" or carries
     # a trailing call before restoring it.
     x = F.interpolate(im, self.output_sz.long().tolist(), mode='bilinear')
     x = TensorList([
         f.get_feature(x) for f in self.params.features.features
     return self.preprocess_sample(x)
コード例 #12
    def initialize(self, image, info: dict, gpu_device) -> dict:
        # Set up the tracker on the first frame for a set of points: choose the
        # device, convert the image, derive sample/feature/filter sizes, build
        # per-point label functions, extract the initial training samples and
        # run a FilterOptim instance per point to obtain self.filters.
        # Reads info['points'] (iterable of 2-element points) and
        # info['target_sz'] (2 elements).
        # NOTE(review): this listing is truncated in many places (unclosed
        # calls, missing `else:`/loop bodies) and shows no `return` despite the
        # `-> dict` annotation; restore the missing lines from upstream.

        # Initialize some stuff
        self.frame_num = 1
        self.params.device = 'cuda:{0}'.format(
            gpu_device) if self.params.use_gpu else 'cpu'

        # Convert image
        im = numpy_to_torch(image)
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])

        # Initialize features
        # NOTE(review): no statement follows this heading; the feature
        # initialization call appears to be missing.

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        self.points = TensorList(
            [torch.Tensor([p[0], p[1]]) for p in info['points']])
        self.org_points = self.points.clone()
        self.target_sz = torch.Tensor(
            [info['target_sz'][0], info['target_sz'][1]])

        # Use odd square search area and set sizes
        # The sample size starts from the full image size and is then adjusted
        # using the largest feature stride.
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = self.image_sz.clone()
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
            2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.mid_point = self.img_support_sz // 2
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        # Adds 1 to every even feature dimension, so filter sizes are odd.
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.img_support_sz  # Interpolated size of the output

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        # NOTE(review): the torch.ones(...) call and the list closing are cut off.
        #self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])
        self.window = TensorList([
            torch.ones((1, 1, int(sz[0].item()),
            for sz in self.feature_sz
        #self.window = TensorList([dcf.tukey2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        # NOTE(review): closing "])" missing.
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                   self.params.device) for sz in self.filter_sz

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(
            self.target_sz.prod()) * output_sigma_factor
        yf_zero = TensorList([
            dcf.label_function(sz, sig).to(self.params.device)
            for sz, sig in zip(self.filter_sz, sigma)
        yf_zero = complex.complex(yf_zero)
        self.yf = TensorList()
        # One shifted copy of the label function per point.
        # NOTE(review): the statement that stores the shifted labels into
        # self.yf is only partly present below.
        for p in self.points:
            shift_sample = 2 * math.pi * (self.mid_point -
                                          p) / self.img_support_sz
                    [fourier.shift_fs(yfs, shift_sample) for yfs in yf_zero]))

        # Optimization options
        # NOTE(review): the attribute name is cut off and the `else:` before the
        # second direction_forget_factor assignment appears to be missing.
        self.params.precond_learning_rate = self.fparams.attribute(
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)

        # Extract and transform sample
        x = self.generate_init_samples(im).to(self.params.device)
        self.x = x
        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        # NOTE(review): the body of `if xf.shape[0] == 1:` and the end of the
        # shift_fs call are missing.
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(
                        shift) / self.img_support_sz
                    xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i,

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)

        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Initialize memory
        # Initialize filter
        # Zero-filled sample memory with self.params.sample_memory_size slots
        # along dim 2.
        self.training_samples = TensorList([
            xf.new_zeros(xf.shape[2], xf.shape[3],
                         self.params.sample_memory_size, xf.shape[1], 2)
            for xf in train_xf
        # NOTE(review): the opening of the inner per-point list is missing.
        self.filters = TensorList([
                xf.new_zeros(1, xf.shape[1], xf.shape[2], xf.shape[3], 2)
                for xf in train_xf
            ]) for i in range(len(self.points))

        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        # NOTE(review): the remaining zip(...) arguments are cut off.
        for sw, init_sw, num in zip(self.sample_weights,
            sw[:num] = init_sw

        # Get regularization filter
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.target_sz,
            for fparams in self.fparams
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)

        # NOTE(review): the second zip(...) argument is cut off.
        for train_samp, init_samp in zip(self.training_samples,
            train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

        sample_energy = complex.abs_sqr(self.training_samples).mean(
            dim=2, keepdim=True).permute(2, 3, 0, 1)
        # Do joint optimization
        # Each point gets its own FilterOptim working on cloned sample data.
        # NOTE(review): no call that runs the optimizer is visible between
        # register(...) and reading filter_optimizer.filter; i_sw is assigned
        # but not used in what is shown.
        for i in range(len(self.points)):
            print('{0}'.format(i), end=', ')
            ts = self.training_samples.clone()
            yf = self.yf[i]
            filters = self.filters[i]
            i_sw = self.init_sample_weights.clone()
            re = self.reg_energy.clone()
            sw = self.sample_weights.clone()
            rf = self.reg_filter.clone()
            filter_optimizer = FilterOptim(self.params, re)
            filter_optimizer.register(filters, ts, yf, sw, rf)
            filter_optimizer.sample_energy = sample_energy.clone()


            # Post optimization
            self.filters[i] = filter_optimizer.filter
コード例 #13
class CCOT(BaseTracker):
    def initialize_features(self, im):
        # Mark the features as initialized by setting
        # self.features_initialized to True.
        # NOTE(review): the body of the `if` below is missing from this
        # listing; presumably the one-time feature setup belongs there —
        # confirm against upstream. `im` is unused in what is shown.
        if not getattr(self, 'features_initialized', False):
        self.features_initialized = True

    def initialize(self, image, info: dict, gpu_device) -> dict:
        # Set up the tracker on the first frame for a set of points: choose the
        # device, convert the image, derive sample/feature/filter sizes, build
        # per-point label functions, extract the initial training samples and
        # run a FilterOptim instance per point to obtain self.filters.
        # Reads info['points'] (iterable of 2-element points) and
        # info['target_sz'] (2 elements).
        # NOTE(review): this listing is truncated in many places (unclosed
        # calls, missing `else:`/loop bodies) and shows no `return` despite the
        # `-> dict` annotation; restore the missing lines from upstream.

        # Initialize some stuff
        self.frame_num = 1
        self.params.device = 'cuda:{0}'.format(
            gpu_device) if self.params.use_gpu else 'cpu'

        # Convert image
        im = numpy_to_torch(image)
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])

        # Initialize features
        # NOTE(review): no statement follows this heading; the feature
        # initialization call appears to be missing.

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        self.points = TensorList(
            [torch.Tensor([p[0], p[1]]) for p in info['points']])
        self.org_points = self.points.clone()
        self.target_sz = torch.Tensor(
            [info['target_sz'][0], info['target_sz'][1]])

        # Use odd square search area and set sizes
        # The sample size starts from the full image size and is then adjusted
        # using the largest feature stride.
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = self.image_sz.clone()
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
            2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.mid_point = self.img_support_sz // 2
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        # Adds 1 to every even feature dimension, so filter sizes are odd.
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.img_support_sz  # Interpolated size of the output

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        # NOTE(review): the torch.ones(...) call and the list closing are cut off.
        #self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])
        self.window = TensorList([
            torch.ones((1, 1, int(sz[0].item()),
            for sz in self.feature_sz
        #self.window = TensorList([dcf.tukey2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        # NOTE(review): closing "])" missing.
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                   self.params.device) for sz in self.filter_sz

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(
            self.target_sz.prod()) * output_sigma_factor
        yf_zero = TensorList([
            dcf.label_function(sz, sig).to(self.params.device)
            for sz, sig in zip(self.filter_sz, sigma)
        yf_zero = complex.complex(yf_zero)
        self.yf = TensorList()
        # One shifted copy of the label function per point.
        # NOTE(review): the statement that stores the shifted labels into
        # self.yf is only partly present below.
        for p in self.points:
            shift_sample = 2 * math.pi * (self.mid_point -
                                          p) / self.img_support_sz
                    [fourier.shift_fs(yfs, shift_sample) for yfs in yf_zero]))

        # Optimization options
        # NOTE(review): the attribute name is cut off and the `else:` before the
        # second direction_forget_factor assignment appears to be missing.
        self.params.precond_learning_rate = self.fparams.attribute(
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)

        # Extract and transform sample
        x = self.generate_init_samples(im).to(self.params.device)
        self.x = x
        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        # NOTE(review): the body of `if xf.shape[0] == 1:` and the end of the
        # shift_fs call are missing.
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(
                        shift) / self.img_support_sz
                    xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i,

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)

        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Initialize memory
        # Initialize filter
        # Zero-filled sample memory with self.params.sample_memory_size slots
        # along dim 2.
        self.training_samples = TensorList([
            xf.new_zeros(xf.shape[2], xf.shape[3],
                         self.params.sample_memory_size, xf.shape[1], 2)
            for xf in train_xf
        # NOTE(review): the opening of the inner per-point list is missing.
        self.filters = TensorList([
                xf.new_zeros(1, xf.shape[1], xf.shape[2], xf.shape[3], 2)
                for xf in train_xf
            ]) for i in range(len(self.points))

        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        # NOTE(review): the remaining zip(...) arguments are cut off.
        for sw, init_sw, num in zip(self.sample_weights,
            sw[:num] = init_sw

        # Get regularization filter
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.target_sz,
            for fparams in self.fparams
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)

        # NOTE(review): the second zip(...) argument is cut off.
        for train_samp, init_samp in zip(self.training_samples,
            train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

        sample_energy = complex.abs_sqr(self.training_samples).mean(
            dim=2, keepdim=True).permute(2, 3, 0, 1)
        # Do joint optimization
        # Each point gets its own FilterOptim working on cloned sample data.
        # NOTE(review): no call that runs the optimizer is visible between
        # register(...) and reading filter_optimizer.filter; i_sw is assigned
        # but not used in what is shown.
        for i in range(len(self.points)):
            print('{0}'.format(i), end=', ')
            ts = self.training_samples.clone()
            yf = self.yf[i]
            filters = self.filters[i]
            i_sw = self.init_sample_weights.clone()
            re = self.reg_energy.clone()
            sw = self.sample_weights.clone()
            rf = self.reg_filter.clone()
            filter_optimizer = FilterOptim(self.params, re)
            filter_optimizer.register(filters, ts, yf, sw, rf)
            filter_optimizer.sample_energy = sample_energy.clone()


            # Post optimization
            self.filters[i] = filter_optimizer.filter

    def track(self, image, update=False) -> dict:
        """Localize every tracked point in a new frame.

        Args:
            image: The new frame; converted with ``numpy_to_torch``.
            update: Accepted for interface compatibility; not used here.

        Returns:
            A ``TensorList`` holding, for each point index ``i``, the result of
            ``self.localize_and_update_target(sfs[i], i)``.
            NOTE(review): the ``-> dict`` annotation is left unchanged for
            interface compatibility although a ``TensorList`` is returned.
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num
        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Get sample
        test_xf = self.extract_fourier_sample(im)

        # Compute scores: one filter response per tracked point.
        sfs = self.apply_filters(test_xf)
        out = TensorList([
            self.localize_and_update_target(sfs[i], i)
            for i in range(len(self.points))
        ])

        return out

    def apply_filters(self, sample_xf: TensorList) -> torch.Tensor:
        """Apply every filter in ``self.filters`` to a Fourier-domain sample.

        Args:
            sample_xf: Sample features, passed to ``complex.mult`` together
                with each filter.

        Returns:
            A ``TensorList`` with one entry per filter: the product
            ``complex.mult(f, sample_xf)`` summed over dim 1 with
            ``keepdim=True``.
            NOTE(review): the ``-> torch.Tensor`` annotation is left unchanged
            for interface compatibility although a ``TensorList`` is built.
        """
        return TensorList([
            complex.mult(f, sample_xf).sum(1, keepdim=True)
            for f in self.filters
        ])

    def apply_filter(self, sample_xf: TensorList) -> torch.Tensor:
        """Multiply ``self.filter`` with a sample and sum over dim 1.

        Args:
            sample_xf: Sample features, passed to ``complex.mult``.

        Returns:
            ``complex.mult(self.filter, sample_xf)`` summed over dim 1 with
            ``keepdim=True``.
        """
        # NOTE(review): this reads self.filter (singular), while the sibling
        # apply_filters iterates self.filters — confirm which is intended.
        response = complex.mult(self.filter, sample_xf)
        return response.sum(1, keepdim=True)

    def localize_and_update_target(self, sf: TensorList, i):
        # Turn the filter response `sf` for point `i` into a score map, take
        # its maximum, convert that to a displacement, and store the clamped
        # new position in self.points[i].
        # Returns (rounded self.points[i], max_score, scores).
        # NOTE(review): as listed, the `raise` sits directly inside the
        # 'weightedsum' branch (an `else:` appears to be missing), and the
        # translation_vec expression is cut off; restore from upstream.
        if self.params.score_fusion_strategy == 'weightedsum':
            weight = self.fparams.attribute('translation_weight')
            sf = fourier.sum_fs(weight * sf)
            scores = fourier.sample_fs(sf, self.output_sz)
            raise ValueError('Unknown score fusion strategy.')

        # Get maximum
        max_score, max_disp = dcf.max2d(scores)
        max_disp = max_disp.float().cpu()

        # Convert to displacements in the base scale
        # For 'sum'/'weightedsum' the index is wrapped into
        # [-output_sz/2, output_sz/2); for 'transcale' it is only offset.
        if self.params.score_fusion_strategy in ['sum', 'weightedsum']:
            disp = (max_disp +
                    self.output_sz / 2) % self.output_sz - self.output_sz / 2
        elif self.params.score_fusion_strategy == 'transcale':
            disp = max_disp - self.output_sz / 2

        # Compute translation vector and scale change factor
        translation_vec = disp.view(-1) * (self.img_support_sz /

        # Update pos
        new_pos = self.mid_point.round() + translation_vec

        # Clamp new_pos element-wise to
        # [inside_offset, self.image_sz - inside_offset]; inside_offset is
        # negative (-0.3 * target_sz) for positive target sizes, so the
        # allowed range extends past the image bounds.
        inside_ratio = 0.2
        inside_offset = (inside_ratio - 0.5) * self.target_sz
        self.points[i] = torch.max(
            torch.min(new_pos, self.image_sz - inside_offset), inside_offset)

        return self.points[i].round(), max_score, scores

    def extract_fourier_sample(self, im: torch.Tensor) -> TensorList:
        # Resize `im` to self.output_sz with bilinear interpolation, run each
        # feature in self.params.features.features on the resized image, and
        # return the result of self.preprocess_sample on those features.
        # NOTE(review): the closing of the TensorList([...]) expression is
        # missing from this listing; check upstream whether it is a plain "])"
        # or carries a trailing call before restoring it.
        x = F.interpolate(im, self.output_sz.long().tolist(), mode='bilinear')
        x = TensorList([
            f.get_feature(x) for f in self.params.features.features
        return self.preprocess_sample(x)

    def preprocess_sample(self, x: TensorList) -> TensorList:
        """Window the features, transform them and interpolate the result.

        Args:
            x: Extracted features. They are multiplied by ``self.window``
                with ``*=``, so the argument itself is updated.

        Returns:
            A ``TensorList`` with ``dcf.interpolate_dft(xf, bf)`` for each pair
            of ``fourier.cfft2(x)`` output and ``self.interp_fs`` entry.
        """
        x *= self.window
        sample_xf = fourier.cfft2(x)
        return TensorList([
            dcf.interpolate_dft(xf, bf)
            for xf, bf in zip(sample_xf, self.interp_fs)
        ])

    def generate_init_samples(self, im: torch.Tensor) -> TensorList:
        # Build the first-frame training samples: assemble a list of
        # augmentation transforms from self.params.augmentation, apply them to
        # an interpolated image patch, extract features for every transformed
        # patch, and return them as a TensorList.
        # NOTE(review): most statements in this listing are truncated — the
        # transforms.extend(...) calls, the F.interpolate arguments, the
        # enumerate(...) arguments and several closing brackets are missing.
        # Restore from upstream before use.

        # Do data augmentation
        transforms = [augmentation.Identity()]
        if 'shift' in self.params.augmentation:
                for shift in self.params.augmentation['shift']
        if 'fliplr' in self.params.augmentation and self.params.augmentation[
        if 'rotate' in self.params.augmentation:
                for angle in self.params.augmentation['rotate']
        if 'blur' in self.params.augmentation:
                for sigma in self.params.augmentation['blur']

        im_patch = F.interpolate(im,
        # Every transform is applied to the same patch; results are concatenated.
        im_patches = torch.cat([T(im_patch) for T in transforms])
        init_samples = TensorList([
            f.get_feature(im_patches) for f in self.params.features.features

        # Remove augmented samples for those that shall not have
        # (only the first sample is kept for features with use_aug false)
        for i, use_aug in enumerate(
            if not use_aug:
                init_samples[i] = init_samples[i][0:1, ...]

        if 'dropout' in self.params.augmentation:
            num, prob = self.params.augmentation['dropout']
            for i, use_aug in enumerate(
                if use_aug:
                    init_samples[i] = torch.cat([
                        F.dropout2d(init_samples[i][0:1, ...].expand(
                            num, -1, -1, -1),

        return init_samples

    def symmetrize_filter(self):
        """Symmetrize, in place, the index-0 slice along dim 3 of each filter.

        For every tensor in every entry of ``self.filters`` the slice
        ``[:, :, :, 0, :]`` is halved and then has the complex conjugate of
        its own flip along dim 2 added to it.
        """
        # Same selection as [:, :, :, 0, :], written once as an index tuple.
        index_zero = (slice(None), slice(None), slice(None), 0, slice(None))
        for filter_group in self.filters:
            for coeffs in filter_group:
                coeffs[index_zero] /= 2
                flipped = coeffs[index_zero].flip((2, ))
                coeffs[index_zero] += complex.conj(flipped)
コード例 #14
 def apply_filter(self, sample_x: TensorList):
     """Convolve each sample feature map with its matching filter.

     Args:
         sample_x: Feature maps, paired element-wise with ``self.filter``.

     Returns:
         A ``TensorList`` with ``operation.conv2d(x, f, mode='same')`` for
         each pair; ``zip`` stops at the shorter of the two sequences.
     """
     return TensorList([
         operation.conv2d(tmp_x, tmp_filter, mode='same')
         for tmp_x, tmp_filter in zip(sample_x, self.filter)
     ])
コード例 #15
ファイル: dimp.py プロジェクト: zhengfangwu/pytracking
    def refine_target_box(self,
        """Run the ATOM IoUNet to refine the target bounding box."""
        # NOTE(review): the parameter list above is cut off in this listing.
        # The body uses sample_pos, sample_scale, scale_ind, update_scale and
        # backbone_feat, which presumably are parameters — confirm upstream.
        # Several calls below are truncated as well.
        # Side effects visible here: sets self.pos_iounet and self.target_sz,
        # optionally self.pos and self.target_scale.

        # Initial box for refinement
        init_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos,

        # Extract features from the relevant scale
        iou_features = self.get_iou_features(backbone_feat)
        iou_features = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in iou_features])

        # Generate random initial boxes
        # Jitter is scaled by the square root of the box area; the unperturbed
        # init_box is kept as the first candidate.
        init_boxes = init_box.view(1, 4).clone()
        if self.params.num_init_random_boxes > 0:
            square_box_sz = init_box[2:].prod().sqrt()
            rand_factor = square_box_sz * torch.cat([
                self.params.box_jitter_pos * torch.ones(2),
                self.params.box_jitter_sz * torch.ones(2)

            # Jittered sizes are clamped from below to a third of the shorter edge.
            minimal_edge_size = init_box[2:].min() / 3
            rand_bb = (torch.rand(self.params.num_init_random_boxes, 4) -
                       0.5) * rand_factor
            new_sz = (init_box[2:] + rand_bb[:, 2:]).clamp(minimal_edge_size)
            new_center = (init_box[:2] + init_box[2:] / 2) + rand_bb[:, :2]
            init_boxes = torch.cat([new_center - new_sz / 2, new_sz], 1)
            init_boxes = torch.cat([init_box.view(1, 4), init_boxes])

        # Optimize the boxes
        output_boxes, output_iou = self.optimize_boxes(iou_features,

        # Remove weird boxes
        # Keep only boxes whose aspect ratio lies strictly between
        # 1 / maximal_aspect_ratio and maximal_aspect_ratio.
        output_boxes[:, 2:].clamp_(1)
        aspect_ratio = output_boxes[:, 2] / output_boxes[:, 3]
        keep_ind = (aspect_ratio < self.params.maximal_aspect_ratio) * (
            aspect_ratio > 1 / self.params.maximal_aspect_ratio)
        output_boxes = output_boxes[keep_ind, :]
        output_iou = output_iou[keep_ind]

        # If no box found
        # NOTE(review): the body of this `if` is missing from the listing.
        if output_boxes.shape[0] == 0:

        # Predict box
        # Average the (at most) k boxes with the highest predicted IoU.
        k = getattr(self.params, 'iounet_k', 5)
        topk = min(k, output_boxes.shape[0])
        _, inds = torch.topk(output_iou, topk)
        predicted_box = output_boxes[inds, :].mean(0)
        predicted_iou = output_iou.view(-1, 1)[inds, :].mean(0)

        # Get new position and size
        # The box's two coordinate entries are flipped before being mapped
        # back with sample_scale and sample_pos.
        new_pos = predicted_box[:2] + predicted_box[2:] / 2
        new_pos = (new_pos.flip(
            (0, )) - (self.img_sample_sz - 1) / 2) * sample_scale + sample_pos
        new_target_sz = predicted_box[2:].flip((0, )) * sample_scale
        new_scale = torch.sqrt(new_target_sz.prod() /

        self.pos_iounet = new_pos.clone()

        if getattr(self.params, 'use_iounet_pos_for_learning', True):
            self.pos = new_pos.clone()

        self.target_sz = new_target_sz

        if update_scale:
            self.target_scale = new_scale
コード例 #16
ファイル: atom.py プロジェクト: dongfangduoshou123/pytracking
    def track(self, image):
        # Process one new frame: localize the target from the filter response,
        # update position/scale (through refine_target_box when
        # self.use_iou_net is set), add the new sample to memory when the
        # localization flag allows it, and return the new state as a list
        # built from self.pos and self.target_sz.
        # NOTE(review): several calls in this listing are cut off and some
        # branches have no body (e.g. under "# Train filter"); restore the
        # missing lines from upstream before use.

        self.frame_num += 1

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_x = self.extract_processed_sample(im, sample_pos, sample_scales,

        # Compute scores
        scores_raw = self.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target_no_fourier(
        new_pos = sample_pos + translation_vec

        # Update position and scale
        # NOTE(review): the getattr attribute name and the body of the inner
        # `if getattr(..., 'use_classifier', True):` are missing.
        if flag != 'not_found':
            if self.use_iou_net:
                update_scale_flag = getattr(self.params,
                                            True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                       scale_ind, update_scale_flag)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(new_pos, sample_scales[scale_ind])

        if self.params.debug >= 2:
            show_tensor(s[scale_ind, ...],
                        title='Max score = {:.2f}'.format(
                            torch.max(s[scale_ind, ...]).item()))

        # ------- UPDATE ------- #

        # Check flags and set learning rate if hard negative
        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            # Only the slice at the selected scale index is kept.
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])

            # Create label for sample
            train_y = self.get_label_function(sample_pos,

            # Update memory
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        # NOTE(review): both branch bodies are missing from the listing.
        if hard_negative:
        elif (self.frame_num - 1) % self.params.train_skipping == 0:

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and flag != 'not_found':
            self.pos = self.pos_iounet.clone()

        # Return new state
        # Elements are reordered with [[1, 0]]; the first two outputs are
        # pos - (target_sz - 1) / 2 and the last two are target_sz.
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        return new_state.tolist()
コード例 #17
 def stride(self):
     """Return the strides of all selected features as one ``torch.Tensor``.

     Only features for which ``self._return_feature(f)`` is truthy are
     included; their ``stride()`` results are collected in a ``TensorList``,
     flattened with ``unroll()`` and wrapped in a tensor.
     """
     selected_strides = [
         feature.stride()
         for feature in self.features
         if self._return_feature(feature)
     ]
     return torch.Tensor(TensorList(selected_strides).unroll())
コード例 #18
class ECO(BaseTracker):
    def initialize_features(self):
        # Mark the features as initialized by setting
        # self.features_initialized to True.
        # NOTE(review): the body of the `if` below is missing from this
        # listing; presumably the one-time feature setup belongs there —
        # confirm against upstream.
        if not getattr(self, 'features_initialized', False):
        self.features_initialized = True

    def initialize(self, image, state, *args, **kwargs):
        # Set up the tracker on the first frame: choose the device, read the
        # initial box from `state`, derive the search-area scale and the
        # sample/feature/filter sizes, build window, interpolation,
        # regularization and label functions, extract the initial samples,
        # initialize the projection matrix from an SVD of the sample
        # covariance, and set up the joint and filter optimizers.
        # presumably `state` is (x, y, width, height): self.pos is built from
        # state[1]/state[0] plus half of state[3]/state[2] — verify with caller.
        # NOTE(review): this listing is truncated in many places (unclosed
        # calls, missing `else:` branches and `if` bodies); restore the missing
        # lines from upstream before use.

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        # NOTE(review): no statement follows this heading; the feature
        # initialization call appears to be missing.

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        self.pos = torch.Tensor(
            [state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        # NOTE(review): the search_area expression and both math.sqrt calls
        # are cut off.
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz *
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area /
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area /

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale
        # IoU-net use is hard-coded on here rather than read from params.
        self.use_iou_net = True
        # Use odd square search area and set sizes
        # NOTE(review): the inner call of torch.round(...) is partly missing.
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = torch.round(
                torch.prod(self.base_target_sz *
                           self.params.search_area_scale))) * torch.ones(2)
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
            2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        # Adds 1 to every even feature dimension, so filter sizes are odd.
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        self.window = TensorList(
            [dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        # NOTE(review): closing "])" missing.
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                   self.params.device) for sz in self.filter_sz

        # Get regularization filter
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.base_target_sz,
            for fparams in self.fparams
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(
            self.base_target_sz.prod()) * output_sigma_factor
        self.yf = TensorList([
            dcf.label_function(sz, sig).to(self.params.device)
            for sz, sig in zip(self.filter_sz, sigma)

        # Optimization options
        # NOTE(review): the attribute name is cut off and the `else:` before the
        # second direction_forget_factor assignment appears to be missing.
        self.params.precond_learning_rate = self.fparams.attribute(
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)

        # Convert image
        im = numpy_to_torch(image)

        # Setup bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize projection matrix
        # Each feature block is flattened to (channels, -1), mean-centered
        # along dim 1, and the first `cdim` left singular vectors of its
        # covariance are kept.
        x_mat = TensorList(
            [e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone() for e in x])
        x_mat -= x_mat.mean(dim=1, keepdim=True)
        cov_x = x_mat @ x_mat.t()
        self.projection_matrix = TensorList([
            torch.svd(C)[0][:, :cdim].clone()
            for C, cdim in zip(cov_x, self.compressed_dim)

        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        # NOTE(review): the body of `if xf.shape[0] == 1:` and the end of the
        # shift_fs call are missing.
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(
                        shift) / self.img_support_sz
                    xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i,

        # Shift sample
        # Applies a Fourier shift for the difference between self.pos and its
        # rounded value.
        shift_samp = 2 * math.pi * (self.pos - self.pos.round()) / (
            self.target_scale * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)
        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        ## Initialize the regression branch
        self.iou_img_sample_sz = self.img_sample_sz

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        # NOTE(review): the remaining zip(...) arguments are cut off.
        for sw, init_sw, num in zip(self.sample_weights,
            sw[:num] = init_sw

        # Initialize memory
        self.training_samples = TensorList([
            xf.new_zeros(xf.shape[2], xf.shape[3],
                         self.params.sample_memory_size, cdim, 2)
            for xf, cdim in zip(train_xf, self.compressed_dim)

        # Initialize filter
        self.filter = TensorList([
            xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2)
            for xf, cdim in zip(train_xf, self.compressed_dim)

        # Do joint optimization
        # NOTE(review): the FactorizedConvProblem and GaussNewtonCG argument
        # lists are cut off, as is the statement under the `if` below.
        self.joint_problem = FactorizedConvProblem(self.init_training_samples,
                                                   self.yf, self.reg_filter,
        joint_var = self.filter.concat(self.projection_matrix)
        self.joint_optimizer = GaussNewtonCG(self.joint_problem,
                                             debug=(self.params.debug >= 3))

        if self.params.update_projection_matrix:
                self.params.init_CG_iter // self.params.init_GN_iter,

        # Re-project samples with the new projection matrix
        compressed_samples = complex.mtimes(self.init_training_samples,
        for train_samp, init_samp in zip(self.training_samples,
            train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

        # Initialize optimizer
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(self.filter, self.training_samples,
                                       self.yf, self.sample_weights,
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone(

        # NOTE(review): the body of this `if` is missing from the listing.
        if not self.params.update_projection_matrix:

        # Post optimization


    def get_iou_features(self):
        """Return the feature extractor's unique 'iounet_features' attribute."""
        feature_extractor = self.params.features
        return feature_extractor.get_unique_attribute('iounet_features')

    def get_iou_backbone_features(self):
        # Looks up a unique attribute on the feature extractor
        # (self.params.features) and returns it.
        # NOTE(review): the call below is cut off in this excerpt (its
        # argument and closing parenthesis are missing); restore it from the
        # upstream source before use.
        return self.params.features.get_unique_attribute(

    def init_dr_net(self):
        # Sets up the box predictor (self.iou_predictor) and caches the target
        # feature (self.target_feat) computed from the initial target box.
        # NOTE(review): several statements below are cut off in this excerpt
        # (unbalanced parentheses, missing lines); restore them from the
        # upstream source before relying on this method.
        # Setup IoU net
        self.iou_predictor = self.params.features.get_unique_attribute(
        for p in self.iou_predictor.parameters():
            # Freeze the predictor: its parameters receive no gradients.
            p.requires_grad = False

        # Get target boxes for the different augmentations
        self.iou_target_box = self.get_iounet_box(self.pos, self.target_sz,
        target_boxes = TensorList()
        # The flag is forced to False here, so the augmentation branch right
        # below is never entered.
        self.params.iounet_augmentation = False
        if self.params.iounet_augmentation:
            for T in self.transforms:
                if not isinstance(
                        T, (augmentation.Identity, augmentation.Translation,
                            augmentation.FlipVertical, augmentation.Blur)):
                    self.iou_target_box +
                    torch.Tensor([T.shift[1], T.shift[0], 0, 0]))
        target_boxes = torch.cat(target_boxes.view(1, 4),

        # Get iou features
        iou_backbone_features = self.get_iou_backbone_features()

        # Remove other augmentations such as rotation
        # (keeps only the first target_boxes.shape[0] entries of each feature)
        iou_backbone_features = TensorList(
            [x[:target_boxes.shape[0], ...] for x in iou_backbone_features])

        # Extract target feat
        with torch.no_grad():
            target_feat = self.iou_predictor.get_filter(
                iou_backbone_features, target_boxes)
        # Detach and average each returned tensor over its first dimension.
        self.target_feat = TensorList(
            [x.detach().mean(0) for x in target_feat])

        # Optional path taken when params.iounet_not_use_reference is set
        # (defaults to False when the attribute is absent).
        if getattr(self.params, 'iounet_not_use_reference', False):
            self.target_feat = TensorList([
                                tf.norm() / tf.numel())
                for tf in self.target_feat

    def offset2box(self, init_box, offset):
        """Apply regression offsets to boxes.

        Columns 0/1 of ``init_box`` are shifted by half of columns 2/3 (used
        as width/height) to obtain the box centre. ``offset`` is read in
        groups of four columns (dx, dy, dw, dh). The result has the shape of
        ``offset`` and stores, per group, the predicted
        (centre x, centre y, width, height).
        """
        box_w = init_box[:, 2]
        box_h = init_box[:, 3]
        center_x = init_box[:, 0] + 0.5 * box_w
        center_y = init_box[:, 1] + 0.5 * box_h

        # Unit weights: the divisions below leave the offset values unchanged.
        weight_x, weight_y, weight_w, weight_h = 1, 1, 1, 1
        delta_x = offset[:, 0::4] / weight_x
        delta_y = offset[:, 1::4] / weight_y
        delta_w = offset[:, 2::4] / weight_w
        delta_h = offset[:, 3::4] / weight_h

        # Cap the log-scale deltas so the exponentials below stay bounded.
        log_scale_cap = np.log(1000. / 16.)
        delta_w = torch.clamp(delta_w, max=log_scale_cap)
        delta_h = torch.clamp(delta_h, max=log_scale_cap)

        result = offset.new_zeros(offset.shape)
        # Centre: shift by the delta scaled with the box size.
        result[:, 0::4] = delta_x * box_w[:, None] + center_x[:, None]
        result[:, 1::4] = delta_y * box_h[:, None] + center_y[:, None]
        # Size: scale exponentially.
        result[:, 2::4] = torch.exp(delta_w) * box_w[:, None]
        result[:, 3::4] = torch.exp(delta_h) * box_h[:, None]
        return result

    def get_iounet_box(self, pos, sz, sample_pos, sample_scale):
        """Map a target given in original image coordinates into the sample.

        All inputs are in original image coordinates. The result is the
        concatenation of the flipped upper-left corner and the flipped size,
        both expressed relative to a sample of size ``self.iou_img_sample_sz``.
        """
        sample_center = (self.iou_img_sample_sz - 1) / 2
        center_in_sample = (pos - sample_pos) / sample_scale + sample_center
        size_in_sample = sz / sample_scale
        upper_left = center_in_sample - (size_in_sample - 1) / 2
        flipped_corner = upper_left.flip((0, ))
        flipped_size = size_in_sample.flip((0, ))
        return torch.cat([flipped_corner, flipped_size])

    def predict_target_box(self,
        # NOTE(review): the parameter list above and several calls below are
        # cut off in this excerpt. The body reads sample_pos, sample_scale and
        # scale_ind, which are presumably the missing parameters -- confirm
        # against the upstream source.
        # Refines self.pos, self.target_sz and self.target_scale from the box
        # regressed by self.iou_predictor.
        # print(self.pos,sample_pos,self.target_sz)
        init_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos,
        # Two leading singleton dimensions are added before the box is moved
        # to the GPU; .cuda() is called unconditionally here.
        init_box = init_box.unsqueeze(0)
        init_box = init_box.unsqueeze(0)
        init_box = init_box.cuda()
        # print(init_box.shape)
        iou_features = self.get_iou_features()
        # Keep only the entry of the selected scale from every feature.
        iou_features = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in iou_features])
        reg = self.iou_predictor.predict_box(self.target_feat, iou_features,
        # print('reg',reg)
        # Flatten boxes and regression output to rows of four values.
        init_box = init_box.view(-1, 4)
        reg = reg.view(-1, 4)

        predicted_box = self.offset2box(init_box, reg)

        # print(predicted_box.shape)
        # Use the first predicted row, moved back to the CPU.
        predicted_box = predicted_box[0, :].cpu()
        # print(predicted_box.shape,self.iou_img_sample_sz.shape)
        # offset2box stores (centre x, centre y, width, height), so the first
        # two entries are treated as the box centre within the sample.
        new_pos = predicted_box[:2] - (self.iou_img_sample_sz - 1) / 2
        new_pos = new_pos.flip((0, )) * sample_scale + sample_pos
        new_target_sz = predicted_box[2:].flip((0, )) * sample_scale
        new_scale = torch.sqrt(new_target_sz.prod() /

        # Update position
        # new_pos = predicted_box[:2] + predicted_box[2:]/2 - (self.iou_img_sample_sz - 1) / 2
        # new_pos = new_pos.flip((0,)) * sample_scale + sample_pos
        # new_target_sz = predicted_box[2:].flip((0,)) * sample_scale
        # new_scale = torch.sqrt(new_target_sz.prod() / self.base_target_sz.prod())

        # Keep a separate copy of the regressed position.
        self.pos_drnet = new_pos.clone()
        # print('pos',self.pos,new_pos)

        self.pos = new_pos.clone()
        # print('target_sz',self.target_sz,new_target_sz)
        self.target_sz = new_target_sz

        self.target_scale = new_scale

    def track(self, image):
        # Runs one tracking step on `image`: localizes the target, refines the
        # box, optionally trains the filter, and returns the new state as a
        # list built from self.pos and self.target_sz (see the end of the
        # method).
        # NOTE(review): several statements in this excerpt are cut off
        # (unbalanced parentheses, an empty "Update memory" step); restore
        # them from the upstream source before relying on this method.

        self.frame_num += 1

        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_xf = self.extract_fourier_sample(im, self.pos, sample_scales,

        # Compute scores
        sf = self.apply_filter(test_xf)
        translation_vec, scale_ind, s = self.localize_target(sf)

        scale_change_factor = self.params.scale_factors[scale_ind]

        # Update position and scale
        self.update_state(sample_pos + translation_vec,
                          self.target_scale * scale_change_factor)
        # Refine position/size with the box predictor (overwrites self.pos,
        # self.target_sz and self.target_scale).
        self.predict_target_box(sample_pos, sample_scales[scale_ind],

        # Debug visualisation, enabled by params.debug levels 2 and 3.
        if self.params.debug >= 2:
            show_tensor(s[scale_ind, ...], 5)
        if self.params.debug >= 3:
            for i, hf in enumerate(self.filter):
                show_tensor(fourier.sample_fs(hf).abs().mean(1), 6 + i)

        # ------- UPDATE ------- #

        # Get train sample
        # (the entry of the selected scale from every test sample)
        train_xf = TensorList(
            [xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

        # Shift the sample
        # by the offset between the updated position and the sampling position
        shift_samp = 2 * math.pi * (self.pos - sample_pos) / (
            sample_scales[scale_ind] * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Update memory
        # NOTE(review): no statement follows this step in the excerpt.

        # Train filter
        # only on frames where frame_num % train_skipping == 1
        if self.frame_num % self.params.train_skipping == 1:
            self.filter_optimizer.run(self.params.CG_iter, train_xf)

        # Return new state
        # as [pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1], sz[0]]
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        return new_state.tolist()

    def apply_filter(self, sample_xf: TensorList) -> torch.Tensor:
        """Multiply the sample with ``self.filter`` via ``complex.mult`` and
        sum the product over dimension 1 (kept as a singleton dimension)."""
        filtered = complex.mult(self.filter, sample_xf)
        return filtered.sum(1, keepdim=True)

    def localize_target(self, sf: TensorList):
        # Fuses the filter responses `sf` into score maps according to
        # params.score_fusion_strategy, finds the maximum, and returns
        # (translation_vec, scale_ind, scores).
        # NOTE(review): this excerpt has lost lines: one call is cut off, the
        # F.pad(...) expression has no enclosing statement, and the final
        # `raise` is no longer inside an `else:` branch. Restore from the
        # upstream source before relying on this method.
        if self.params.score_fusion_strategy == 'sum':
            scores = fourier.sample_fs(fourier.sum_fs(sf), self.output_sz)
        elif self.params.score_fusion_strategy == 'weightedsum':
            weight = self.fparams.attribute('translation_weight')
            scores = fourier.sample_fs(fourier.sum_fs(weight * sf),
        elif self.params.score_fusion_strategy == 'transcale':
            alpha = self.fparams.attribute('scale_weight')
            beta = self.fparams.attribute('translation_weight')
            # One sample size per scale factor: output_sz scaled and rounded.
            sample_sz = torch.round(
                self.output_sz.view(1, -1) *
                self.params.scale_factors.view(-1, 1))
            scores = 0
            for sfe, a, b in zip(sf, alpha, beta):
                sfe = fourier.shift_fs(sfe, math.pi * torch.ones(2))
                scores_scales = []
                for sind, sz in enumerate(sample_sz):
                    # Padding needed to bring a map of size sz up to output_sz.
                    pd = (self.output_sz - sz) / 2
                        F.pad(fourier.sample_fs(sfe[sind:sind + 1, ...], sz),
                              (math.floor(pd[1].item()), math.ceil(
                                  pd[1].item()), math.floor(
                                      pd[0].item()), math.ceil(pd[0].item()))))
                scores_cat = torch.cat(scores_scales)
                # Blend the mean over scales (weight b - a) with the
                # per-scale scores (weight a).
                scores = scores + (b - a) * scores_cat.mean(
                    dim=0, keepdim=True) + a * scores_cat
            raise ValueError('Unknown score fusion strategy.')

        # Get maximum
        max_score, max_disp = dcf.max2d(scores)
        _, scale_ind = torch.max(max_score, dim=0)
        max_disp = max_disp.float().cpu()

        # Convert to displacements in the base scale
        if self.params.score_fusion_strategy in ['sum', 'weightedsum']:
            # Wrap the displacement into [-output_sz / 2, output_sz / 2).
            disp = (max_disp +
                    self.output_sz / 2) % self.output_sz - self.output_sz / 2
        elif self.params.score_fusion_strategy == 'transcale':
            disp = max_disp - self.output_sz / 2

        # Compute translation vector and scale change factor
        translation_vec = disp[scale_ind, ...].view(-1) * (
            self.img_support_sz / self.output_sz) * self.target_scale
        if self.params.score_fusion_strategy in ['sum', 'weightedsum']:
            translation_vec *= self.params.scale_factors[scale_ind]

        return translation_vec, scale_ind, scores

    def extract_sample(self, im: torch.Tensor, pos: torch.Tensor, scales,
                       sz: torch.Tensor):
        """Forward the arguments to ``self.params.features.extract`` and
        return its result."""
        feature_extractor = self.params.features
        return feature_extractor.extract(im, pos, scales, sz)

    def extract_fourier_sample(self, im: torch.Tensor, pos: torch.Tensor,
                               scales, sz: torch.Tensor) -> TensorList:
        """Extract a sample, project it, then preprocess it.

        Chains ``extract_sample`` -> ``project_sample`` ->
        ``preprocess_sample`` and returns the final result.
        """
        raw_sample = self.extract_sample(im, pos, scales, sz)
        projected = self.project_sample(raw_sample)
        return self.preprocess_sample(projected)

    def preprocess_sample(self, x: TensorList) -> TensorList:
        # Windows the sample, transforms it with fourier.cfft2, and
        # interpolates each result with the matching entry of self.interp_fs.
        # NOTE(review): the closing brackets of the returned TensorList are
        # missing in this excerpt; restore them from the upstream source.
        # NOTE(review): `x *= self.window` may modify the caller's object in
        # place if TensorList implements in-place multiply -- TODO confirm.
        x *= self.window
        sample_xf = fourier.cfft2(x)
        return TensorList([
            dcf.interpolate_dft(xf, bf)
            for xf, bf in zip(sample_xf, self.interp_fs)

    def project_sample(self, x: TensorList):
        """Project ``x`` with ``self.projection_matrix``.

        When the projection matrix is ``None`` the input is returned as is.
        Otherwise the input is permuted with (2, 3, 0, 1), multiplied by the
        matrix with ``torch.matmul``, and permuted with (2, 3, 0, 1) again.
        """
        def _apply_projection(sample: torch.Tensor, proj: torch.Tensor):
            if proj is not None:
                rearranged = sample.permute(2, 3, 0, 1)
                return torch.matmul(rearranged, proj).permute(2, 3, 0, 1)
            return sample

        return _apply_projection(x, self.projection_matrix)

    def generate_init_samples(self, im: torch.Tensor) -> TensorList:
        # Builds the list of augmentation transforms selected by
        # params.augmentation, extracts the transformed samples from `im`, and
        # returns them.
        # NOTE(review): this excerpt has lost lines: the statements that
        # extend `transforms`, the enumerate(...) arguments and the dropout
        # call are cut off. Restore from the upstream source before use.
        # Do data augmentation
        # (the un-augmented identity transform always comes first)
        transforms = [augmentation.Identity()]
        if 'shift' in self.params.augmentation:
                for shift in self.params.augmentation['shift']
        if 'fliplr' in self.params.augmentation and self.params.augmentation[
        if 'rotate' in self.params.augmentation:
                for angle in self.params.augmentation['rotate']
        if 'blur' in self.params.augmentation:
                for sigma in self.params.augmentation['blur']

        init_samples = self.params.features.extract_transformed(
            im, self.pos, self.target_scale, self.img_sample_sz, transforms)

        # Remove augmented samples for those that shall not have
        # (only the first sample is kept for such features)
        for i, use_aug in enumerate(
            if not use_aug:
                init_samples[i] = init_samples[i][0:1, ...]

        # Optional dropout augmentation: the 'dropout' entry unpacks into
        # (num, prob).
        if 'dropout' in self.params.augmentation:
            num, prob = self.params.augmentation['dropout']
            for i, use_aug in enumerate(
                if use_aug:
                    init_samples[i] = torch.cat([
                        F.dropout2d(init_samples[i][0:1, ...].expand(
                            num, -1, -1, -1),

        return init_samples

    def update_memory(self, sample_xf: TensorList):
        # Writes the new sample into the training-sample memory at the slot
        # chosen by update_sample_weights().
        # NOTE(review): the zip(...) call below is cut off in this excerpt;
        # restore it from the upstream source.
        # Update weights and get index to replace
        replace_ind = self.update_sample_weights()
        for train_samp, xf, ind in zip(self.training_samples, sample_xf,
            # Permute the sample with (2, 3, 0, 1, 4) before storing it in
            # slot `ind` of the memory's third dimension.
            train_samp[:, :, ind:ind + 1, :, :] = xf.permute(2, 3, 0, 1, 4)

    def update_sample_weights(self):
        # Updates the per-feature sample weights and returns the list of
        # memory indices to replace.
        # NOTE(review): this excerpt has lost lines: the zip(...) arguments
        # are cut off, the `else:` branches are missing, and nothing appends
        # to `replace_ind`. Restore from the upstream source before use.
        replace_ind = []
        for sw, prev_ind, num_samp, fparams in zip(self.sample_weights,
            if num_samp == 0 or fparams.learning_rate == 1:
                # Put all weight on slot 0.
                sw[:] = 0
                sw[0] = 1
                r_ind = 0
                # Get index to replace
                # (the slot with the smallest weight)
                _, r_ind = torch.min(sw, 0)
                r_ind = r_ind.item()

                # Update weights
                if prev_ind is None:
                    sw /= 1 - fparams.learning_rate
                    sw[r_ind] = fparams.learning_rate
                    sw[r_ind] = sw[prev_ind] / (1 - fparams.learning_rate)

            # Normalize the weights to sum to one.
            sw /= sw.sum()

        self.previous_replace_ind = replace_ind.copy()
        self.num_stored_samples += 1
        return replace_ind

    def update_state(self, new_pos, new_scale):
        # Updates self.target_scale, self.target_sz and self.pos from the new
        # position and scale, clamping both.
        # NOTE(review): the clamp(...) and torch.max(...) calls below are cut
        # off in this excerpt; restore them from the upstream source.
        # Update scale
        self.target_scale = new_scale.clamp(self.min_scale_factor,
        self.target_sz = self.base_target_sz * self.target_scale

        # Update pos
        inside_ratio = 0.2
        # (inside_ratio - 0.5) is negative, so this offset points opposite to
        # the target size.
        inside_offset = (inside_ratio - 0.5) * self.target_sz
        self.pos = torch.max(torch.min(new_pos, self.image_sz - inside_offset),

    def symmetrize_filter(self):
        """Symmetrize index 0 of dimension 3 of every filter, in place.

        The slice is halved and then the ``complex.conj`` of its flip along
        dimension 2 is added to it.
        """
        for filt in self.filter:
            # Basic indexing yields a view, so the in-place operations below
            # update the filter itself.
            first_column = filt[:, :, :, 0, :]
            first_column /= 2
            mirrored = first_column.flip((2, ))
            first_column += complex.conj(mirrored)
コード例 #19
 def dim(self):
     """Collect ``dim()`` of every feature accepted by ``_return_feature``
     and return the unrolled TensorList."""
     selected_dims = [
         feature.dim() for feature in self.features
         if self._return_feature(feature)
     ]
     return TensorList(selected_dims).unroll()
コード例 #20
ファイル: atom_lof.py プロジェクト: zzfpython/MetricNet
    def track(self, image) -> dict:
        # Runs one tracking step on `image` and returns a dict with the keys
        # 'target_bbox' (list) and 'max_score' (float).
        # NOTE(review): this excerpt has lost lines: several calls are cut
        # off, some `else:` branches are missing, and the bodies of the
        # "Train filter" branches are empty. Restore from the upstream source
        # before relying on this method.
        # print('track',torch.rand(2))
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)
        self.im = im  # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_x = self.extract_processed_sample(im, self.pos, sample_scales,

        # Compute scores
        scores_raw = self.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

        # Update position and scale
        # (skipped entirely when the target was not found)
        if flag != 'not_found':
            if self.use_iou_net:
                update_scale_flag = getattr(self.params,
                                            True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                       scale_ind, update_scale_flag)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec,

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score
        self.debug_info['flag'] = flag

        # Visualisation: visdom when available, otherwise debug level >= 2.
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
                        title='Max score = {:.2f}'.format(max_score))

        # metricnet
        # NOTE(review): as written, pos_tmp is always overwritten by self.pos
        # on the second assignment; an `else:` line appears to be missing.
        if self.use_iou_net and flag != 'not_found':
            pos_tmp = self.pos_iounet.clone()
            pos_tmp = self.pos
        # Box laid out as [pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2,
        # sz[1], sz[0]], converted to a numpy array for the metric model.
        state_tmp = torch.cat(
            (pos_tmp[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))
        state_tmp = state_tmp.numpy()
        with torch.no_grad():
            current_target_metric_feature0 = get_target_feature(
                self.metric_model, state_tmp, np.array(image))
            current_target_metric_feature = current_target_metric_feature0.cpu(
        # success, target_dist = judge_success(self.metric_model, current_target_metric_feature,
        #                                      self.target_metric_feature, self.params)
        lof_score, success = lof(current_target_metric_feature,

        # During the first train_skipping frames, update self.lof_thresh as a
        # running average of lof_score * params.lof_rate.
        if self.frame_num <= self.params.train_skipping:
            self.lof_thresh = (self.lof_thresh *
                               (self.frame_num - 2) + lof_score *
                               self.params.lof_rate) / (self.frame_num - 1)
            if self.frame_num == self.params.train_skipping:
                print('lof-thresh', self.lof_thresh)
        # print(self.frame_num, ':    lof:', lof_score, '  ', success)
        # A successful frame is rejected again when a stored feature is
        # closer than self.similar.
        if success:
            for ii in range(len(self.target_features_all) - 1, -1, -1):
                dist = torch.norm(self.target_features_all[ii] -
                if dist < self.similar:
                    success = 0

        # ------- UPDATE ------- #

        # Check flags and set learning rate if hard negative
        update_flag = flag not in ['not_found', 'uncertain']
        # After the first train_skipping frames the update also requires
        # `success`.
        if self.frame_num > self.params.train_skipping:
            update_flag = update_flag and success
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList(
                [x[scale_ind:scale_ind + 1, ...] for x in test_x])

            # Create label for sample
            train_y = self.get_label_function(sample_pos,

            # Update memory
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        if hard_negative:
        elif (self.frame_num - 1) % self.params.train_skipping == 0:

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and flag != 'not_found':
            self.pos = self.pos_iounet.clone()

        # Return new state
        new_state = torch.cat(
            (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
             self.target_sz[[1, 0]]))

        out = {'target_bbox': new_state.tolist(), 'max_score': max_score}
        return out
コード例 #21
 def get_attribute(self, name: str, ignore_missing: bool = False):
     """Collect attribute ``name`` from every selected feature.

     Only features accepted by ``self._return_feature`` are considered.

     Args:
         name: Attribute to read from each feature.
         ignore_missing: When true, features lacking the attribute are
             skipped; otherwise ``None`` is stored for them.

     Returns:
         A TensorList with one entry per contributing feature.
     """
     selected = [f for f in self.features if self._return_feature(f)]
     if ignore_missing:
         return TensorList([getattr(f, name) for f in selected if hasattr(f, name)])
     # Default path: previously unreachable (it sat inside the `if` body after
     # a return), which made the method return None.
     return TensorList([getattr(f, name, None) for f in selected])
コード例 #22
ファイル: depth.py プロジェクト: yjybuaa/pytracking
    def init_optimization(self, train_x, init_y):
        # Initializes the filter, optionally runs the joint optimization of
        # filter and projection matrix, re-projects the initial samples, and
        # sets up self.filter_optimizer.
        # NOTE(review): this excerpt has lost lines: empty `if` bodies,
        # missing `else:` lines and unclosed optimizer constructor calls.
        # Restore from the upstream source before relying on this method.
        # Initialize filter
        filter_init_method = self.params.get('filter_init_method', 'zeros')
        self.filter = TensorList(
            [x.new_zeros(1, cdim, sz[0], sz[1]) for x, cdim, sz in zip(train_x, self.compressed_dim, self.kernel_size)])
        if filter_init_method == 'zeros':
        elif filter_init_method == 'randn':
            for f in self.filter:
                # Normal initialization with std 1 / numel.
                f.normal_(0, 1/f.numel())
            raise ValueError('Unknown "filter_init_method"')

        # Get parameters
        # The projection matrix is only updated when it is also in use.
        self.params.update_projection_matrix = self.params.get('update_projection_matrix', True) and self.params.use_projection_matrix
        optimizer = self.params.get('optimizer', 'GaussNewtonCG')

        # Setup factorized joint optimization
        if self.params.update_projection_matrix:
            self.joint_problem = FactorizedConvProblem(self.init_training_samples, init_y, self.filter_reg,
                                                       self.fparams.attribute('projection_reg'), self.params, self.init_sample_weights,
                                                       self.projection_activation, self.response_activation)

            # Variable containing both filter and projection matrix
            joint_var = self.filter.concat(self.projection_matrix)

            # Initialize optimizer
            analyze_convergence = self.params.get('analyze_convergence', False)
            if optimizer == 'GaussNewtonCG':
                self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var, debug=(self.params.debug >= 1),
                                                     plotting=(self.params.debug >= 3), analyze=analyze_convergence,
            elif optimizer == 'GradientDescentL2':
                self.joint_optimizer = GradientDescentL2(self.joint_problem, joint_var, self.params.optimizer_step_length, self.params.optimizer_momentum, plotting=(self.params.debug >= 3), debug=(self.params.debug >= 1),

            # Do joint optimization
            # NOTE(review): as written, the `//` division runs only when
            # init_CG_iter is a list or tuple; lines appear to be missing.
            if isinstance(self.params.init_CG_iter, (list, tuple)):
                self.joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter, self.params.init_GN_iter)

            # Convergence analysis: append losses and gradient magnitudes to
            # text files, then abort with RuntimeError.
            if analyze_convergence:
                opt_name = 'CG' if self.params.get('CG_optimizer', True) else 'GD'
                for val_name, values in zip(['loss', 'gradient'], [self.joint_optimizer.losses, self.joint_optimizer.gradient_mags]):
                    val_str = ' '.join(['{:.8e}'.format(v.item()) for v in values])
                    file_name = '{}_{}.txt'.format(opt_name, val_name)
                    with open(file_name, 'a') as f:
                        f.write(val_str + '\n')
                raise RuntimeError('Exiting')

        # Re-project samples with the new projection matrix
        compressed_samples = self.project_sample(self.init_training_samples, self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples, compressed_samples):
            # Fill the leading slots of the sample memory.
            train_samp[:init_samp.shape[0],...] = init_samp

        self.hinge_mask = None

        # Initialize optimizer
        self.conv_problem = ConvProblem(self.training_samples, self.y, self.filter_reg, self.sample_weights, self.response_activation)

        if optimizer == 'GaussNewtonCG':
            self.filter_optimizer = ConjugateGradient(self.conv_problem, self.filter, fletcher_reeves=self.params.fletcher_reeves,
                                                      direction_forget_factor=self.params.direction_forget_factor, debug=(self.params.debug>=1),
                                                      plotting=(self.params.debug>=3), visdom=self.visdom)
        elif optimizer == 'GradientDescentL2':
            self.filter_optimizer = GradientDescentL2(self.conv_problem, self.filter, self.params.optimizer_step_length,
                                                      self.params.optimizer_momentum, debug=(self.params.debug >= 1),
                                                      plotting=(self.params.debug>=3), visdom=self.visdom)

        # Transfer losses from previous optimization
        if self.params.update_projection_matrix:
            self.filter_optimizer.residuals = self.joint_optimizer.residuals
            self.filter_optimizer.losses = self.joint_optimizer.losses

        if not self.params.update_projection_matrix:

        # Post optimization

        # Free memory
        del self.init_training_samples
        if self.params.use_projection_matrix:
            del self.joint_problem, self.joint_optimizer
コード例 #23
ファイル: eco.py プロジェクト: yaolinhua/pytracking-master
    def initialize(self, image, state, *args, **kwargs):
        # Initializes the tracker from the first frame `image` and the
        # initial box `state` (indexed as state[0..3]).
        # NOTE(review): this excerpt has lost lines: empty `if` bodies, a
        # missing `else:` in the optimization options, and empty "Initialize
        # features" / "Post optimization" steps. Restore from the upstream
        # source before relying on this method.

        # Initialize some stuff
        self.frame_num = 1
        if not hasattr(self.params, 'device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        # pos is the box centre built from (state[1], state[0]) plus half the
        # extent; target_sz is (state[3], state[2]).
        self.pos = torch.Tensor([state[1] + (state[3] - 1)/2, state[0] + (state[2] - 1)/2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area
        # Rescale so the search area lies within
        # [min_image_sample_size, max_image_sample_size].
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale =  math.sqrt(search_area / self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale =  math.sqrt(search_area / self.params.min_image_sample_size)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = torch.round(torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
        # Adjust the size so that it becomes an odd multiple of the maximum
        # feature stride.
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        # Add one to even feature sizes so that the filter size is odd.
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz    # Interpolated size of the output
        self.compressed_dim = self.fparams.attribute('compressed_dim')

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        self.interp_fs = TensorList([dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                                self.params.interpolation_bicubic_a, self.params.interpolation_centering,
                                                self.params.interpolation_windowing, self.params.device) for sz in self.filter_sz])

        # Get regularization filter
        self.reg_filter = TensorList([dcf.get_reg_filter(self.img_support_sz, self.base_target_sz, fparams).to(self.params.device)
                                      for fparams in self.fparams])
        # Inner product of each flattened regularization filter with itself.
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(self.base_target_sz.prod()) * output_sigma_factor
        self.yf = TensorList([dcf.label_function(sz, sig).to(self.params.device) for sz, sig in zip(self.filter_sz, sigma)])

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
        # NOTE(review): as written, the second assignment always overwrites
        # the 0 set on the line before it; an `else:` line appears to be
        # missing.
        if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
            self.params.direction_forget_factor = (1 - max(self.params.precond_learning_rate))**self.params.CG_forgetting_rate

        # Convert image
        im = numpy_to_torch(image)

        # Setup bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im)

        # Initialize projection matrix
        # from the leading left singular vectors of the mean-centred sample
        # covariance (first cdim columns of U from torch.svd).
        x_mat = TensorList([e.permute(1,0,2,3).reshape(e.shape[1], -1).clone() for e in x])
        x_mat -= x_mat.mean(dim=1, keepdim=True)
        cov_x = x_mat @ x_mat.t()
        self.projection_matrix = TensorList([torch.svd(C)[0][:,:cdim].clone() for C, cdim in zip(cov_x, self.compressed_dim)])

        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(shift) / self.img_support_sz
                    xf[1+i:2+i,...] = fourier.shift_fs(xf[1+i:2+i,...], shift=shift_samp)

        # Shift sample
        # by the sub-pixel offset between pos and its rounded value
        shift_samp = 2*math.pi * (self.pos - self.pos.round()) / (self.target_scale * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)
        # Uniform weights: each initial sample gets 1 / (number of samples).
        self.init_sample_weights = TensorList([xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None]*len(self.num_stored_samples)
        self.sample_weights = TensorList([xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        for sw, init_sw, num in zip(self.sample_weights, self.init_sample_weights, num_init_samples):
            sw[:num] = init_sw

        # Initialize memory
        self.training_samples = TensorList(
            [xf.new_zeros(xf.shape[2], xf.shape[3], self.params.sample_memory_size, cdim, 2) for xf, cdim in zip(train_xf, self.compressed_dim)])

        # Initialize filter
        self.filter = TensorList(
            [xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2) for xf, cdim in zip(train_xf, self.compressed_dim)])

        # Do joint optimization
        self.joint_problem = FactorizedConvProblem(self.init_training_samples, self.yf, self.reg_filter, self.projection_matrix, self.params, self.init_sample_weights)
        joint_var = self.filter.concat(self.projection_matrix)
        self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var, debug=(self.params.debug>=3))

        if self.params.update_projection_matrix:
            self.joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter, self.params.init_GN_iter)

        # Re-project samples with the new projection matrix
        compressed_samples = complex.mtimes(self.init_training_samples, self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples, compressed_samples):
            # Fill the leading slots of the sample memory (third dimension).
            train_samp[:,:,:init_samp.shape[2],:,:] = init_samp

        # Initialize optimizer
        self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
        self.filter_optimizer.register(self.filter, self.training_samples, self.yf, self.sample_weights, self.reg_filter)
        # Carry sample energy and residuals over from the joint optimization.
        self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
        self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone()

        if not self.params.update_projection_matrix:

        # Post optimization

コード例 #24
ファイル: depth.py プロジェクト: yjybuaa/pytracking
class Depth(BaseTracker):

    multiobj_mode = 'parallel'

    def initialize_features(self):
        """Initialize the feature extractor once per tracker instance.

        The ``features_initialized`` attribute is used as a guard so repeated
        calls do not initialize the features again.
        """
        if not getattr(self, 'features_initialized', False):
            # NOTE(review): the guarded statement was missing here (the ``if``
            # had no body). Restored as the feature extractor's own
            # initializer -- confirm against the features API.
            self.params.features.initialize()
        self.features_initialized = True

    def initialize(self, image, depth, info: dict) -> dict:
        """Set up the tracker state from the first frame.

        Args:
            image: First frame; ``image.shape[2] == 3`` is used to detect color.
            depth: Depth map of the first frame, converted with ``numpy_to_torch``.
            info: Dict holding the initial box under ``'init_bbox'``
                (presumably ``(x, y, w, h)`` -- confirm against the caller).

        Returns:
            Dict with the elapsed initialization time under ``'time'``.

        Raises:
            ValueError: If ``params.search_area_shape`` is neither ``'square'``
                nor ``'initrect'``.
        """
        state = info['init_bbox']

        # Initialize some stuff
        self.frame_num = 1
        if not self.params.has('device'):
            self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

        # Initialize features
        self.initialize_features()

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        tic = time.time()

        # Get position and size (stored with the box axes swapped: index 1/3 first)
        self.pos = torch.Tensor([state[1] + (state[3] - 1)/2, state[0] + (state[2] - 1)/2])
        self.target_sz = torch.Tensor([state[3], state[2]])

        # Set search area: rescale so the search area stays within the configured bounds
        self.target_scale = 1.0
        search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
        if search_area > self.params.max_image_sample_size:
            self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
        elif search_area < self.params.min_image_sample_size:
            self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

        # Check if IoUNet is used
        self.use_iou_net = self.params.get('use_iou_net', True)

        # Target size in base scale
        self.base_target_sz = self.target_sz / self.target_scale

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        if self.params.get('search_area_shape', 'square') == 'square':
            self.img_sample_sz = torch.round(torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
        elif self.params.search_area_shape == 'initrect':
            self.img_sample_sz = torch.round(self.base_target_sz * self.params.search_area_scale)
        else:
            raise ValueError('Unknown search area shape')
        if self.params.feature_size_odd:
            self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)
        else:
            self.img_sample_sz += feat_max_stride - (self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

        # Set sizes
        self.img_support_sz = self.img_sample_sz
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
        self.kernel_size = self.fparams.attribute('kernel_size')

        self.iou_img_sample_sz = self.img_sample_sz

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
        if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (1 - max(self.params.precond_learning_rate))**self.params.CG_forgetting_rate

        # Optional window applied to the score map
        self.output_window = None
        if self.params.get('window_output', False):
            if self.params.get('use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(self.output_sz.long(), self.output_sz.long()*self.params.effective_search_area / self.params.search_area_scale, centered=False).to(self.params.device)
            else:
                self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.params.device)

        # Initialize some learning things
        self.init_learning()

        # Convert image
        im = numpy_to_torch(image)
        self.im = im    # For debugging only

        # Convert depth
        dp = numpy_to_torch(depth)
        self.dp = dp

        # Setup scale bounds
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
        self.min_scale_factor = torch.max(10 / self.base_target_sz)
        self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

        # Extract and transform sample
        x = self.generate_init_samples(im, dp)

        # Initialize iounet
        if self.use_iou_net:
            self.init_iou_net()

        # Initialize projection matrix
        self.init_projection_matrix(x)

        # Transform to get the training sample
        train_x = self.preprocess_sample(x)

        # Generate label function
        init_y = self.init_label_function(train_x)

        # Init memory
        self.init_memory(train_x)

        # Init optimizer and do initial optimization
        self.init_optimization(train_x, init_y)

        self.pos_iounet = self.pos.clone()

        out = {'time': time.time() - tic}
        return out

    def init_optimization(self, train_x, init_y):
        """Create the filter and optimizers and run the first-frame optimization.

        Args:
            train_x: Initial training samples; used here to allocate the filter.
            init_y: Labels for the initial samples, passed to the joint problem.

        Raises:
            ValueError: If ``params.filter_init_method`` is not ``'zeros'`` or ``'randn'``.
            RuntimeError: Deliberately, after dumping losses/gradients to text
                files, when ``params.analyze_convergence`` is set.
        """
        # Initialize filter
        filter_init_method = self.params.get('filter_init_method', 'zeros')
        self.filter = TensorList(
            [x.new_zeros(1, cdim, sz[0], sz[1]) for x, cdim, sz in zip(train_x, self.compressed_dim, self.kernel_size)])
        if filter_init_method == 'zeros':
            pass    # Filter is already allocated with zeros
        elif filter_init_method == 'randn':
            for f in self.filter:
                f.normal_(0, 1/f.numel())
        else:
            raise ValueError('Unknown "filter_init_method"')

        # Get parameters
        self.params.update_projection_matrix = self.params.get('update_projection_matrix', True) and self.params.use_projection_matrix
        optimizer = self.params.get('optimizer', 'GaussNewtonCG')

        # Setup factorized joint optimization
        if self.params.update_projection_matrix:
            self.joint_problem = FactorizedConvProblem(self.init_training_samples, init_y, self.filter_reg,
                                                       self.fparams.attribute('projection_reg'), self.params, self.init_sample_weights,
                                                       self.projection_activation, self.response_activation)

            # Variable containing both filter and projection matrix
            joint_var = self.filter.concat(self.projection_matrix)

            # Initialize optimizer
            # NOTE(review): the closing ``visdom=self.visdom`` argument of both
            # constructors was missing; restored to match the filter optimizers
            # created further down -- confirm.
            analyze_convergence = self.params.get('analyze_convergence', False)
            if optimizer == 'GaussNewtonCG':
                self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var, debug=(self.params.debug >= 1),
                                                     plotting=(self.params.debug >= 3), analyze=analyze_convergence,
                                                     visdom=self.visdom)
            elif optimizer == 'GradientDescentL2':
                self.joint_optimizer = GradientDescentL2(self.joint_problem, joint_var, self.params.optimizer_step_length, self.params.optimizer_momentum, plotting=(self.params.debug >= 3), debug=(self.params.debug >= 1),
                                                         visdom=self.visdom)

            # Do joint optimization
            # NOTE(review): the list/tuple branch body was missing; a sequence
            # cannot be floor-divided, so it is passed to run() as-is -- confirm.
            if isinstance(self.params.init_CG_iter, (list, tuple)):
                self.joint_optimizer.run(self.params.init_CG_iter)
            else:
                self.joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter, self.params.init_GN_iter)

            if analyze_convergence:
                opt_name = 'CG' if self.params.get('CG_optimizer', True) else 'GD'
                for val_name, values in zip(['loss', 'gradient'], [self.joint_optimizer.losses, self.joint_optimizer.gradient_mags]):
                    val_str = ' '.join(['{:.8e}'.format(v.item()) for v in values])
                    file_name = '{}_{}.txt'.format(opt_name, val_name)
                    with open(file_name, 'a') as f:
                        f.write(val_str + '\n')
                raise RuntimeError('Exiting')

        # Re-project samples with the new projection matrix
        compressed_samples = self.project_sample(self.init_training_samples, self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples, compressed_samples):
            train_samp[:init_samp.shape[0],...] = init_samp

        self.hinge_mask = None

        # Initialize optimizer
        self.conv_problem = ConvProblem(self.training_samples, self.y, self.filter_reg, self.sample_weights, self.response_activation)

        if optimizer == 'GaussNewtonCG':
            self.filter_optimizer = ConjugateGradient(self.conv_problem, self.filter, fletcher_reeves=self.params.fletcher_reeves,
                                                      direction_forget_factor=self.params.direction_forget_factor, debug=(self.params.debug>=1),
                                                      plotting=(self.params.debug>=3), visdom=self.visdom)
        elif optimizer == 'GradientDescentL2':
            self.filter_optimizer = GradientDescentL2(self.conv_problem, self.filter, self.params.optimizer_step_length,
                                                      self.params.optimizer_momentum, debug=(self.params.debug >= 1),
                                                      plotting=(self.params.debug>=3), visdom=self.visdom)

        # Transfer losses from previous optimization
        if self.params.update_projection_matrix:
            self.filter_optimizer.residuals = self.joint_optimizer.residuals
            self.filter_optimizer.losses = self.joint_optimizer.losses

        # NOTE(review): the two run() calls below were missing. Without joint
        # optimization the filter is presumably trained for init_CG_iter
        # iterations, followed by a post-initialization pass whose parameter
        # name (post_init_CG_iter) is an assumption -- confirm both.
        if not self.params.update_projection_matrix:
            self.filter_optimizer.run(self.params.init_CG_iter)

        # Post optimization
        self.filter_optimizer.run(self.params.post_init_CG_iter)

        # Free memory
        del self.init_training_samples
        if self.params.use_projection_matrix:
            del self.joint_problem, self.joint_optimizer

    def track(self, image, depth, info: dict = None) -> dict:
        """Localize the target in a new frame and update the model.

        Args:
            image: Current frame, converted with ``numpy_to_torch``.
            depth: Current depth map, converted with ``numpy_to_torch``.
            info: Unused in this method.

        Returns:
            Dict with the new box under ``'target_bbox'`` as a 4-element list
            (position followed by size, with the two stored axes swapped).
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)
        self.im = im    # For debugging only

        # Convert depth
        dp = numpy_to_torch(depth)
        self.dp = dp    # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_x = self.extract_processed_sample(im, dp, self.pos, sample_scales, self.img_sample_sz)

        # Compute scores
        scores_raw = self.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

        # Update position and scale
        if flag != 'not_found':
            if self.use_iou_net:
                update_scale_flag = self.params.get('update_scale_when_uncertain', True) or flag != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.refine_target_box(sample_pos, sample_scales[scale_ind], scale_ind, update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(sample_pos + translation_vec, sample_scales[scale_ind])

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score
        self.debug_info['flag'] = flag

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

        # ------- UPDATE ------- #

        # Check flags and set learning rate if hard negative
        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList([x[scale_ind:scale_ind+1, ...] for x in test_x])

            # Create label for sample
            train_y = self.get_label_function(sample_pos, sample_scales[scale_ind])

            # Update memory
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter
        # NOTE(review): both branch bodies were missing. Restored as optimizer
        # runs; the iteration-count parameter names (hard_negative_CG_iter,
        # CG_iter) are assumptions -- confirm against the parameter file.
        if hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num-1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and flag != 'not_found':
            self.pos = self.pos_iounet.clone()

        # Return new state
        new_state = torch.cat((self.pos[[1,0]] - (self.target_sz[[1,0]]-1)/2, self.target_sz[[1,0]]))

        out = {'target_bbox': new_state.tolist()}
        return out

    def apply_filter(self, sample_x: TensorList):
        """Convolve ``sample_x`` with the current filter using ``mode='same'``."""
        response = operation.conv2d(sample_x, self.filter, mode='same')
        return response

    def localize_target(self, scores_raw):
        """Turn raw filter responses into a translation and a scale index.

        Returns:
            ``(translation_vec, scale_ind, scores, flag)``. When
            ``params.advanced_localization`` is set the result of
            ``localize_advanced`` is returned instead; otherwise ``flag`` is ``None``.
        """
        # Weight each feature's response and move to the Fourier domain
        feature_weight = self.fparams.attribute('translation_weight', 1.0)
        weighted_raw = feature_weight * scores_raw
        num_cells = weighted_raw.size(2) * weighted_raw.size(3)
        sf_weighted = fourier.cfft2(weighted_raw) / num_cells
        for idx, (feat_sz, kern_sz) in enumerate(zip(self.feature_sz, self.kernel_size)):
            kernel_parity = torch.Tensor([kern_sz[0] % 2, kern_sz[1] % 2])
            sf_weighted[idx] = fourier.shift_fs(sf_weighted[idx], math.pi * (1 - kernel_parity / feat_sz))

        # Sum over features and sample at the output resolution
        summed_fs = fourier.sum_fs(sf_weighted)
        scores = fourier.sample_fs(summed_fs, self.output_sz)

        if self.output_window is not None and not self.params.get('perform_hn_without_windowing', False):
            scores *= self.output_window

        if self.params.get('advanced_localization', False):
            return self.localize_advanced(scores)

        # Locate the peak per scale, then pick the best scale
        peak_val, peak_pos = dcf.max2d(scores)
        _, scale_ind = torch.max(peak_val, dim=0)
        peak_pos = peak_pos.float().cpu()

        # Wrap the peak position into a signed displacement around zero
        half_out = self.output_sz / 2
        disp = (peak_pos + half_out) % self.output_sz - half_out

        # Displacement in image coordinates at the chosen scale
        cell_sz = self.img_support_sz / self.output_sz
        translation_vec = disp[scale_ind, ...].view(-1) * cell_sz * self.target_scale
        translation_vec *= self.params.scale_factors[scale_ind]

        # Shift the score output for visualization purposes
        if self.params.debug >= 2:
            rows, cols = scores.shape[-2:]
            scores = torch.cat([scores[..., rows//2:, :], scores[..., :rows//2, :]], -2)
            scores = torch.cat([scores[..., :, cols//2:], scores[..., :, :cols//2]], -1)

        return translation_vec, scale_ind, scores, None

    def localize_advanced(self, scores):
        """Does the advanced localization with hard negative detection and target not found.

        Returns:
            ``(translation_vec, scale_ind, scores, flag)`` where ``flag`` is one
            of ``'not_found'``, ``'hard_negative'``, ``'uncertain'`` or ``None``.
        """
        sz = scores.shape[-2:]
        split_row = (sz[0] + 1) // 2
        split_col = (sz[1] + 1) // 2

        def swap_halves(score_map):
            # Swap the two halves along each of the last two axes.
            score_map = torch.cat([score_map[..., split_row:, :], score_map[..., :split_row, :]], -2)
            return torch.cat([score_map[..., :, split_col:], score_map[..., :, :split_col]], -1)

        if self.output_window is not None and self.params.get('perform_hn_without_windowing', False):
            # Keep an un-windowed, shifted copy for the distractor search below
            scores_orig = swap_halves(scores.clone())
            scores *= self.output_window

        # Shift scores back
        scores = swap_halves(scores)

        # Find maximum
        peak_vals, peak_pos = dcf.max2d(scores)
        _, scale_ind = torch.max(peak_vals, dim=0)
        max_score1 = peak_vals[scale_ind]
        max_disp1 = peak_pos[scale_ind, ...].float().cpu().view(-1)
        target_disp1 = max_disp1 - self.output_sz // 2
        cell_sz = self.img_support_sz / self.output_sz
        translation_vec1 = target_disp1 * cell_sz * self.target_scale

        if max_score1.item() < self.params.target_not_found_threshold:
            return translation_vec1, scale_ind, scores, 'not_found'

        if self.output_window is not None and self.params.get('perform_hn_without_windowing', False):
            scores = scores_orig

        # Mask out target neighborhood
        target_neigh_sz = self.params.target_neighborhood_scale * self.target_sz / self.target_scale
        peak_row = max_disp1[0].item()
        peak_col = max_disp1[1].item()
        half_neigh_h = target_neigh_sz[0].item() / 2
        half_neigh_w = target_neigh_sz[1].item() / 2
        tneigh_top = max(round(peak_row - half_neigh_h), 0)
        tneigh_bottom = min(round(peak_row + half_neigh_h + 1), sz[0])
        tneigh_left = max(round(peak_col - half_neigh_w), 0)
        tneigh_right = min(round(peak_col + half_neigh_w + 1), sz[1])
        scores_masked = scores[scale_ind:scale_ind+1, ...].clone()
        scores_masked[..., tneigh_top:tneigh_bottom, tneigh_left:tneigh_right] = 0

        # Find new maximum
        max_score2, max_disp2 = dcf.max2d(scores_masked)
        max_disp2 = max_disp2.float().cpu().view(-1)
        target_disp2 = max_disp2 - self.output_sz // 2
        translation_vec2 = target_disp2 * cell_sz * self.target_scale

        # Handle the different cases
        if max_score2 > self.params.distractor_threshold * max_score1:
            disp_norm1 = torch.sqrt(torch.sum(target_disp1**2))
            disp_norm2 = torch.sqrt(torch.sum(target_disp2**2))
            disp_threshold = self.params.dispalcement_scale * math.sqrt(sz[0] * sz[1]) / 2

            if disp_norm2 > disp_threshold and disp_norm1 < disp_threshold:
                return translation_vec1, scale_ind, scores, 'hard_negative'
            if disp_norm2 < disp_threshold and disp_norm1 > disp_threshold:
                return translation_vec2, scale_ind, scores, 'hard_negative'
            if disp_norm2 > disp_threshold and disp_norm1 > disp_threshold:
                return translation_vec1, scale_ind, scores, 'uncertain'

            # If also the distractor is close, return with highest score
            return translation_vec1, scale_ind, scores, 'uncertain'

        second_peak_is_strong = max_score2 > self.params.hard_negative_threshold * max_score1
        if second_peak_is_strong and max_score2 > self.params.target_not_found_threshold:
            return translation_vec1, scale_ind, scores, 'hard_negative'

        return translation_vec1, scale_ind, scores, None

    def extract_sample(self, im: torch.Tensor, dp: torch.Tensor, pos: torch.Tensor, scales, sz: torch.Tensor):
        """Run the feature extractor on image and depth; return the first element of its result."""
        extracted = self.params.features.extract(im, dp, pos, scales, sz)
        return extracted[0]

    def get_iou_features(self):
        """Fetch the ``'iounet_features'`` attribute from the feature extractor."""
        features = self.params.features
        return features.get_unique_attribute('iounet_features')

    def get_iou_backbone_features(self):
        """Fetch the ``'iounet_backbone_features'`` attribute from the feature extractor."""
        features = self.params.features
        return features.get_unique_attribute('iounet_backbone_features')

    def extract_processed_sample(self, im: torch.Tensor, dp: torch.Tensor, pos: torch.Tensor, scales, sz: torch.Tensor) -> (TensorList, TensorList):
        """Extract a sample, project it, then preprocess it."""
        raw_sample = self.extract_sample(im, dp, pos, scales, sz)
        projected = self.project_sample(raw_sample)
        return self.preprocess_sample(projected)

    def preprocess_sample(self, x: TensorList) -> (TensorList, TensorList):
        if self.params.get('_feature_window', False):
            x = x * self.feature_window
        return x

    def project_sample(self, x: TensorList, proj_matrix = None):
        """Convolve ``x`` with a projection matrix and apply the projection activation.

        ``self.projection_matrix`` is used when ``proj_matrix`` is ``None``.
        """
        matrix = self.projection_matrix if proj_matrix is None else proj_matrix
        projected = operation.conv2d(x, matrix)
        return projected.apply(self.projection_activation)

    def init_learning(self):
        """Set up the feature window, filter regularization and activation functions.

        Raises:
            ValueError: If ``params.projection_activation`` or
                ``params.response_activation`` names an unknown activation.
        """
        # Get window function
        self.feature_window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Filter regularization
        self.filter_reg = self.fparams.attribute('filter_reg')

        # Activation function after the projection matrix (phi_1 in the paper)
        # The setting may be a (name, parameter) tuple; act_param is only bound
        # in that case, which the 'mlu' branch relies on.
        projection_activation = self.params.get('projection_activation', 'none')
        if isinstance(projection_activation, tuple):
            projection_activation, act_param = projection_activation

        if projection_activation == 'none':
            self.projection_activation = lambda x: x
        elif projection_activation == 'relu':
            self.projection_activation = torch.nn.ReLU(inplace=True)
        elif projection_activation == 'elu':
            self.projection_activation = torch.nn.ELU(inplace=True)
        elif projection_activation == 'mlu':
            self.projection_activation = lambda x: F.elu(F.leaky_relu(x, 1 / act_param), act_param)
        else:
            raise ValueError('Unknown activation')

        # Activation function after the output scores (phi_2 in the paper)
        response_activation = self.params.get('response_activation', 'none')
        if isinstance(response_activation, tuple):
            response_activation, act_param = response_activation

        if response_activation == 'none':
            self.response_activation = lambda x: x
        elif response_activation == 'relu':
            self.response_activation = torch.nn.ReLU(inplace=True)
        elif response_activation == 'elu':
            self.response_activation = torch.nn.ELU(inplace=True)
        elif response_activation == 'mlu':
            self.response_activation = lambda x: F.elu(F.leaky_relu(x, 1 / act_param), act_param)
        else:
            raise ValueError('Unknown activation')

    def generate_init_samples(self, im: torch.Tensor, dp: torch.Tensor) -> TensorList:
        """Build the augmentation transforms and extract the initial samples.

        The list of transforms is stored in ``self.transforms``; which ones are
        created depends on the keys present in ``params.augmentation``.
        """
        # Compute augmentation size
        expansion = self.params.get('augmentation_expansion_factor', None)
        aug_expansion_sz = self.img_sample_sz.clone()
        aug_output_sz = None
        if expansion is not None and expansion != 1:
            aug_expansion_sz = (self.img_sample_sz * expansion).long()
            # Give the expanded size the same parity as the sample size
            aug_expansion_sz += (aug_expansion_sz - self.img_sample_sz.long()) % 2
            aug_expansion_sz = aug_expansion_sz.float()
            aug_output_sz = self.img_sample_sz.long().tolist()

        # Random shift operator
        shift_factor = self.params.get('random_shift_factor', 0)
        if shift_factor > 0:
            def get_rand_shift():
                return ((torch.rand(2) - 0.5) * self.img_sample_sz * shift_factor).long().tolist()
        else:
            def get_rand_shift():
                return None

        # Create transformations
        self.transforms = transforms = [augmentation.Identity(aug_output_sz)]
        if 'shift' in self.params.augmentation:
            for shift in self.params.augmentation['shift']:
                transforms.append(augmentation.Translation(shift, aug_output_sz))
        if 'relativeshift' in self.params.augmentation:
            def to_absolute(shift):
                return (torch.Tensor(shift) * self.img_sample_sz/2).long().tolist()
            for shift in self.params.augmentation['relativeshift']:
                transforms.append(augmentation.Translation(to_absolute(shift), aug_output_sz))
        if 'fliplr' in self.params.augmentation and self.params.augmentation['fliplr']:
            transforms.append(augmentation.FlipHorizontal(aug_output_sz, get_rand_shift()))
        if 'blur' in self.params.augmentation:
            for sigma in self.params.augmentation['blur']:
                transforms.append(augmentation.Blur(sigma, aug_output_sz, get_rand_shift()))
        if 'scale' in self.params.augmentation:
            for scale_factor in self.params.augmentation['scale']:
                transforms.append(augmentation.Scale(scale_factor, aug_output_sz, get_rand_shift()))
        if 'rotate' in self.params.augmentation:
            for angle in self.params.augmentation['rotate']:
                transforms.append(augmentation.Rotate(angle, aug_output_sz, get_rand_shift()))

        # Generate initial samples
        init_samples = self.params.features.extract_transformed(im, dp, self.pos, self.target_scale, aug_expansion_sz, self.transforms)

        # Keep only the first sample for features that shall not use augmentation
        for idx, use_aug in enumerate(self.fparams.attribute('use_augmentation')):
            if use_aug:
                continue
            init_samples[idx] = init_samples[idx][0:1, ...]

        # Add dropout samples
        if 'dropout' in self.params.augmentation:
            num, prob = self.params.augmentation['dropout']
            for idx, use_aug in enumerate(self.fparams.attribute('use_augmentation')):
                if not use_aug:
                    continue
                repeated_first = init_samples[idx][0:1, ...].expand(num, -1, -1, -1)
                dropped = F.dropout2d(repeated_first, p=prob, training=True)
                init_samples[idx] = torch.cat([init_samples[idx], dropped])

        return init_samples

    def init_projection_matrix(self, x):
        """Initialize the projection matrix and the compressed feature dimension.

        With ``params.use_projection_matrix`` set, the matrix is initialized
        according to ``params.proj_init_method`` (``'pca'`` or ``'randn'``).
        Otherwise no projection is used: ``compressed_dim`` is taken from
        ``x.size(1)`` and every projection matrix entry is ``None``.
        """
        # Set if using projection matrix
        self.params.use_projection_matrix = self.params.get('use_projection_matrix', True)

        if self.params.use_projection_matrix:
            self.compressed_dim = self.fparams.attribute('compressed_dim', None)

            proj_init_method = self.params.get('proj_init_method', 'pca')
            if proj_init_method == 'pca':
                # Flatten each feature to (dim 1, everything else) and remove the per-row mean
                x_mat = TensorList([e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone() for e in x])
                x_mat -= x_mat.mean(dim=1, keepdim=True)
                cov_x = x_mat @ x_mat.t()
                # Keep the first cdim left singular vectors as a 1x1 convolution kernel
                self.projection_matrix = TensorList(
                    [None if cdim is None else torch.svd(C)[0][:, :cdim].t().unsqueeze(-1).unsqueeze(-1).clone() for C, cdim in
                     zip(cov_x, self.compressed_dim)])
            elif proj_init_method == 'randn':
                self.projection_matrix = TensorList(
                    [None if cdim is None else ex.new_zeros(cdim,ex.shape[1],1,1).normal_(0,1/math.sqrt(ex.shape[1])) for ex, cdim in
                     zip(x, self.compressed_dim)])
        else:
            self.compressed_dim = x.size(1)
            self.projection_matrix = TensorList([None]*len(x))

    def init_label_function(self, train_x):
        """Allocate the label memory ``self.y`` and fill in labels for the initial samples.

        Returns:
            A ``TensorList`` with, per feature, the first ``x.shape[0]`` labels.
        """
        # Allocate label function
        mem_size = self.params.sample_memory_size
        self.y = TensorList([feat.new_zeros(mem_size, 1, feat.shape[2], feat.shape[3]) for feat in train_x])

        # Output sigma factor
        sigma_factor = self.fparams.attribute('output_sigma_factor')
        target_sz_in_feat = self.feature_sz / self.img_support_sz * self.base_target_sz
        self.sigma = target_sz_in_feat.prod().sqrt() * sigma_factor * torch.ones(2)

        # Center pos in normalized coords
        target_center_norm = (self.pos - self.pos.round()) / (self.target_scale * self.img_support_sz)

        # Generate label functions
        for label_mem, sig, feat_sz, kern_sz, feat in zip(self.y, self.sigma, self.feature_sz, self.kernel_size, train_x):
            even_kernel_offset = 0.5 * torch.Tensor([(kern_sz[0] + 1) % 2, (kern_sz[1] + 1) % 2])
            center_pos = feat_sz * target_center_norm + even_kernel_offset
            num_samples = feat.shape[0]
            for idx, transform in enumerate(self.transforms[:num_samples]):
                sample_center = center_pos + torch.Tensor(transform.shift) / self.img_support_sz * feat_sz
                label_mem[idx, 0, ...] = dcf.label_function_spatial(feat_sz, sig, sample_center)

        # Return only the ones to use for initial training
        initial_labels = [label_mem[:feat.shape[0], ...] for label_mem, feat in zip(self.y, train_x)]
        return TensorList(initial_labels)

    def init_memory(self, train_x):
        """Allocate the sample memory and weights from the first-frame samples."""
        mem_size = self.params.sample_memory_size

        # Initialize first-frame training samples
        self.num_init_samples = train_x.size(0)
        self.init_sample_weights = TensorList([feat.new_ones(1) / feat.shape[0] for feat in train_x])
        self.init_training_samples = train_x

        # Sample counters and weights
        self.num_stored_samples = self.num_init_samples.copy()
        self.previous_replace_ind = [None] * len(self.num_stored_samples)
        self.sample_weights = TensorList([feat.new_zeros(mem_size) for feat in train_x])
        weight_triples = zip(self.sample_weights, self.init_sample_weights, self.num_init_samples)
        for weights, init_weight, count in weight_triples:
            weights[:count] = init_weight

        # Initialize memory
        memory = [feat.new_zeros(mem_size, cdim, feat.shape[2], feat.shape[3])
                  for feat, cdim in zip(train_x, self.compressed_dim)]
        self.training_samples = TensorList(memory)

    def update_memory(self, sample_x: TensorList, sample_y: TensorList, learning_rate = None):
        """Write a new sample and its label into the memory slot chosen by the weight update."""
        slots = self.update_sample_weights(self.sample_weights, self.previous_replace_ind, self.num_stored_samples, self.num_init_samples, self.fparams, learning_rate)
        self.previous_replace_ind = slots

        for memory, new_sample, slot in zip(self.training_samples, sample_x, slots):
            memory[slot:slot+1, ...] = new_sample

        for label_memory, new_label, slot in zip(self.y, sample_y, slots):
            label_memory[slot:slot+1, ...] = new_label

        if self.hinge_mask is not None:
            for mask, new_label, slot in zip(self.hinge_mask, sample_y, slots):
                mask[slot:slot+1, ...] = (new_label >= self.params.hinge_threshold).float()

        self.num_stored_samples += 1

    def update_sample_weights(self, sample_weights, previous_replace_ind, num_stored_samples, num_init_samples, fparams, learning_rate = None):
        """Update the per-feature sample weights in place and pick the memory slot to replace.

        Args:
            sample_weights: One weight tensor per feature; modified in place.
            previous_replace_ind: Slot replaced in the previous update per feature, or ``None``.
            num_stored_samples: Number of samples stored so far per feature.
            num_init_samples: Number of initial samples per feature.
            fparams: Per-feature parameters; ``learning_rate`` is read when
                ``learning_rate`` is ``None``, and ``init_samples_minimum_weight``
                is read if present.
            learning_rate: Optional learning rate overriding the per-feature one.

        Returns:
            List with the index of the memory slot to overwrite, one per feature.
        """
        # Update weights and get index to replace in memory
        replace_ind = []
        for sw, prev_ind, num_samp, num_init, fpar in zip(sample_weights, previous_replace_ind, num_stored_samples, num_init_samples, fparams):
            lr = learning_rate
            if lr is None:
                lr = fpar.learning_rate

            # A minimum weight of 0 is treated the same as no minimum weight
            init_samp_weight = getattr(fpar, 'init_samples_minimum_weight', None)
            if init_samp_weight == 0:
                init_samp_weight = None
            # With a minimum weight, the initial samples are excluded from replacement
            s_ind = 0 if init_samp_weight is None else num_init

            if num_samp == 0 or lr == 1:
                # Put all weight on slot 0
                sw[:] = 0
                sw[0] = 1
                r_ind = 0
            else:
                # Get index to replace
                _, r_ind = torch.min(sw[s_ind:], 0)
                r_ind = r_ind.item() + s_ind

                # Update weights
                if prev_ind is None:
                    sw /= 1 - lr
                    sw[r_ind] = lr
                else:
                    sw[r_ind] = sw[prev_ind] / (1 - lr)

            sw /= sw.sum()
            if init_samp_weight is not None and sw[:num_init].sum() < init_samp_weight:
                # Raise the initial samples back up to their minimum total weight
                sw /= init_samp_weight + sw[num_init:].sum()
                sw[:num_init] = init_samp_weight / num_init

            replace_ind.append(r_ind)

        return replace_ind

    def get_label_function(self, sample_pos, sample_scale):
        """Build one label per feature, centered on the target offset within the sample."""
        # Target offset from the sample center in normalized coordinates
        target_center_norm = (self.pos - sample_pos) / (sample_scale * self.img_support_sz)

        labels = []
        for sig, feat_sz, kern_sz in zip(self.sigma, self.feature_sz, self.kernel_size):
            even_kernel_offset = 0.5 * torch.Tensor([(kern_sz[0] + 1) % 2, (kern_sz[1] + 1) % 2])
            center = feat_sz * target_center_norm + even_kernel_offset
            labels.append(dcf.label_function_spatial(feat_sz, sig, center))
        return TensorList(labels)

    def update_state(self, new_pos, new_scale = None):
        """Store a new position and, when given, a new clamped scale and target size."""
        # Update scale
        if new_scale is not None:
            self.target_scale = new_scale.clamp(self.min_scale_factor, self.max_scale_factor)
            self.target_sz = self.base_target_sz * self.target_scale

        # Update pos, clipped to the bounds derived from the image and target size
        inside_ratio = 0.2
        lower_bound = (inside_ratio - 0.5) * self.target_sz
        upper_bound = self.image_sz - lower_bound
        clipped_above = torch.min(new_pos, upper_bound)
        self.pos = torch.max(clipped_above, lower_bound)

    def get_iounet_box(self, pos, sz, sample_pos, sample_scale):
        """All inputs in original image coordinates.

        Returns a 4-element tensor: the box corner followed by the box size,
        both expressed relative to the sample and with their two entries flipped.
        """
        scaled_sz = sz / sample_scale
        center_in_sample = (pos - sample_pos) / sample_scale + (self.iou_img_sample_sz - 1) / 2
        corner = center_in_sample - (scaled_sz - 1) / 2
        return torch.cat([corner.flip((0,)), scaled_sz.flip((0,))])

    def init_iou_net(self):
        """Set up the IoU predictor and compute the target features it is conditioned on.

        Stores the predictor in ``self.iou_predictor``, the initial target box
        in ``self.iou_target_box`` and the averaged target features in
        ``self.target_feat``.
        """
        # Setup IoU net
        self.iou_predictor = self.params.features.get_unique_attribute('iou_predictor')
        for p in self.iou_predictor.parameters():
            p.requires_grad = False

        # Get target boxes for the different augmentations
        self.iou_target_box = self.get_iounet_box(self.pos, self.target_sz, self.pos.round(), self.target_scale)
        target_boxes = TensorList()
        if self.params.iounet_augmentation:
            for T in self.transforms:
                # Stop at the first transform that is not in this list; only the
                # leading samples are kept when the backbone features are sliced below.
                if not isinstance(T, (augmentation.Identity, augmentation.Translation, augmentation.FlipHorizontal, augmentation.FlipVertical, augmentation.Blur)):
                    break
                target_boxes.append(self.iou_target_box + torch.Tensor([T.shift[1], T.shift[0], 0, 0]))
        else:
            # NOTE(review): this fallback was missing; without it the list is
            # empty and torch.cat below fails. Using only the un-augmented box
            # is an assumption -- confirm.
            target_boxes.append(self.iou_target_box.clone())
        target_boxes = torch.cat(target_boxes.view(1,4), 0).to(self.params.device)

        # Get iou features
        iou_backbone_features = self.get_iou_backbone_features()

        # Remove other augmentations such as rotation
        iou_backbone_features = TensorList([x[:target_boxes.shape[0],...] for x in iou_backbone_features])

        # Extract target feat
        with torch.no_grad():
            target_feat = self.iou_predictor.get_modulation(iou_backbone_features, target_boxes)
        self.target_feat = TensorList([x.detach().mean(0) for x in target_feat])

        if self.params.get('iounet_not_use_reference', False):
            # Replace the target features by a constant tensor of the same shape
            self.target_feat = TensorList([torch.full_like(tf, tf.norm() / tf.numel()) for tf in self.target_feat])

    def refine_target_box(self, sample_pos, sample_scale, scale_ind, update_scale = True):
        """Refine the target box with the IoU predictor and update the tracker state.

        args:
            sample_pos: Position of the image sample, in original image coordinates.
            sample_scale: Scale of the image sample the features were extracted at.
            scale_ind: Index of that scale in the extracted IoU features.
            update_scale: If True, self.target_scale is updated from the refined box.

        Sets self.pos_iounet and self.target_sz (and self.pos / self.target_scale,
        depending on parameters). If every refined box is rejected by the aspect
        ratio filter, the state is left untouched.
        """
        # Initial box for refinement
        init_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos, sample_scale)

        # Extract features from the relevant scale
        iou_features = self.get_iou_features()
        iou_features = TensorList([x[scale_ind:scale_ind+1,...] for x in iou_features])

        init_boxes = init_box.view(1,4).clone()
        if self.params.num_init_random_boxes > 0:
            # Get random initial boxes: jitter is proportional to the side of a square
            # with the same area as the initial box
            square_box_sz = init_box[2:].prod().sqrt()
            rand_factor = square_box_sz * torch.cat([self.params.box_jitter_pos * torch.ones(2), self.params.box_jitter_sz * torch.ones(2)])
            minimal_edge_size = init_box[2:].min()/3
            rand_bb = (torch.rand(self.params.num_init_random_boxes, 4) - 0.5) * rand_factor
            new_sz = (init_box[2:] + rand_bb[:,2:]).clamp(minimal_edge_size)
            new_center = (init_box[:2] + init_box[2:]/2) + rand_bb[:,:2]
            init_boxes = torch.cat([new_center - new_sz/2, new_sz], 1)
            # Keep the un-jittered box as the first proposal
            init_boxes = torch.cat([init_box.view(1,4), init_boxes])

        # Refine boxes by maximizing iou
        output_boxes, output_iou = self.optimize_boxes(iou_features, init_boxes)

        # Remove weird boxes with extreme aspect ratios
        output_boxes[:, 2:].clamp_(1)
        aspect_ratio = output_boxes[:,2] / output_boxes[:,3]
        keep_ind = (aspect_ratio < self.params.maximal_aspect_ratio) & (aspect_ratio > 1/self.params.maximal_aspect_ratio)
        output_boxes = output_boxes[keep_ind,:]
        output_iou = output_iou[keep_ind]

        # If no box found, keep the current state (topk below cannot run on an empty set)
        if output_boxes.shape[0] == 0:
            return

        # Take average of top k boxes
        k = self.params.get('iounet_k', 5)
        topk = min(k, output_boxes.shape[0])
        _, inds = torch.topk(output_iou, topk)
        predicted_box = output_boxes[inds, :].mean(0)

        # Update position: box centre relative to the sample centre, mapped back to image coordinates
        new_pos = predicted_box[:2] + predicted_box[2:]/2 - (self.iou_img_sample_sz - 1) / 2
        new_pos = new_pos.flip((0,)) * sample_scale + sample_pos
        new_target_sz = predicted_box[2:].flip((0,)) * sample_scale
        new_scale = torch.sqrt(new_target_sz.prod() / self.base_target_sz.prod())

        self.pos_iounet = new_pos.clone()

        if self.params.get('use_iounet_pos_for_learning', True):
            self.pos = new_pos.clone()

        self.target_sz = new_target_sz

        if update_scale:
            self.target_scale = new_scale

    def optimize_boxes(self, iou_features, init_boxes):
        """Refine box proposals by gradient ascent on the predicted IoU.

        args:
            iou_features: Features passed through to ``self.iou_predictor.predict_iou``.
            init_boxes: Initial proposals; viewed as (1, num_boxes, 4).

        returns:
            Tuple ``(boxes, iou)``: the refined boxes as a (num_boxes, 4) CPU tensor and
            the IoU predicted in the last iteration as a (num_boxes,) CPU tensor.

        raises:
            ValueError: If ``box_refinement_space`` is neither 'default' nor 'relative'.
        """
        # Optimize iounet boxes
        output_boxes = init_boxes.view(1, -1, 4).to(self.params.device)
        step_length = self.params.box_refinement_step_length
        init_step_length = self.params.box_refinement_step_length
        if isinstance(step_length, (tuple, list)):
            # Separate step lengths for the first two and the last two box coordinates
            init_step_length = torch.Tensor([step_length[0], step_length[0], step_length[1], step_length[1]]).to(
                self.params.device).view(1, 1, 4)
        box_refinement_space = self.params.get('box_refinement_space', 'default')

        step_length = init_step_length * output_boxes.new_ones(1, output_boxes.shape[1], 1)
        outputs_prev = -99999999 * output_boxes.new_ones(1, output_boxes.shape[1])
        step = torch.zeros_like(output_boxes)

        if box_refinement_space == 'default':
            # Optimization using bounding box space used in original IoUNet
            for i_ in range(self.params.box_refinement_iter):
                # forward pass
                bb_init = output_boxes.clone().detach()
                bb_init.requires_grad = True

                outputs = self.iou_predictor.predict_iou(self.target_feat, iou_features, bb_init)

                if isinstance(outputs, (list, tuple)):
                    outputs = outputs[0]

                # Populate bb_init.grad: backpropagate a unit gradient from every predicted IoU
                # (presumably each IoU depends only on its own box -- confirm against predictor)
                outputs.backward(gradient=torch.ones_like(outputs))

                # Update mask and step length: boxes whose IoU did not improve get a shorter
                # step and have their previous step undone (unless decay is disabled)
                update_mask = (outputs.detach() > outputs_prev) | (self.params.box_refinement_step_decay >= 1)
                update_mask_float = update_mask.view(1, -1, 1).float()
                step_length[~update_mask, :] *= self.params.box_refinement_step_decay
                outputs_prev = outputs.detach().clone()

                # Update proposal (the gradient step is scaled by the box size)
                step = update_mask_float * step_length * bb_init.grad * bb_init[:, :, 2:].repeat(1, 1, 2) - (
                            1.0 - update_mask_float) * step
                output_boxes = bb_init + step

        elif box_refinement_space == 'relative':
            # Optimization using relative bounding box space
            sz_norm = output_boxes[:, :1, 2:].clone()
            output_boxes_rel = bbutils.rect_to_rel(output_boxes, sz_norm)
            for i_ in range(self.params.box_refinement_iter):
                # forward pass
                bb_init_rel = output_boxes_rel.clone().detach()
                bb_init_rel.requires_grad = True

                bb_init = bbutils.rel_to_rect(bb_init_rel, sz_norm)
                outputs = self.iou_predictor.predict_iou(self.target_feat, iou_features, bb_init)

                if isinstance(outputs, (list, tuple)):
                    outputs = outputs[0]

                # Populate bb_init_rel.grad (gradient flows through rel_to_rect)
                outputs.backward(gradient=torch.ones_like(outputs))

                # Update mask and step length
                update_mask = (outputs.detach() > outputs_prev) | (self.params.box_refinement_step_decay >= 1)
                update_mask_float = update_mask.view(1, -1, 1).float()
                step_length[~update_mask, :] *= self.params.box_refinement_step_decay
                outputs_prev = outputs.detach().clone()

                # Update proposal
                step = update_mask_float * step_length * bb_init_rel.grad - (1.0 - update_mask_float) * step
                output_boxes_rel = bb_init_rel + step

            output_boxes = bbutils.rel_to_rect(output_boxes_rel, sz_norm)

        else:
            raise ValueError('Unknown box_refinement_space {}'.format(box_refinement_space))

        return output_boxes.view(-1, 4).cpu(), outputs.detach().view(-1).cpu()
コード例 #25
    def track(self, image1, image2):
        """Track the target in a new pair of images and return its box.

        Both images are sampled at the same position and scales; their features are
        concatenated along dim 1 before the filter is applied.

        returns:
            List of four numbers: ``pos - (target_sz - 1) / 2`` followed by ``target_sz``,
            each with its two components swapped (index order [1, 0]).
        """
        self.frame_num += 1

        # Convert images
        frame_a = numpy_to_torch(image1)
        frame_b = numpy_to_torch(image2)

        # ------- LOCALIZATION ------- #

        # Sample both images and fuse their features
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        feats_a = self.extract_sample(frame_a, self.pos, sample_scales, self.img_sample_sz)
        feats_b = self.extract_sample(frame_b, self.pos, sample_scales, self.img_sample_sz)
        test_x = TensorList([torch.cat((fa, fb), 1) for fa, fb in zip(feats_a, feats_b)])

        # Score the sample and locate the target
        translation_vec, scale_ind, s, flag = self.localize_target(self.apply_filter(test_x))
        target_found = flag != 'not_found'

        # Update position and scale
        if target_found:
            if self.use_iou_net:
                update_scale_flag = getattr(self.params, 'update_scale_when_uncertain', True) or flag != 'uncertain'
                if getattr(self.params, 'use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.refine_target_box(sample_pos, sample_scales[scale_ind], scale_ind, update_scale_flag)
            elif getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec, sample_scales[scale_ind])

        if self.params.debug >= 2:
            best_map = s[scale_ind,...]
            show_tensor(best_map, 5, title='Max score = {:.2f}'.format(torch.max(s[scale_ind,...]).item()))

        # ------- UPDATE ------- #

        # A dedicated learning rate is only looked up for hard negatives
        learning_rate = None
        if flag == 'hard_negative':
            learning_rate = getattr(self.params, 'hard_negative_learning_rate', None)

        confident = flag not in ('not_found', 'uncertain')
        if getattr(self.params, 'update_classifier', False) and confident:
            # Train sample: features at the selected scale only
            train_x = TensorList([feat[scale_ind:scale_ind+1, ...] for feat in test_x])

            # Target box and label for the spatial sample
            target_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos, sample_scales[scale_ind])
            train_y = self.get_label_function(sample_pos, sample_scales[scale_ind]).to(self.params.device)

            # Update the classifier model
            self.update_classifier(train_x, train_y, target_box, learning_rate, s[scale_ind,...])

        # Set the pos of the tracker to iounet pos
        if self.use_iou_net and target_found and hasattr(self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        # Assemble the returned state
        size_swapped = self.target_sz[[1,0]]
        corner_swapped = self.pos[[1,0]] - (self.target_sz[[1,0]]-1)/2
        return torch.cat((corner_swapped, size_swapped)).tolist()
コード例 #26
ファイル: depth.py プロジェクト: yjybuaa/pytracking
    def track(self, image, depth, info: dict = None) -> dict:
        """Track the target in a new colour/depth frame pair.

        args:
            image: Colour image (converted with numpy_to_torch).
            depth: Depth image (converted with numpy_to_torch).
            info: Unused here.

        returns:
            dict with key 'target_bbox': list of four numbers,
            ``pos - (target_sz - 1) / 2`` followed by ``target_sz``, each with its two
            components swapped (index order [1, 0]).
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        im = numpy_to_torch(image)
        self.im = im    # For debugging only

        # Convert depth
        dp = numpy_to_torch(depth)
        self.dp = dp    # For debugging only

        # ------- LOCALIZATION ------- #

        # Get sample
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_x = self.extract_processed_sample(im, dp, self.pos, sample_scales, self.img_sample_sz)

        # Compute scores
        scores_raw = self.apply_filter(test_x)
        translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

        # Update position and scale
        if flag != 'not_found':
            if self.use_iou_net:
                update_scale_flag = self.params.get('update_scale_when_uncertain', True) or flag != 'uncertain'
                if self.params.get('use_classifier', True):
                    self.update_state(sample_pos + translation_vec)
                self.refine_target_box(sample_pos, sample_scales[scale_ind], scale_ind, update_scale_flag)
            elif self.params.get('use_classifier', True):
                self.update_state(sample_pos + translation_vec, sample_scales[scale_ind])

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score
        self.debug_info['flag'] = flag

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

        # ------- UPDATE ------- #

        # Check flags and set learning rate if hard negative
        update_flag = flag not in ['not_found', 'uncertain']
        hard_negative = (flag == 'hard_negative')
        learning_rate = self.params.hard_negative_learning_rate if hard_negative else None

        if update_flag:
            # Get train sample
            train_x = TensorList([x[scale_ind:scale_ind+1, ...] for x in test_x])

            # Create label for sample
            train_y = self.get_label_function(sample_pos, sample_scales[scale_ind])

            # Update memory
            self.update_memory(train_x, train_y, learning_rate)

        # Train filter: immediately on a hard negative, otherwise every train_skipping frames
        # NOTE(review): the bodies of these two branches were missing; the optimizer calls
        # and the hard_negative_CG_iter parameter name follow the ATOM tracker this code
        # appears to derive from -- confirm against the project's parameter file.
        if hard_negative:
            self.filter_optimizer.run(self.params.hard_negative_CG_iter)
        elif (self.frame_num-1) % self.params.train_skipping == 0:
            self.filter_optimizer.run(self.params.CG_iter)

        # Set the pos of the tracker to iounet pos. pos_iounet only exists once a box
        # refinement has run, so guard against it being absent.
        if self.use_iou_net and flag != 'not_found' and hasattr(self, 'pos_iounet'):
            self.pos = self.pos_iounet.clone()

        # Return new state
        new_state = torch.cat((self.pos[[1,0]] - (self.target_sz[[1,0]]-1)/2, self.target_sz[[1,0]]))

        out = {'target_bbox': new_state.tolist()}
        return out
コード例 #27
    def refine_target_box(self, sample_pos, sample_scale, scale_ind, update_scale = True):
        """Refine the target box with the IoU predictor and update the tracker state.

        Proposals are the initial box, random jittered boxes and, optionally, a set of
        smaller boxes sharing the initial box centre.

        args:
            sample_pos: Position of the image sample, in original image coordinates.
            sample_scale: Scale of the image sample the features were extracted at.
            scale_ind: Index of that scale in the extracted IoU features.
            update_scale: If True, self.target_scale is updated from the refined box.

        Sets self.pos_iounet and self.target_sz (and self.pos / self.target_scale,
        depending on parameters). If every refined box is rejected by the aspect
        ratio filter, the state is left untouched.
        """
        # Initial box for refinement
        init_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos, sample_scale)

        # Extract features from the relevant scale
        iou_features = self.get_iou_features()
        iou_features = TensorList([x[scale_ind:scale_ind+1,...] for x in iou_features])

        init_boxes = init_box.view(1,4).clone()
        if self.params.num_init_random_boxes > 0:
            # Random proposals: jitter proportional to the side of a square with the
            # same area as the initial box
            square_box_sz = init_box[2:].prod().sqrt()
            rand_factor = square_box_sz * torch.cat([self.params.box_jitter_pos * torch.ones(2), self.params.box_jitter_sz * torch.ones(2)])

            minimal_edge_size = init_box[2:].min()/3
            rand_bb = (torch.rand(self.params.num_init_random_boxes, 4) - 0.5) * rand_factor
            new_sz = (init_box[2:] + rand_bb[:,2:]).clamp(minimal_edge_size)
            new_center = (init_box[:2] + init_box[2:]/2) + rand_bb[:,:2]
            init_boxes = torch.cat([new_center - new_sz/2, new_sz], 1)
            init_boxes = torch.cat([init_box.view(1,4), init_boxes])

            # Generate smaller boxes
            if getattr(self.params, 'iounet_use_small_proposals', False):
                init_center = init_box[:2] + init_box[2:] / 2
                if getattr(self.params, 'iounet_use_iterative_shrinking', False):
                    # Repeatedly shrink the longer side of the previous proposal
                    prev_box = init_box.clone()
                    for _ in range(self.params.iounet_num_small_proposals):
                        new_sz = prev_box[2:].clone()
                        if new_sz[1] > new_sz[0]:
                            new_sz[1] *= self.params.iounet_small_box_factor
                        else:
                            new_sz[0] *= self.params.iounet_small_box_factor

                        new_box = torch.cat((init_center - new_sz / 2, new_sz), 0).view(1,4)
                        init_boxes = torch.cat([new_box, init_boxes])
                        prev_box = new_box.squeeze()
                else:
                    # Sweep the longer side from factor * side up to the full side, keeping
                    # the other side fixed
                    if init_box[2] > init_box[3]:
                        new_width = torch.linspace(init_box[2]*self.params.iounet_small_box_factor, init_box[2],
                                                   self.params.iounet_num_small_proposals).view(-1, 1)
                        new_height = init_box[3].view(1, 1).expand(new_width.numel(), -1)
                    else:
                        new_height = torch.linspace(init_box[3] * self.params.iounet_small_box_factor, init_box[3],
                                                    self.params.iounet_num_small_proposals).view(-1, 1)
                        new_width = init_box[2].view(1, 1).expand(new_height.numel(), -1)

                    new_sz = torch.cat((new_width, new_height), 1)
                    small_proposals = torch.cat((init_center.expand(new_sz.shape[0], -1) - new_sz/2, new_sz), 1)

                    init_boxes = torch.cat([small_proposals, init_boxes])

        if not getattr(self.params, 'iounet_update_aspect_ratio', True):
            # Force every proposal to the aspect ratio of the initial box
            init_boxes[...,2:] = init_boxes[...,2:].prod(dim=1, keepdim=True)/init_box[2:].prod() * init_box[2:]

        output_boxes, output_iou = self.optimize_boxes(iou_features, init_boxes)

        # Remove weird boxes
        output_boxes[:, 2:].clamp_(1)
        aspect_ratio = output_boxes[:,2] / output_boxes[:,3]
        keep_ind = (aspect_ratio < self.params.maximal_aspect_ratio) & (aspect_ratio > 1/self.params.maximal_aspect_ratio)
        output_boxes = output_boxes[keep_ind,:]
        output_iou = output_iou[keep_ind]

        # If no box found, keep the current state (topk below cannot run on an empty set)
        if output_boxes.shape[0] == 0:
            return

        # predict box as the mean of the k proposals with the highest predicted IoU
        k = getattr(self.params, 'iounet_k', 5)

        topk = min(k, output_boxes.shape[0])
        _, inds = torch.topk(output_iou, topk)
        predicted_box = output_boxes[inds, :].mean(0)

        # Box centre relative to the sample centre, mapped back to image coordinates
        new_pos = predicted_box[:2] + predicted_box[2:]/2 - (self.iou_img_sample_sz - 1) / 2
        new_pos = new_pos.flip((0,)) * sample_scale + sample_pos
        new_target_sz = predicted_box[2:].flip((0,)) * sample_scale
        new_scale = torch.sqrt(new_target_sz.prod() / self.base_target_sz.prod())

        self.pos_iounet = new_pos.clone()

        if getattr(self.params, 'use_iounet_pos_for_learning', True):
            self.pos = new_pos.clone()

        self.target_sz = new_target_sz

        if update_scale:
            if hasattr(self.params, 'target_scale_update_rate'):
                # Blend the new scale into the current one
                self.target_scale = new_scale*self.params.target_scale_update_rate + \
                                    self.target_scale*(1 - self.params.target_scale_update_rate)
            else:
                self.target_scale = new_scale
コード例 #28
ファイル: dimp.py プロジェクト: zhengfangwu/pytracking
    def init_classifier(self, init_backbone_feat):
        """Initialise the target classifier from the initial backbone features.

        Extracts classification features, sets the feature/kernel/output size
        attributes, builds the optional output window and computes the initial
        target filter with ``self.net.classifier.get_filter``.

        NOTE(review): several statements in this copy of the method are cut off
        (marked below); the method is not runnable until they are restored.
        """
        # Get classification features
        x = self.get_classification_features(init_backbone_feat)

        # Add the dropout augmentation here, since it requires extraction of the classification features
        if 'dropout' in self.params.augmentation and getattr(
                self.params, 'use_augmentation', True):
            num, prob = self.params.augmentation['dropout']
            # Register `num` extra copies of the first transform for the dropout samples
            self.transforms.extend(self.transforms[:1] * num)
            # NOTE(review): the call below is truncated -- the remaining dropout2d arguments
            # and the closing brackets of torch.cat([...]) are missing.
            x = torch.cat([
                F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1),

        # Set feature size and other related sizes
        self.feature_sz = torch.Tensor(list(x.shape[-2:]))
        ksz = self.net.classifier.filter_size
        # A scalar filter size is used for both dimensions
        self.kernel_size = torch.Tensor(
            [ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
        # Adds 1 along every axis whose kernel size is even
        self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

        # Construct output window
        self.output_window = None
        if getattr(self.params, 'window_output', False):
            # NOTE(review): both window constructions below are truncated (unclosed calls) and
            # the branch selecting the non-clipped window appears to be missing.
            if getattr(self.params, 'use_clipped_window', False):
                self.output_window = dcf.hann2d_clipped(
                    self.output_sz.long() * self.params.effective_search_area /
                self.output_window = dcf.hann2d(self.output_sz.long(),
            self.output_window = self.output_window.squeeze(0)

        # Get target boxes for the different augmentations
        target_boxes = self.init_target_boxes()

        # Set number of iterations
        plot_loss = self.params.debug > 0
        num_iter = getattr(self.params, 'net_opt_iter', None)

        # Get target filter by running the discriminative model prediction module
        with torch.no_grad():
            self.target_filter, _, losses = self.net.classifier.get_filter(
                x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

        # Init memory
        # NOTE(review): the body of this `if` is missing in this copy.
        if getattr(self.params, 'update_classifier', True):

        if plot_loss:
            if isinstance(losses, dict):
                losses = losses['train']
            self.losses = torch.stack(losses)
            # NOTE(review): the statement under the visdom branch is truncated -- only the
            # argument lines remain; presumably a self.visdom.register(...) call, to be confirmed.
            if self.visdom is not None:
                    (self.losses, torch.arange(self.losses.numel())),
                    'lineplot', 3, 'Training Loss')
            elif self.params.debug >= 3:
                plot_graph(self.losses, 10, title='Training loss')
コード例 #29
ファイル: deep.py プロジェクト: danielism97/CFKD
 def dim(self):
     """Collect the ``layer_dim`` entry of each layer in ``output_layers`` into a TensorList."""
     per_layer = [self.layer_dim[name] for name in self.output_layers]
     return TensorList(per_layer)
コード例 #30
    def track(self, image, info: dict = None) -> dict:
        """Track the target in a new frame using Fourier-domain samples.

        args:
            image: New frame (converted with numpy_to_torch).
            info: Unused here.

        returns:
            dict with key 'target_bbox': list of four numbers,
            ``pos - (target_sz - 1) / 2`` followed by ``target_sz``, each with its two
            components swapped (index order [1, 0]).
        """
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Convert image
        frame = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Extract the Fourier sample at every scale
        sample_pos = self.pos.round()
        sample_scales = self.target_scale * self.params.scale_factors
        test_xf = self.extract_fourier_sample(frame, self.pos, sample_scales, self.img_sample_sz)

        # Score the sample and find the best translation / scale
        translation_vec, scale_ind, s = self.localize_target(self.apply_filter(test_xf))
        scale_change_factor = self.params.scale_factors[scale_ind]

        # Update position and scale
        updated_pos = sample_pos + translation_vec
        updated_scale = self.target_scale * scale_change_factor
        self.update_state(updated_pos, updated_scale)

        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()
        self.debug_info['max_score'] = max_score

        # Visualise through visdom when available, otherwise fall back to the debug plot
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

        # ------- UPDATE ------- #

        # Train sample: the Fourier features at the selected scale only
        selected = slice(scale_ind, scale_ind+1)
        train_xf = TensorList([xf[selected, ...] for xf in test_xf])

        # Shift the sample so that it is centred on the updated position
        offset = self.pos - sample_pos
        shift_samp = 2*math.pi * offset / (sample_scales[scale_ind] * self.img_support_sz)
        train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

        # NOTE(review): the original had an "Update memory" step header here with no
        # statement under it -- a memory-update call may have been lost; confirm.

        # Train filter
        if self.frame_num % self.params.train_skipping == 1:
            self.filter_optimizer.run(self.params.CG_iter, train_xf)

        # Return new state
        size_swapped = self.target_sz[[1,0]]
        corner_swapped = self.pos[[1,0]] - (self.target_sz[[1,0]]-1)/2
        return {'target_bbox': torch.cat((corner_swapped, size_swapped)).tolist()}