Example 1
    def init_iou_net(self):
        # Setup IoU net
        self.iou_predictor = self.params.features.get_unique_attribute('iou_predictor')
        for p in self.iou_predictor.parameters():
            p.requires_grad = False

        # Get target boxes for the different augmentations
        self.iou_target_box = self.get_iounet_box(self.pos, self.target_sz, self.pos.round(), self.target_scale)
        target_boxes = TensorList()
        if self.params.iounet_augmentation:
            for T in self.transforms:
                if not isinstance(T, (augmentation.Identity, augmentation.Translation, augmentation.FlipHorizontal, augmentation.FlipVertical, augmentation.Blur)):
                    break
                target_boxes.append(self.iou_target_box + torch.Tensor([T.shift[1], T.shift[0], 0, 0]))
        else:
            target_boxes.append(self.iou_target_box.clone())
        target_boxes = torch.cat(target_boxes.view(1,4), 0).to(self.params.device)

        # Get iou features
        iou_backbone_features = self.get_iou_backbone_features()

        # Remove other augmentations such as rotation
        iou_backbone_features = TensorList([x[:target_boxes.shape[0],...] for x in iou_backbone_features])

        # Extract target feat
        with torch.no_grad():
            target_feat = self.iou_predictor.get_filter(iou_backbone_features, target_boxes)
        self.target_feat = TensorList([x.detach().mean(0) for x in target_feat])

        if getattr(self.params, 'iounet_not_use_reference', False):
            self.target_feat = TensorList([torch.full_like(tf, tf.norm() / tf.numel()) for tf in self.target_feat])
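
The `torch.Tensor([T.shift[1], T.shift[0], 0, 0])` pattern above reorders each augmentation's (y, x) shift into the (x, y, w, h) box layout, offsetting only the box position. A minimal sketch of the stacking step with plain tensors (the shift values here are hypothetical):

import torch

# Hypothetical augmentation shifts, stored in (y, x) order on the transforms.
shifts_yx = [(0, 0), (-8, 12), (16, -4)]
base_box = torch.Tensor([50., 40., 32., 24.])  # (x, y, w, h)

# Reorder each shift to (x, y) and offset the box position, not its size.
boxes = [base_box + torch.Tensor([sx, sy, 0., 0.]) for sy, sx in shifts_yx]
target_boxes = torch.cat([b.view(1, 4) for b in boxes], 0)  # shape (3, 4)
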
Example 2
File: dimp.py Project: xiaozai/DAL
    def init_iou_net(self, backbone_feat):
        # Setup IoU net and objective
        for p in self.net.bb_regressor.parameters():
            p.requires_grad = False

        # Get target boxes for the different augmentations
        self.classifier_target_box = self.get_iounet_box(self.pos, self.target_sz, self.init_sample_pos, self.init_sample_scale)
        target_boxes = TensorList()
        if self.params.iounet_augmentation:
            for T in self.transforms:
                if not isinstance(T, (augmentation.Identity, augmentation.Translation, augmentation.FlipHorizontal, augmentation.FlipVertical, augmentation.Blur)):
                    break
                target_boxes.append(self.classifier_target_box + torch.Tensor([T.shift[1], T.shift[0], 0, 0]))
        else:
            target_boxes.append(self.classifier_target_box + torch.Tensor([self.transforms[0].shift[1], self.transforms[0].shift[0], 0, 0]))
        target_boxes = torch.cat(target_boxes.view(1,4), 0).to(self.params.device)

        # Get iou features
        iou_backbone_feat = self.get_iou_backbone_features(backbone_feat)

        # Remove other augmentations such as rotation
        iou_backbone_feat = TensorList([x[:target_boxes.shape[0],...] for x in iou_backbone_feat])

        # Get modulation vector
        self.iou_modulation = self.get_iou_modulation(iou_backbone_feat, target_boxes)
        self.iou_modulation = TensorList([x.detach().mean(0) for x in self.iou_modulation])
Example 3
    def get_label_function(self, sample_pos, sample_scale):
        # Generate label function
        train_y = TensorList()
        target_center_norm = (self.pos - sample_pos) / (sample_scale * self.img_support_sz)
        for sig, sz, ksz in zip(self.sigma, self.feature_sz, self.kernel_size):
            center = sz * target_center_norm + 0.5 * torch.Tensor([(ksz[0] + 1) % 2, (ksz[1] + 1) % 2])
            train_y.append(dcf.label_function_spatial(sz, sig, center))
        return train_y
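
`dcf.label_function_spatial` is not shown in these examples. A simplified stand-in that produces the same kind of output, a Gaussian score map whose origin is the grid center and whose peak sits at the sub-pixel offset `center`, could look like this (the pytracking version additionally handles kernel padding and batching):

import torch

def gaussian_label(sz, sigma, center):
    # Coordinates with the origin at the grid center; `center` is the
    # sub-pixel offset of the peak, as computed in get_label_function above.
    k0 = torch.arange(int(sz[0])).float() - (int(sz[0]) - 1) / 2 - center[0]
    k1 = torch.arange(int(sz[1])).float() - (int(sz[1]) - 1) / 2 - center[1]
    g0 = torch.exp(-0.5 * (k0 / sigma) ** 2).view(-1, 1)
    g1 = torch.exp(-0.5 * (k1 / sigma) ** 2).view(1, -1)
    return g0 * g1  # separable 2-D Gaussian of shape (sz[0], sz[1])

y = gaussian_label(torch.Tensor([18., 18.]), sigma=2.0, center=torch.Tensor([1.5, -0.5]))
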
Example 4
    def init_target_boxes(self):
        self.classifier_target_box = self.get_iounet_box(self.pos, self.target_sz, self.pos.round(), self.target_scale)
        init_target_boxes = TensorList()
        for T in self.transforms:
            init_target_boxes.append(self.classifier_target_box + torch.Tensor([T.shift[1], T.shift[0], 0, 0]))
        init_target_boxes = torch.cat(init_target_boxes.view(1, 4), 0).to(self.params.device)
        self.target_boxes = init_target_boxes.new_zeros(self.params.sample_memory_size, 4)
        self.target_boxes[:init_target_boxes.shape[0], :] = init_target_boxes
        return init_target_boxes
Example 5
    def init_target_boxes(self):
        """Get the target bounding boxes for the initial augmented samples."""
        self.classifier_target_box = self.get_training_box(self.pos, self.target_sz, self.init_sample_pos, self.init_sample_scale)
        init_target_boxes = TensorList()
        for T in self.transforms:
            init_target_boxes.append(self.classifier_target_box + torch.Tensor([T.shift[1], T.shift[0], 0, 0]))
        init_target_boxes = torch.cat(init_target_boxes.view(1, 4), 0).to(self.params.device)
        self.target_boxes = init_target_boxes.new_zeros(self.params.sample_memory_size, 4)
        self.target_boxes[:init_target_boxes.shape[0], :] = init_target_boxes
        self.target_boxes_cls_72 = self.target_boxes.clone()
        self.target_boxes_cls_18 = self.target_boxes.clone()
        self.target_boxes_reg_72 = self.target_boxes.clone()
        return init_target_boxes
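
Both versions of init_target_boxes pre-allocate a fixed-size memory of box slots and fill only the leading rows with the initial augmented samples; later frames overwrite slots in place instead of growing the tensor. The pattern in isolation (sizes are hypothetical):

import torch

mem_size = 50                      # hypothetical sample_memory_size
init_boxes = torch.rand(13, 4)     # one box per initial augmentation
memory = init_boxes.new_zeros(mem_size, 4)
memory[:init_boxes.shape[0], :] = init_boxes  # remaining slots stay zero
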
Example 6
    def init_dr_net(self):
        # Setup IoU net
        self.box_predictor = self.params.features.get_unique_attribute(
            'iou_predictor')
        for p in self.box_predictor.parameters():
            p.requires_grad = False

        # Get target boxes for the different augmentations
        self.iou_target_box = self.get_iounet_box(self.pos, self.target_sz,
                                                  self.pos.round(),
                                                  self.target_scale)
        target_boxes = TensorList()
        target_boxes.append(self.iou_target_box.clone())
        target_boxes = torch.cat(target_boxes.view(1, 4),
                                 0).to(self.params.device)

        # Get iou features
        iou_backbone_features = self.get_iou_backbone_features()

        # Remove other augmentations such as rotation
        iou_backbone_features = TensorList(
            [x[:target_boxes.shape[0], ...] for x in iou_backbone_features])

        # Extract target feat
        with torch.no_grad():
            target_feat = self.box_predictor.get_filter(
                iou_backbone_features, target_boxes)
        self.target_feat = TensorList(
            [x.detach().mean(0) for x in target_feat])

        if getattr(self.params, 'iounet_not_use_reference', False):
            self.target_feat = TensorList([
                torch.full_like(tf,
                                tf.norm() / tf.numel())
                for tf in self.target_feat
            ])
Example 7
class CCOT(BaseTracker):
    def initialize_features(self, im):
        if not getattr(self, 'features_initialized', False):
            self.params.features.initialize(im)
        self.features_initialized = True

    def initialize(self, image, info: dict, gpu_device):
        # Initialize frame counter and compute device
        self.frame_num = 1
        self.params.device = 'cuda:{0}'.format(
            gpu_device) if self.params.use_gpu else 'cpu'

        # Convert image
        im = numpy_to_torch(image)
        self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])

        # Initialize features
        self.initialize_features(im)

        # Check if image is color
        self.params.features.set_is_color(image.shape[2] == 3)

        # Get feature specific params
        self.fparams = self.params.features.get_fparams('feature_params')

        # Get position and size
        self.points = TensorList(
            [torch.Tensor([p[0], p[1]]) for p in info['points']])
        self.org_points = self.points.clone()
        self.target_sz = torch.Tensor(
            [info['target_sz'][0], info['target_sz'][1]])

        # Use odd square search area and set sizes
        feat_max_stride = max(self.params.features.stride())
        self.img_sample_sz = self.image_sz.clone()
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (
            2 * feat_max_stride)

        # Set other sizes (corresponds to ECO code)
        self.img_support_sz = self.img_sample_sz
        self.mid_point = self.img_support_sz // 2
        self.feature_sz = self.params.features.size(self.img_sample_sz)
        self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
        self.output_sz = self.img_support_sz  # Interpolated size of the output

        # Number of filters
        self.num_filters = len(self.filter_sz)

        # Get window function
        #self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])
        self.window = TensorList([
            torch.ones((1, 1, int(sz[0].item()),
                        int(sz[1].item()))).to(self.params.device)
            for sz in self.feature_sz
        ])
        #self.window = TensorList([dcf.tukey2d(sz).to(self.params.device) for sz in self.feature_sz])

        # Get interpolation function
        self.interp_fs = TensorList([
            dcf.get_interp_fourier(sz, self.params.interpolation_method,
                                   self.params.interpolation_bicubic_a,
                                   self.params.interpolation_centering,
                                   self.params.interpolation_windowing,
                                   self.params.device) for sz in self.filter_sz
        ])

        # Get label function
        output_sigma_factor = self.fparams.attribute('output_sigma_factor')
        sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(
            self.target_sz.prod()) * output_sigma_factor
        yf_zero = TensorList([
            dcf.label_function(sz, sig).to(self.params.device)
            for sz, sig in zip(self.filter_sz, sigma)
        ])
        yf_zero = complex.complex(yf_zero)
        self.yf = TensorList()
        for p in self.points:
            shift_sample = 2 * math.pi * (self.mid_point -
                                          p) / self.img_support_sz
            self.yf.append(
                TensorList(
                    [fourier.shift_fs(yfs, shift_sample) for yfs in yf_zero]))

        # Optimization options
        self.params.precond_learning_rate = self.fparams.attribute(
            'learning_rate')
        if self.params.CG_forgetting_rate is None or max(
                self.params.precond_learning_rate) >= 1:
            self.params.direction_forget_factor = 0
        else:
            self.params.direction_forget_factor = (
                1 - max(self.params.precond_learning_rate)
            )**self.params.CG_forgetting_rate

        # Extract and transform sample
        x = self.generate_init_samples(im).to(self.params.device)
        self.x = x
        # Transform to get the training sample
        train_xf = self.preprocess_sample(x)

        # Shift the samples back
        if 'shift' in self.params.augmentation:
            for xf in train_xf:
                if xf.shape[0] == 1:
                    continue
                for i, shift in enumerate(self.params.augmentation['shift']):
                    shift_samp = 2 * math.pi * torch.Tensor(
                        shift) / self.img_support_sz
                    xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i,
                                                               ...],
                                                            shift=shift_samp)

        # Initialize first-frame training samples
        num_init_samples = train_xf.size(0)

        self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

        # Initialize memory
        # Initialize filter
        self.training_samples = TensorList([
            xf.new_zeros(xf.shape[2], xf.shape[3],
                         self.params.sample_memory_size, xf.shape[1], 2)
            for xf in train_xf
        ])
        self.filters = TensorList([
            TensorList([
                xf.new_zeros(1, xf.shape[1], xf.shape[2], xf.shape[3], 2)
                for xf in train_xf
            ]) for i in range(len(self.points))
        ])

        self.init_sample_weights = TensorList(
            [xf.new_ones(1) / xf.shape[0] for xf in train_xf])
        self.sample_weights = TensorList(
            [xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
        for sw, init_sw, num in zip(self.sample_weights,
                                    self.init_sample_weights,
                                    num_init_samples):
            sw[:num] = init_sw

        # Get regularization filter
        self.reg_filter = TensorList([
            dcf.get_reg_filter(self.img_support_sz, self.target_sz,
                               fparams).to(self.params.device)
            for fparams in self.fparams
        ])
        self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

        # Sample counters and weights
        self.num_stored_samples = num_init_samples
        self.previous_replace_ind = [None] * len(self.num_stored_samples)

        for train_samp, init_samp in zip(self.training_samples,
                                         self.init_training_samples):
            train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

        sample_energy = complex.abs_sqr(self.training_samples).mean(
            dim=2, keepdim=True).permute(2, 3, 0, 1)
        # Do joint optimization
        for i in range(len(self.points)):
            print('{0}'.format(i), end=', ')
            ts = self.training_samples.clone()
            yf = self.yf[i]
            filters = self.filters[i]
            i_sw = self.init_sample_weights.clone()
            re = self.reg_energy.clone()
            sw = self.sample_weights.clone()
            rf = self.reg_filter.clone()
            filter_optimizer = FilterOptim(self.params, re)
            filter_optimizer.register(filters, ts, yf, sw, rf)
            filter_optimizer.sample_energy = sample_energy.clone()

            filter_optimizer.run(self.params.init_CG_iter)

            # Post optimization
            filter_optimizer.run(self.params.post_init_CG_iter)
            self.filters[i] = filter_optimizer.filter
        self.symmetrize_filter()
        print()

    def track(self, image, update=False) -> TensorList:
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num
        # Convert image
        im = numpy_to_torch(image)

        # ------- LOCALIZATION ------- #

        # Get sample
        test_xf = self.extract_fourier_sample(im)

        # Compute scores
        sfs = self.apply_filters(test_xf)
        out = TensorList([
            self.localize_and_update_target(sfs[i], i)
            for i in range(len(self.points))
        ])

        return out

    def apply_filters(self, sample_xf: TensorList) -> TensorList:
        return TensorList([
            complex.mult(f, sample_xf).sum(1, keepdim=True)
            for f in self.filters
        ])

    def apply_filter(self, sample_xf: TensorList) -> TensorList:
        return complex.mult(self.filter, sample_xf).sum(1, keepdim=True)

    def localize_and_update_target(self, sf: TensorList, i):
        if self.params.score_fusion_strategy == 'weightedsum':
            weight = self.fparams.attribute('translation_weight')
            sf = fourier.sum_fs(weight * sf)
            scores = fourier.sample_fs(sf, self.output_sz)
        else:
            raise ValueError('Unknown score fusion strategy.')

        # Get maximum
        max_score, max_disp = dcf.max2d(scores)
        max_disp = max_disp.float().cpu()

        # Convert to displacements in the base scale
        if self.params.score_fusion_strategy in ['sum', 'weightedsum']:
            disp = (max_disp +
                    self.output_sz / 2) % self.output_sz - self.output_sz / 2
        elif self.params.score_fusion_strategy == 'transcale':
            disp = max_disp - self.output_sz / 2

        # Compute translation vector and scale change factor
        translation_vec = disp.view(-1) * (self.img_support_sz /
                                           self.output_sz)

        # Update pos
        new_pos = self.mid_point.round() + translation_vec

        inside_ratio = 0.2
        inside_offset = (inside_ratio - 0.5) * self.target_sz
        self.points[i] = torch.max(
            torch.min(new_pos, self.image_sz - inside_offset), inside_offset)

        return self.points[i].round(), max_score, scores
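
The modulo step in localize_and_update_target maps a peak index in [0, output_sz) on the circular score map to a signed displacement in [-output_sz/2, output_sz/2). In isolation, with hypothetical numbers:

import torch

output_sz = torch.Tensor([256., 256.])
max_disp = torch.Tensor([250., 3.])  # peak near the wrap-around boundary
disp = (max_disp + output_sz / 2) % output_sz - output_sz / 2
print(disp)  # tensor([-6., 3.]): 6 pixels in the negative direction, 3 in the positive
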

    def extract_fourier_sample(self, im: torch.Tensor) -> TensorList:
        x = F.interpolate(im, self.output_sz.long().tolist(), mode='bilinear')
        x = TensorList([
            f.get_feature(x) for f in self.params.features.features
        ]).unroll().to(self.params.device)
        return self.preprocess_sample(x)

    def preprocess_sample(self, x: TensorList) -> TensorList:
        x *= self.window
        sample_xf = fourier.cfft2(x)
        return TensorList([
            dcf.interpolate_dft(xf, bf)
            for xf, bf in zip(sample_xf, self.interp_fs)
        ])

    def generate_init_samples(self, im: torch.Tensor) -> TensorList:
        # Do data augmentation
        transforms = [augmentation.Identity()]
        if 'shift' in self.params.augmentation:
            transforms.extend([
                augmentation.Translation(shift)
                for shift in self.params.augmentation['shift']
            ])
        if 'fliplr' in self.params.augmentation and self.params.augmentation[
                'fliplr']:
            transforms.append(augmentation.FlipHorizontal())
        if 'rotate' in self.params.augmentation:
            transforms.extend([
                augmentation.Rotate(angle)
                for angle in self.params.augmentation['rotate']
            ])
        if 'blur' in self.params.augmentation:
            transforms.extend([
                augmentation.Blur(sigma)
                for sigma in self.params.augmentation['blur']
            ])

        im_patch = F.interpolate(im,
                                 self.output_sz.long().tolist(),
                                 mode='bilinear')
        im_patches = torch.cat([T(im_patch) for T in transforms])
        init_samples = TensorList([
            f.get_feature(im_patches) for f in self.params.features.features
        ]).unroll()

        # Remove augmented samples for features that should not use augmentation
        for i, use_aug in enumerate(
                self.fparams.attribute('use_augmentation')):
            if not use_aug:
                init_samples[i] = init_samples[i][0:1, ...]

        if 'dropout' in self.params.augmentation:
            num, prob = self.params.augmentation['dropout']
            for i, use_aug in enumerate(
                    self.fparams.attribute('use_augmentation')):
                if use_aug:
                    init_samples[i] = torch.cat([
                        init_samples[i],
                        F.dropout2d(init_samples[i][0:1, ...].expand(
                            num, -1, -1, -1),
                                    p=prob,
                                    training=True)
                    ])

        return init_samples

    def symmetrize_filter(self):
        for f in self.filters:
            for hf in f:
                hf[:, :, :, 0, :] /= 2
                hf[:, :, :, 0, :] += complex.conj(hf[:, :, :, 0, :].flip((2,)))
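
symmetrize_filter enforces the conjugate (Hermitian) symmetry that the DFT of a real-valued filter must satisfy, so the learned filter stays real after the inverse transform. The property itself can be checked numerically with torch.fft rather than the complex/fourier helpers used above:

import torch

x = torch.randn(8, 8)   # a real-valued 2-D signal
X = torch.fft.fft2(x)
# For real input, X[k] equals the conjugate of X[-k] (indices taken mod N).
X_neg = torch.roll(torch.flip(X, dims=(0, 1)), shifts=(1, 1), dims=(0, 1))
print(torch.allclose(X, X_neg.conj(), atol=1e-5))  # True
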