예제 #1
0
    def forward(self,
                meta_parameter: TensorList,
                feat,
                label,
                sample_weight=None):
        """Build the data and regularization residuals for the current filter.

        args:
            meta_parameter:  TensorList whose first element is the filter. The filter
                is assumed to hold multiple filters per sequence, i.e. dims
                (sequences, filters, feat_dim, fH, fW).
            feat:  Input features. The number of sequences is read from dim 1 when
                feat is 5-dimensional, otherwise it is taken to be 1.
            label:  Target values; viewed to the shape of the filter response.
            sample_weight:  Optional weighting of the data residual. None gives the
                uniform weight sqrt(1 / num_images). A tensor with as many elements
                as the response is viewed to the response shape, a 1-D tensor is
                viewed as (-1, 1, 1, 1, 1), and anything else is used unchanged.
        returns:
            TensorList [data_residual, reg_residual]."""
        weights = meta_parameter[0]

        n_frames = feat.shape[0]
        n_seq = 1 if feat.dim() != 5 else feat.shape[1]

        # Filter response on the input features
        response = filter_layer.apply_filter(
            feat, weights, dilation_factors=self.filter_dilation_factors)

        # Bring the sample weighting into a form that broadcasts with the response
        weighting = sample_weight
        if weighting is None:
            weighting = math.sqrt(1.0 / n_frames)
        elif isinstance(weighting, torch.Tensor):
            if weighting.numel() == response.numel():
                weighting = weighting.view(response.shape)
            elif weighting.dim() == 1:
                weighting = weighting.view(-1, 1, 1, 1, 1)

        target = label.view(response.shape)
        data_residual = weighting * (response - target)

        # Regularization residual, with the sequences placed in the second dimension
        reg_residual = self.filter_reg * weights.view(1, n_seq, -1)

        return TensorList([data_residual, reg_residual])
예제 #2
0
    def regress(self, weights, feat):
        """Apply the regression filter to the features and rectify the response.

        args:
            weights:  Filter weights handed to filter_layer.apply_filter.
            feat:  Features the filter is applied to.
        returns:
            The filter response after a ReLU, so every value is non-negative."""
        return torch.relu(filter_layer.apply_filter(feat, weights))
예제 #3
0
    def classify(self, weights, feat):
        """Apply the classification filter to the features.

        args:
            weights:  Filter weights handed to filter_layer.apply_filter.
            feat:  Features the filter is applied to.
        returns:
            The filter response, passed through self.output_activation when one
            is configured (i.e. when it is not None)."""
        response = filter_layer.apply_filter(feat, weights)

        # Without an output activation the raw response is the result
        if self.output_activation is None:
            return response
        return self.output_activation(response)
예제 #4
0
    def track_frame(self, filter_weights, backbone_feat):
        """Score a frame: extract classification features and apply the filter.

        args:
            filter_weights:  Filter weights handed to filter_layer.apply_filter.
            backbone_feat:  Backbone features. A 5-dimensional input is treated as
                having a sequence dimension at index 1 and is flattened to 4
                dimensions before feature extraction.
        returns:
            The filter response on the extracted classification features."""
        has_sequence_dim = backbone_feat.dim() == 5
        num_sequences = backbone_feat.shape[1] if has_sequence_dim else None
        if has_sequence_dim:
            # Merge the leading dimensions; keep only the last three as they are
            backbone_feat = backbone_feat.reshape(-1, *backbone_feat.shape[-3:])

        cls_feat = self.extract_classification_feat(backbone_feat, num_sequences)
        return filter_layer.apply_filter(cls_feat, filter_weights)
예제 #5
0
    def forward(self,
                meta_parameter: TensorList,
                feat,
                bb,
                sample_weight=None,
                is_distractor=None):
        """Compute the data and regularization residuals for the current filter.

        Note that [] denotes an optional dimension.
        args:
            meta_parameter:  TensorList whose first element is the filter.
            feat:  Input features. Dims (images_in_sequence, [sequences], ...); the
                number of sequences is read from dim 1 only when feat is 5-dimensional.
            bb:  Target boxes. The last dimension is read as a position (first two
                entries) followed by a size, so position + size / 2 is the box center.
            sample_weight:  Optional sample weights. None gives uniform weights. A
                tensor is square-rooted (the residual is squared in the loss) and may
                hold one value per (image, sequence) sample or use any layout that
                broadcasts after being flattened to (-1, 1, 1, 1), e.g. one value
                per image.
            is_distractor:  Optional mask indexing the flattened samples; the centers
                of the selected samples are overwritten with 99999.
        returns:
            TensorList [data_residual, reg_residual]."""
        # Local name avoids shadowing the builtin filter()
        weights = meta_parameter[0]

        num_images = feat.shape[0]
        num_sequences = feat.shape[1] if feat.dim() == 5 else 1

        # Compute scores
        scores = filter_layer.apply_filter(feat, weights)

        # Compute distance map. The center is expressed in feature-map units and its
        # two coordinates are swapped by the flip.
        center = ((bb[..., :2] + bb[..., 2:] / 2) / self.feat_stride).reshape(
            -1, 2).flip((1, ))
        if is_distractor is not None:
            # Presumably moves distractor centers far away from every map location
            center[is_distractor.reshape(-1), :] = 99999
        dist_map = self.distance_map(center, scores.shape[-2:])

        # Compute label map masks and weight, one map per (image, sequence) sample
        map_shape = (num_images, num_sequences, dist_map.shape[-2],
                     dist_map.shape[-1])
        label_map = self.label_map_predictor(dist_map).reshape(map_shape)
        target_mask = self.target_mask_predictor(dist_map).reshape(map_shape)
        spatial_weight = self.spatial_weight_predictor(dist_map).reshape(
            map_shape)

        if sample_weight is None:
            sample_weight = math.sqrt(1.0 / num_images) * spatial_weight
        elif isinstance(sample_weight, torch.Tensor):
            sample_weight = sample_weight.sqrt()
            if sample_weight.numel() == num_images * num_sequences:
                # One weight per (image, sequence) sample: keep both leading dims so
                # the weights line up with the maps. Flattening them into dim 0
                # would mis-broadcast whenever num_sequences > 1.
                sample_weight = sample_weight.reshape(num_images,
                                                      num_sequences, 1, 1)
            else:
                # Any other layout (e.g. one weight per image) keeps the original
                # flattening and is shared across the remaining dimensions.
                sample_weight = sample_weight.reshape(-1, 1, 1, 1)
            sample_weight = sample_weight * spatial_weight

        # Compute data residual
        scores_act = self.score_activation(scores, target_mask)
        data_residual = sample_weight * (scores_act - label_map)

        # Compute regularization residual. Put batch in second dimension
        reg_residual = self.filter_reg * weights.reshape(1, num_sequences, -1)

        return TensorList([data_residual, reg_residual])
예제 #6
0
    def classify(self, weights, feat):
        """Apply the classification filter to the features.

        args:
            weights:  Filter weights handed to filter_layer.apply_filter.
            feat:  Features the filter is applied to.
        returns:
            The unmodified filter response."""
        return filter_layer.apply_filter(feat, weights)
예제 #7
0
    def forward(self,
                weights,
                feat,
                bb,
                sample_weight=None,
                num_iter=None,
                compute_losses=True):
        """Runs the optimizer module.

        Iteratively refines the filter weights with gradient steps whose length is
        computed per sequence in every iteration.

        Note that [] denotes an optional dimension.
        args:
            weights:  Initial weights. Dims (sequences, feat_dim, wH, wW).
            feat:  Input feature maps. Dims (images_in_sequence, [sequences], feat_dim, H, W).
            bb:  Target bounding boxes (x, y, w, h) in the image coords. Dims (images_in_sequence, [sequences], 4).
            sample_weight:  Optional weight for each sample. Dims: (images_in_sequence, [sequences]).
            num_iter:  Number of iterations to run. Defaults to self.num_iter.
            compute_losses:  Whether to compute the (train) loss in each iteration.
        returns:
            weights:  The final optimized weights.
            weight_iterates:  The weights computed in each iteration (including initial input and final output).
            losses:  Train losses. One entry per iteration (evaluated before that
                iteration's update) plus one for the final weights; empty when
                compute_losses is False."""

        # Sizes
        num_iter = self.num_iter if num_iter is None else num_iter
        num_images = feat.shape[0]
        num_sequences = feat.shape[1] if feat.dim() == 5 else 1
        filter_sz = (weights.shape[-2], weights.shape[-1])
        # The map is one element larger than the features along every spatial
        # dimension in which the filter size is even.
        output_sz = (feat.shape[-2] + (weights.shape[-2] + 1) % 2,
                     feat.shape[-1] + (weights.shape[-1] + 1) % 2)

        # Get learnable scalars
        # The step length is stored as a logarithm, the regularization as a value
        # that is squared and bounded from below by min_filter_reg squared.
        step_length_factor = torch.exp(self.log_step_length)
        reg_weight = (self.filter_reg *
                      self.filter_reg).clamp(min=self.min_filter_reg**2)

        # Compute distance map
        # The offset is 0.5 for odd filter sizes and 0 for even ones. The box center
        # is converted to feature-map units and its coordinate order is reversed.
        dmap_offset = (torch.Tensor(filter_sz).to(bb.device) % 2) / 2.0
        center = ((bb[..., :2] + bb[..., 2:] / 2) / self.feat_stride).view(
            -1, 2).flip((1, )) - dmap_offset
        dist_map = self.distance_map(center, output_sz)

        # Compute label map masks and weight
        label_map = self.label_map_predictor(dist_map).view(
            num_images, num_sequences, *dist_map.shape[-2:])
        target_mask = self.target_mask_predictor(dist_map).view(
            num_images, num_sequences, *dist_map.shape[-2:])
        spatial_weight = self.spatial_weight_predictor(dist_map).view(
            num_images, num_sequences, *dist_map.shape[-2:])

        # Get total sample weights
        # The weight multiplies the residual, which is squared in the loss, hence
        # the square roots. Any other non-None value is used unchanged.
        if sample_weight is None:
            sample_weight = math.sqrt(1.0 / num_images) * spatial_weight
        elif isinstance(sample_weight, torch.Tensor):
            sample_weight = sample_weight.sqrt().view(
                num_images, num_sequences, 1, 1) * spatial_weight

        weight_iterates = [weights]
        losses = []

        for i in range(num_iter):
            # Cut the autograd history every detach_length iterations
            if i > 0 and i % self.detach_length == 0:
                weights = weights.detach()

            # Compute residuals
            scores = filter_layer.apply_filter(feat, weights)
            scores_act = self.score_activation(scores, target_mask)
            score_mask = self.score_activation_deriv(scores, target_mask)
            residuals = sample_weight * (scores_act - label_map)

            # Loss of the current (not yet updated) weights, averaged over sequences
            if compute_losses:
                losses.append(((residuals**2).sum() + reg_weight *
                               (weights**2).sum()) / num_sequences)

            # Compute gradient
            # sample_weight is applied a second time here: once inside the residual
            # and once for the derivative of the weighted residual.
            residuals_mapped = score_mask * (sample_weight * residuals)
            weights_grad = filter_layer.apply_feat_transpose(feat, residuals_mapped, filter_sz, training=self.training) + \
                          reg_weight * weights

            # Map the gradient with the Jacobian
            scores_grad = filter_layer.apply_filter(feat, weights_grad)
            scores_grad = sample_weight * (score_mask * scores_grad)

            # Compute optimal step length
            # Both terms are reduced to one value per sequence; the denominator is
            # bounded from below by 1e-8 to avoid division by zero.
            alpha_num = (weights_grad * weights_grad).sum(dim=(1, 2, 3))
            alpha_den = ((scores_grad * scores_grad).view(
                num_images, num_sequences, -1).sum(dim=(0, 2)) +
                         reg_weight * alpha_num).clamp(1e-8)
            alpha = alpha_num / alpha_den

            # Update filter
            weights = weights - (step_length_factor *
                                 alpha.view(-1, 1, 1, 1)) * weights_grad

            # Add the weight iterate
            weight_iterates.append(weights)

        # Loss of the final weights
        if compute_losses:
            scores = filter_layer.apply_filter(feat, weights)
            scores = self.score_activation(scores, target_mask)
            losses.append((((sample_weight *
                             (scores - label_map))**2).sum() + reg_weight *
                           (weights**2).sum()) / num_sequences)

        return weights, weight_iterates, losses
예제 #8
0
    def forward(self,
                weights,
                feat,
                bb,
                radius=0,
                dim=4,
                sample_weight=None,
                num_iter=None,
                compute_losses=True):
        """Runs the optimizer module.

        Iteratively refines the regression filter weights so that the filter
        response matches the labels returned by self.generate_w2h2_label at the
        positions selected by the accompanying label mask.

        Note that [] denotes an optional dimension.
        args:
            weights:  Initial weights. Dims (sequences, feat_dim, wH, wW).
                NOTE(review): the update below views the step length as
                (-1, 1, 1, 1, 1) and an inline note mentions 5-dimensional weights
                (sequences, 4, feat_dim, wH, wW) — confirm the expected rank.
            feat:  Input feature maps. Dims (images_in_sequence, [sequences], feat_dim, H, W).
            bb:  Target bounding boxes (x, y, w, h) in the image coords. Dims (images_in_sequence, [sequences], 4).
            radius: The size of vicinity of the target center.
            dim: Dims of offset maps, default is 4, indicating the distance from the center to four sides of the target.
            sample_weight:  Optional weight for each sample. None gives the uniform
                weight sqrt(1 / num_images); a tensor is square-rooted and viewed as
                (num_images, num_sequences, 1, 1, 1).
            num_iter:  Number of iterations to run. Defaults to self.num_iter.
            compute_losses:  Whether to compute the (train) loss in each iteration.
        returns:
            weights:  The final optimized weights.
            weight_iterates:  The weights computed in each iteration (including initial input and final output).
            losses:  Train losses. One entry per iteration (evaluated before that
                iteration's update) plus one for the final weights; empty when
                compute_losses is False."""

        # Sizes
        num_iter = self.num_iter if num_iter is None else num_iter
        num_images = feat.shape[0]
        num_sequences = feat.shape[1] if feat.dim() == 5 else 1
        filter_sz = (weights.shape[-2], weights.shape[-1])
        # The map is one element larger than the features along every spatial
        # dimension in which the filter size is even.
        output_sz = (feat.shape[-2] + (weights.shape[-2] + 1) % 2,
                     feat.shape[-1] + (weights.shape[-1] + 1) % 2)

        # Get learnable scalars
        # The step length is stored as a logarithm, the regularization as a value
        # that is squared and bounded from below by min_filter_reg squared.
        step_length_factor = torch.exp(self.log_step_length)
        reg_weight = (self.filter_reg *
                      self.filter_reg).clamp(min=self.min_filter_reg**2)

        # Regression targets and the mask selecting where they are enforced
        w2h2_label, label_mask = self.generate_w2h2_label(bb,
                                                          num_images,
                                                          num_sequences,
                                                          radius=radius,
                                                          output_sz=output_sz,
                                                          dim=dim)
        # Per the original author's note the label has shape
        # (num_images, num_sequences, 4, 72, 72) — TODO confirm for other setups

        # Get total sample weights
        # The weight multiplies the residual, which is squared in the loss, hence
        # the square roots. Any other non-None value is used unchanged.
        if sample_weight is None:
            sample_weight = math.sqrt(1.0 / num_images)
        elif isinstance(sample_weight, torch.Tensor):
            sample_weight = sample_weight.sqrt().view(num_images,
                                                      num_sequences, 1, 1, 1)

        weight_iterates = [weights]
        losses = []

        for i in range(num_iter):
            # Cut the autograd history every detach_length iterations
            if i > 0 and i % self.detach_length == 0:
                weights = weights.detach()

            # Compute residuals
            # Per the original author's note: feat is
            # [num_images, num_sequences, 256, 72, 72] and weights is
            # [num_sequences, 4, 256, 5, 5] — TODO confirm
            # The label is detached so no gradient flows into its generation.
            scores = filter_layer.apply_filter(feat, weights)
            residuals = sample_weight * label_mask * (scores -
                                                      w2h2_label.detach())

            # Mean squared residual of the current weights (no regularization term)
            if compute_losses:
                losses.append((residuals**2).mean())

            # Compute gradient
            # sample_weight is applied a second time here: once inside the residual
            # and once for the derivative of the weighted residual.
            residuals_mapped = sample_weight * residuals
            weights_grad = filter_layer.apply_feat_transpose(feat, residuals_mapped, filter_sz, training=self.training) + \
                          reg_weight * weights
            # Per the original author's note weights_grad is
            # [num_sequences, 4, 256, 5, 5] — TODO confirm

            # Map the gradient with the Jacobian
            scores_grad = filter_layer.apply_filter(feat, weights_grad)
            scores_grad = sample_weight * scores_grad
            # Per the original author's note scores_grad is
            # [num_images, num_sequences, 4, 72, 72] — TODO confirm

            # Compute optimal step length
            # Both terms are reduced to one value per sequence; the denominator is
            # bounded from below by 1e-8 to avoid division by zero.
            alpha_num = (weights_grad * weights_grad).view(num_sequences,
                                                           -1).sum(dim=1)
            alpha_den = ((scores_grad * scores_grad).view(
                num_images, num_sequences, -1).sum(dim=(0, 2)) +
                         reg_weight * alpha_num).clamp(1e-8)
            alpha = alpha_num / alpha_den

            # Update filter
            weights = weights - (step_length_factor *
                                 alpha.view(-1, 1, 1, 1, 1)) * weights_grad

            # Add the weight iterate
            weight_iterates.append(weights)

        # Loss of the final weights
        if compute_losses:
            scores = filter_layer.apply_filter(feat, weights)
            losses.append(((sample_weight * label_mask *
                            (scores - w2h2_label.detach()))**2).mean())

        return weights, weight_iterates, losses
예제 #9
0
    def forward(self, weights, feat, bb, sample_weight=None, num_iter=None, compute_losses=True):
        """Runs the optimizer module.

        Iteratively refines the filter weights against a label density using a
        spatial softmax of the filter response.

        Note that [] denotes an optional dimension.
        args:
            weights:  Initial weights. Dims (sequences, feat_dim, wH, wW).
            feat:  Input feature maps. Dims (images_in_sequence, [sequences], feat_dim, H, W).
            bb:  Target bounding boxes (x, y, w, h) in the image coords. Dims (images_in_sequence, [sequences], 4).
            sample_weight:  Optional weight for each sample. Dims: (images_in_sequence, [sequences]).
                None gives the uniform weight 1 / num_images. Unlike a squared-residual
                loss, no square root is taken here.
            num_iter:  Number of iterations to run. Defaults to self.num_iter.
            compute_losses:  Whether to compute the (train) loss in each iteration.
        returns:
            weights:  The final optimized weights.
            weight_iterates:  The weights computed in each iteration (including initial input and final output).
            losses:  Train losses. One entry per iteration (evaluated before that
                iteration's update) plus one for the final weights; empty when
                compute_losses is False."""

        # Sizes
        num_iter = self.num_iter if num_iter is None else num_iter
        num_images = feat.shape[0]
        num_sequences = feat.shape[1] if feat.dim() == 5 else 1
        filter_sz = (weights.shape[-2], weights.shape[-1])
        # The map is one element larger than the features along every spatial
        # dimension in which the filter size is even.
        output_sz = (feat.shape[-2] + (weights.shape[-2] + 1) % 2, feat.shape[-1] + (weights.shape[-1] + 1) % 2)

        # Get learnable scalars
        # The step length is stored as a logarithm, the regularization as a value
        # that is squared and bounded from below by min_filter_reg squared.
        step_length_factor = torch.exp(self.log_step_length)
        reg_weight = (self.filter_reg*self.filter_reg).clamp(min=self.min_filter_reg**2)

        # Compute label density
        # The offset is 0.5 for odd filter sizes and 0 for even ones. The box center
        # is converted to feature-map units and its coordinate order is reversed.
        offset = (torch.Tensor(filter_sz).to(bb.device) % 2) / 2.0
        center = ((bb[..., :2] + bb[..., 2:] / 2) / self.feat_stride).flip((-1,)) - offset
        label_density = self.get_label_density(center, output_sz)

        # Get total sample weights
        # NOTE(review): only None and tensors are handled; any other value would
        # reach the .reshape calls below unchanged — confirm callers never pass one.
        if sample_weight is None:
            sample_weight = torch.Tensor([1.0 / num_images]).to(feat.device)
        elif isinstance(sample_weight, torch.Tensor):
            sample_weight = sample_weight.reshape(num_images, num_sequences, 1, 1)

        # Constant added inside the log-sum-exp; zero disables it
        exp_reg = 0 if self.softmax_reg is None else math.exp(self.softmax_reg)
        def _compute_loss(scores, weights):
            # Weighted sum over samples of log(sum(exp(scores)) + exp_reg) minus the
            # label-density-weighted scores, plus the weight regularization; both
            # terms are averaged over the sequences.
            return torch.sum(sample_weight.reshape(sample_weight.shape[0], -1) *
                             (torch.log(scores.exp().sum(dim=(-2, -1)) + exp_reg) - (label_density * scores).sum(dim=(-2, -1)))) / num_sequences +\
                   reg_weight * (weights ** 2).sum() / num_sequences

        weight_iterates = [weights]
        losses = []

        for i in range(num_iter):
            # Cut the autograd history every detach_length iterations
            if i > 0 and i % self.detach_length == 0:
                weights = weights.detach()

            # Compute "residuals"
            # The softmax is taken over all spatial positions of each sample, then
            # restored to the shape of the scores.
            scores = filter_layer.apply_filter(feat, weights)
            scores_softmax = activation.softmax_reg(scores.reshape(num_images, num_sequences, -1), dim=2, reg=self.softmax_reg).reshape(scores.shape)
            res = sample_weight*(scores_softmax - label_density)

            # Loss of the current (not yet updated) weights
            if compute_losses:
                losses.append(_compute_loss(scores, weights))

            # Compute gradient
            weights_grad = filter_layer.apply_feat_transpose(feat, res, filter_sz, training=self.training) + \
                          reg_weight * weights

            # Map the gradient with the Hessian
            # The per-sample quadratic form is clamped at zero before the weighted
            # sum over the images.
            scores_grad = filter_layer.apply_filter(feat, weights_grad)
            sm_scores_grad = scores_softmax * scores_grad
            hes_scores_grad = sm_scores_grad - scores_softmax * torch.sum(sm_scores_grad, dim=(-2,-1), keepdim=True)
            grad_hes_grad = (scores_grad * hes_scores_grad).reshape(num_images, num_sequences, -1).sum(dim=2).clamp(min=0)
            grad_hes_grad = (sample_weight.reshape(sample_weight.shape[0], -1) * grad_hes_grad).sum(dim=0)

            # Compute optimal step length
            # alpha_eps adds extra damping on top of the regularization weight; the
            # denominator is bounded from below by 1e-8 to avoid division by zero.
            alpha_num = (weights_grad * weights_grad).sum(dim=(1,2,3))
            alpha_den = (grad_hes_grad + (reg_weight + self.alpha_eps) * alpha_num).clamp(1e-8)
            alpha = alpha_num / alpha_den

            # Update filter
            weights = weights - (step_length_factor * alpha.reshape(-1, 1, 1, 1)) * weights_grad

            # Add the weight iterate
            weight_iterates.append(weights)

        # Loss of the final weights
        if compute_losses:
            scores = filter_layer.apply_filter(feat, weights)
            losses.append(_compute_loss(scores, weights))

        return weights, weight_iterates, losses
예제 #10
0
    def forward(self,
                filter,
                feat,
                label,
                compute_losses=True,
                sample_weight=None,
                num_iter=None,
                train_bb=None,
                is_distractor=None,
                test_feat=None,
                test_label=None,
                test_anno=None):
        """Runs the optimizer module and optionally tracks train and test losses.

        args:
            filter:  Initial filter weights.
            feat:  Input feature maps. The number of images is read from dim 0 and
                the number of sequences from dim 1 when feat is 5-dimensional
                (otherwise 1).
            label:  Only its last two dimensions are read, as the size of the
                distance map.
            compute_losses:  Whether to record the train (and, if test_feat is given,
                test) losses.
            sample_weight:  Optional sample weights. None gives uniform weights
                1 / num_images. No square root is taken; the weights are combined
                with the squared spatial weight.
            num_iter:  Number of iterations to run. Defaults to self.num_iter.
            train_bb:  Target boxes; the last dimension is read as a position (first
                two entries) followed by a size.
                NOTE(review): the default is None but the value is indexed
                unconditionally, so callers must always supply it.
            is_distractor:  Optional mask indexing the flattened samples; the centers
                of the selected samples are overwritten with 99999.
            test_feat:  Optional features on which the test loss is evaluated.
            test_label:  Labels forwarded to self._compute_test_loss.
            test_anno:  Annotations forwarded to self._compute_test_loss.
        returns:
            filter:  The final optimized filter.
            losses:  Dict with the lists 'train' and 'test'."""
        if num_iter is None:
            num_iter = self.num_iter

        num_images = feat.shape[0]
        num_sequences = feat.shape[1] if feat.dim() == 5 else 1
        filter_sz = (filter.shape[-2], filter.shape[-1])

        # Learnable scalars: the step length is stored as a logarithm, the
        # regularization weight is the square of filter_reg.
        step_length = torch.exp(self.log_step_length)
        reg_weight = self.filter_reg * self.filter_reg

        # Compute distance map
        # The box center is converted to feature-map units and its coordinate order
        # is reversed.
        center = ((train_bb[..., :2] + train_bb[..., 2:] / 2) /
                  self.feat_stride).view(-1, 2).flip((1, ))
        if is_distractor is not None:
            # Presumably moves distractor centers far away from every map location
            center[is_distractor.view(-1), :] = 99999
        dist_map = self.distance_map(center, label.shape[-2:])

        # Compute label map masks and weight
        label_map = self.label_map_predictor(dist_map).view(
            num_images, num_sequences, dist_map.shape[-2], dist_map.shape[-1])
        target_mask = self.target_mask_predictor(dist_map).view(
            num_images, num_sequences, dist_map.shape[-2], dist_map.shape[-1])
        spatial_weight = self.spatial_weight_predictor(dist_map).view(
            num_images, num_sequences, dist_map.shape[-2], dist_map.shape[-1])

        background_mask = 1.0 - target_mask
        # NOTE(review): a tensor sample_weight is flattened to (-1, 1, 1, 1), which
        # lines up with the (images, sequences, H, W) maps only for layouts such as
        # one weight per image — confirm the expected layout with callers.
        if sample_weight is None:
            sample_weight = (1.0 / feat.shape[0]) * (spatial_weight *
                                                     spatial_weight)
        elif isinstance(sample_weight, torch.Tensor):
            sample_weight = sample_weight.view(
                -1, 1, 1, 1) * (spatial_weight * spatial_weight)

        losses = {'train': [], 'test': []}

        for i in range(num_iter):
            # Compute gradient
            # Scores are kept as they are where target_mask is active and rectified
            # where background_mask is active. score_mask is the matching
            # derivative, built from detached scores.
            scores = filter_layer.apply_filter(feat, filter)
            scores = target_mask * scores + background_mask * F.relu(scores)
            score_mask = (scores.detach() >
                          0).float() * background_mask + target_mask
            residuals = sample_weight * (scores - label_map)
            filter_grad = filter_layer.apply_feat_transpose(feat, residuals, filter_sz, training=self.training) + \
                          reg_weight * filter

            # Map the gradient
            # The gradient is pushed through the filter and back again, giving the
            # vector used in the step-length denominator.
            scores_grad = filter_layer.apply_filter(feat, filter_grad)
            scores_grad = sample_weight * (score_mask * scores_grad)
            filter_q = filter_layer.apply_feat_transpose(feat, scores_grad, filter_sz, training=self.training) + \
                       reg_weight * filter_grad

            # Compute step length
            # One value per entry of the first filter dimension; the denominator is
            # made non-negative and bounded from below by 1e-4.
            alpha_num = (filter_grad * filter_grad).view(filter.shape[0],
                                                         -1).sum(dim=1)
            alpha_den = (filter_grad * filter_q).view(
                filter.shape[0], -1).sum(dim=1).abs().clamp(1e-4)
            alpha = alpha_num / alpha_den

            # Update filter
            filter = filter - (step_length *
                               alpha.view(-1, 1, 1, 1)) * filter_grad

            # The train loss uses the scores computed before this iteration's
            # update, while the test loss is evaluated with the updated filter.
            if compute_losses:
                losses['train'].append(
                    (sample_weight * (scores - label_map)**2).mean())
                if test_feat is not None:
                    losses['test'].append(
                        self._compute_test_loss(filter, test_feat, test_label,
                                                test_anno))

        # Losses of the final filter
        if compute_losses:
            scores = filter_layer.apply_filter(feat, filter)
            scores = target_mask * scores + background_mask * F.relu(scores)
            losses['train'].append(
                (sample_weight * (scores - label_map)**2).mean())
            if test_feat is not None:
                losses['test'].append(
                    self._compute_test_loss(filter, test_feat, test_label,
                                            test_anno))

        return filter, losses
예제 #11
0
 def _compute_test_loss(self, filter, feat, label, target_bb=None):
     """Apply the filter to the features and evaluate self.test_loss on the result.

     args:
         filter:  Filter weights handed to filter_layer.apply_filter.
         feat:  Features the filter is applied to.
         label:  Labels forwarded to self.test_loss.
         target_bb:  Optional boxes forwarded to self.test_loss.
     returns:
         The value returned by self.test_loss."""
     test_scores = filter_layer.apply_filter(feat, filter)
     test_loss_value = self.test_loss(test_scores, label, target_bb)
     return test_loss_value
예제 #12
0
 def apply_target_model(self, weights, feat):
     """Apply the target model to the features to obtain the mask encodings.

     args:
         weights:  Target model weights handed to filter_layer.apply_filter.
         feat:  Features the target model is applied to.
     returns:
         The filter response, computed with self.filter_dilation_factors."""
     dilations = self.filter_dilation_factors
     return filter_layer.apply_filter(feat, weights, dilation_factors=dilations)