Example no. 1
    def forward(
        self,
        images: torch.Tensor,
        boxes: torch.Tensor,
        box_mask: torch.LongTensor,
        classes: torch.Tensor = None,
        segms: torch.Tensor = None,
    ):
        """
        :param images: [batch_size, 3, im_height, im_width]
        :param boxes:  [batch_size, max_num_objects, 4] Padded boxes
        :param box_mask: [batch_size, max_num_objects] Mask for whether or not each box is OK
        :return: object reps [batch_size, max_num_objects, dim]
        """

        images = self.pre_backbone(images)
        images = self.layer2(images)
        images = self.cvm_2(images)
        images = self.layer3(images)
        images = self.cvm_3(images)
        images = self.layer4(images)
        img_feats = self.cvm_4(images)
        box_inds = box_mask.nonzero()
        assert box_inds.shape[0] > 0
        rois = torch.cat((
            box_inds[:, 0, None].type(boxes.dtype),
            boxes[box_inds[:, 0], box_inds[:, 1]],
        ), 1)

        # Object class and segmentation representations
        roi_align_res = self.roi_align(img_feats.float(), rois.float())
        if self.mask_upsample is not None:
            assert segms is not None
            segms_indexed = segms[box_inds[:, 0], None, box_inds[:, 1]] - 0.5
            roi_align_res[:, :self.mask_dims] += self.mask_upsample(
                segms_indexed)

        post_roialign = self.after_roi_align(roi_align_res)

        # Add some regularization, encouraging the model to keep giving decent enough predictions
        obj_logits = self.regularizing_predictor(post_roialign)
        obj_labels = classes[box_inds[:, 0], box_inds[:, 1]]
        cnn_regularization = F.cross_entropy(obj_logits,
                                             obj_labels,
                                             reduction='mean')[None]

        feats_to_downsample = post_roialign if self.object_embed is None else torch.cat(
            (post_roialign, self.object_embed(obj_labels)), -1)
        roi_aligned_feats = self.obj_downsample(feats_to_downsample)

        # Reshape into a padded sequence - this is expensive and annoying but easier to implement and debug...
        obj_reps = pad_sequence(roi_aligned_feats, box_mask.sum(1).tolist())
        return {
            'obj_reps_raw': post_roialign,
            'obj_reps': obj_reps,
            'obj_logits': obj_logits,
            'obj_labels': obj_labels,
            'cnn_regularization_loss': cnn_regularization
        }
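
Note that pad_sequence here is called as pad_sequence(flat_feats, lengths), which does not match torch.nn.utils.rnn.pad_sequence; it is presumably a project-local helper. A minimal sketch of what such a helper would do, assuming zero padding (the implementation below is a guess, not the project's code):

import torch

def pad_sequence(flat_feats: torch.Tensor, lengths: list) -> torch.Tensor:
    # Split a [sum(lengths), dim] tensor into per-example chunks and
    # right-pad with zeros to [batch_size, max(lengths), dim].
    out = flat_feats.new_zeros(len(lengths), max(lengths), flat_feats.size(-1))
    start = 0
    for i, l in enumerate(lengths):
        out[i, :l] = flat_feats[start:start + l]
        start += l
    return out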
Example no. 2
from collections import defaultdict
from typing import List, Union

import torch


def decode_spans(pred_tags: torch.LongTensor,
                 lens: Union[List[int], torch.LongTensor]):
    if isinstance(lens, torch.Tensor):
        lens = lens.tolist()
    # Collect, per batch element, the offsets tagged as span starts.
    batch_pred = defaultdict(list)
    for batch, offset in pred_tags.nonzero(as_tuple=False).tolist():
        batch_pred[batch].append(offset)
    # Default: a sequence with no predicted start is one single span.
    batch_pred_spans = [[(0, l)] for l in lens]
    for batch, offsets in batch_pred.items():
        l = lens[batch]
        # Each span runs from one start offset to the next (or to the end).
        batch_pred_spans[batch] = list(zip(offsets, offsets[1:] + [l]))
    return batch_pred_spans
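
A quick usage sketch, assuming a tag of 1 marks the start of a new span (note that position 0 is expected to carry a start tag, otherwise the leading prefix is dropped):

import torch

pred_tags = torch.tensor([[1, 0, 1, 0, 0],
                          [1, 0, 0, 1, 0]])
print(decode_spans(pred_tags, [5, 5]))
# [[(0, 2), (2, 5)], [(0, 3), (3, 5)]]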
Example no. 3
    def forward(self,
                images: torch.Tensor,
                boxes: torch.Tensor,
                box_mask: torch.LongTensor,
                classes: torch.Tensor = None,
                segms: torch.Tensor = None,
                ):
        """
        :param images: [batch_size, 3, im_height, im_width]
        :param boxes:  [batch_size, max_num_objects, 4] Padded boxes
        :param box_mask: [batch_size, max_num_objects] Mask for whether or not each box is OK
        :return: object reps [batch_size, max_num_objects, dim]
        """
        # [batch_size, 2048, im_height // 32, im_width // 32]
        img_feats = self.backbone(images)
        box_inds = box_mask.nonzero()             # 2-D array; each row, e.g. [6, 3], means the fourth box of the seventh QA pair on this GPU is non-zero, i.e. not padding
        assert box_inds.shape[0] > 0              # make sure there is at least one box on this GPU
        rois = torch.cat((
            box_inds[:, 0, None].type(boxes.dtype),
            boxes[box_inds[:, 0], box_inds[:, 1]],
        ), 1)                                # [nbox, 5]: the first column is the image (batch) index, the remaining four are the box coordinates

        # Object class and segmentation representations
        roi_align_res = self.roi_align(img_feats, rois)               # torch.Size([nbox, 1024, 7, 7]): each spatial position gets a 1024-dim representation
        if self.mask_upsample is not None:
            assert segms is not None
            segms_indexed = segms[box_inds[:, 0], None, box_inds[:, 1]] - 0.5                 # gather all non-padding segmentations: [nbox, 1, 14, 14]
            roi_align_res[:, :self.mask_dims] += self.mask_upsample(segms_indexed)            # torch.Size([184, 32, 7, 7]); the segmentation information is only added onto the first 32 channels


        post_roialign = self.after_roi_align(roi_align_res)                 # torch.Size([nbox, 2048])

        # Add some regularization, encouraging the model to keep giving decent enough predictions
        obj_logits = self.regularizing_predictor(post_roialign)              # as in Faster R-CNN, predict each box's class here to inject class-level semantic information
        obj_labels = classes[box_inds[:, 0], box_inds[:, 1]]
        cnn_regularization = F.cross_entropy(obj_logits, obj_labels, reduction='mean')[None]            # tensor([3.2618], device='cuda:1', grad_fn=<UnsqueezeBackward0>)
        # reduction='mean' replaces the deprecated size_average=True, which used to trigger a UserWarning here

        feats_to_downsample = post_roialign if self.object_embed is None else torch.cat((post_roialign, self.object_embed(obj_labels)), -1)           # on top of the 2048-dim visual features, append a 128-dim semantic class embedding
        roi_aligned_feats = self.obj_downsample(feats_to_downsample)
        # print('roi_aligned_feats')
        # print(roi_aligned_feats)

        # Reshape into a padded sequence - this is expensive and annoying but easier to implement and debug... this step restores the box representations to batch format, padded with all-zero rows
        obj_reps = pad_sequence(roi_aligned_feats, box_mask.sum(1).tolist())
        return {
            'obj_reps_raw': post_roialign,
            'obj_reps': obj_reps,
            'obj_logits': obj_logits,
            'obj_labels': obj_labels,
            'cnn_regularization_loss': cnn_regularization
        }
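
The shapes noted in the comments ([nbox, 1, 14, 14] masks in, [nbox, 32, 7, 7] features out) suggest mask_upsample is a strided convolution. A hypothetical module consistent with those shapes:

import torch.nn as nn

mask_dims = 32  # assumed channel budget reserved for mask features
# A 3x3 conv with stride 2 maps the 14x14 soft mask to 7x7 and expands it
# to mask_dims channels so it can be added onto the ROI-aligned features.
mask_upsample = nn.Conv2d(1, mask_dims, kernel_size=3, stride=2, padding=1)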
Example no. 4

import torch
from torch import Tensor, LongTensor


def activation_loss(x: Tensor, y: LongTensor) -> Tensor:
    device = x.device
    # Indices of positive (y == 1) and negative (y == 0) examples.
    pos = y.nonzero().reshape(-1)
    neg = (y - 1).nonzero().reshape(-1)
    x0, x1 = x[neg], x[pos]
    n0, n1 = x0.size(0), x1.size(0)

    # `act` is assumed to be defined elsewhere: it scores how strongly
    # the inputs activate toward the given 0/1 target.
    a0_x0 = act(x0, torch.zeros(n0, device=device))
    a1_x0 = act(x0, torch.ones(n0, device=device))

    a1_x1 = act(x1, torch.ones(n1, device=device))
    a0_x1 = act(x1, torch.zeros(n1, device=device))

    # Negatives should fully activate the 0-target and not the 1-target;
    # positives the reverse.
    neg_loss = (a0_x0 - 1).abs() + a1_x0
    pos_loss = (a1_x1 - 1).abs() + a0_x1

    return (neg_loss.sum() + pos_loss.sum()) / y.size(0)
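
`act` is not defined in this snippet; for the loss terms to make sense it must return a per-example activation in [0, 1] toward a 0/1 target. A purely hypothetical stand-in that makes the function runnable:

import torch
from torch import Tensor

def act(x: Tensor, target: Tensor) -> Tensor:
    # Hypothetical: probability the model assigns to `target` under a
    # sigmoid head, with x taken as raw [n] logits.
    p = torch.sigmoid(x)
    return torch.where(target.bool(), p, 1 - p)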
Example no. 5
    def forward(
        self,
        img_feats: torch.Tensor,
        boxes: torch.Tensor,
        box_mask: torch.LongTensor,
        obj_labels: torch.LongTensor,
    ):
        """
        :param img_feats: [batch_size, max_num_objects, 2048]
        :param boxes:  [batch_size, max_num_objects, 7] Padded boxes
        :param box_mask: [batch_size, max_num_objects] Mask for whether or not each box is OK
        :return: object reps [batch_size, max_num_objects, dim]
        """

        box_inds = box_mask.nonzero()
        rois = img_feats[box_inds[:, 0], box_inds[:, 1]]
        if self.semantic:
            aligned_obj_labels = obj_labels[box_inds[:, 0], box_inds[:, 1]]
            rois = torch.cat((rois, self.object_embed(aligned_obj_labels)), -1)

        roi_aligned_feats = self.ln_f(self.obj_downsample(rois))

        if self.use_bbox:
            bboxes = boxes[box_inds[:, 0], box_inds[:, 1]]
            box_feats = self.ln_f(self.bbox_upsample(bboxes))
            roi_aligned_feats = roi_aligned_feats + box_feats

        # Add some regularization, encouraging the model to keep giving decent enough predictions
        # obj_logits = self.regularizing_predictor(roi_aligned_feats)
        # obj_labels = classes[box_inds[:, 0], box_inds[:, 1]]
        # cnn_regularization = F.cross_entropy(obj_logits, obj_labels, size_average=True)[None]

        # Reshape into a padded sequence - this is expensive and annoying but easier to implement and debug...
        obj_reps = pad_sequence(roi_aligned_feats, box_mask.sum(1).tolist())

        return {
            'obj_reps': obj_reps,
            # 'obj_logits': obj_logits,
            # 'obj_labels': obj_labels,
            'cnn_regularization_loss': None  # cnn_regularization
        }
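
Here the 7-dimensional box geometry is projected up to the feature width and added residually after LayerNorm, much like a positional encoding. A hypothetical wiring of the modules this forward assumes (all sizes below are guesses, not the project's configuration):

import torch.nn as nn

hidden, num_classes = 768, 81  # assumed sizes
object_embed = nn.Embedding(num_classes, 128)   # semantic class embedding
obj_downsample = nn.Linear(2048 + 128, hidden)  # visual + class features
bbox_upsample = nn.Linear(7, hidden)            # box geometry -> feature width
ln_f = nn.LayerNorm(hidden)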
Example no. 6
    def forward(
        self,
        images: torch.Tensor,
        boxes: torch.Tensor,
        box_mask: torch.LongTensor,
        #classes: torch.Tensor = None,
        #segms: torch.Tensor = None
    ):
        """

        :param images: [batch_size, 3, im_height, im_width]
        :param boxes: [batch_size, max_num_objects, 4]
        :param box_mask: [batch_size, max_num_objects]
        :return: [batch_size, max_num_objects, dim]
        """

        img_feats = self.backbone(images)

        box_inds = box_mask.nonzero()  # [num nonzero, 2] (x, y) indices
        assert box_inds.shape[0] > 0  # at least 1 masked index
        rois = torch.cat(
            (
                box_inds[:, 0, None].type(boxes.dtype),  # [x * y, 1]
                boxes[box_inds[:, 0], box_inds[:, 1]]  # boxes[x * y, 4]
            ),
            1)  # [x * y, 1] + [x * y, 4] -> [x * y, 5]

        roi_align_res = self.roi_align(img_feats, rois)

        post_roi_align = self.after_roi_align(roi_align_res)

        #obj_labels = classes[box_inds[:, 0], box_inds[:, 1]]

        roi_aligned_feats = self.obj_downsample(post_roi_align)

        obj_reps = pad_sequence(roi_aligned_feats, box_mask.sum(1).tolist())
        return {
            'obj_reps_raw': post_roi_align,
            'obj_reps': obj_reps,
            #'obj_labels': obj_labels
        }
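
Across these variants the rois tensor is built the same way: column 0 holds the batch index so each ROI is pooled from the right image. This is the Tensor[K, 5] box format that torchvision.ops.roi_align accepts; a standalone check:

import torch
from torchvision.ops import roi_align

feats = torch.randn(2, 256, 32, 32)           # [batch, C, H, W] feature map
rois = torch.tensor([[0., 0., 0., 64., 64.],  # image 0, box in input coords
                     [1., 16., 16., 96., 96.]])
out = roi_align(feats, rois, output_size=(7, 7), spatial_scale=1 / 32)
print(out.shape)  # torch.Size([2, 256, 7, 7])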