Example no. 1
0
    def loss_masks(self, outputs, targets, indices, num_boxes):
        """Compute the mask losses (sigmoid focal + dice) for matched queries.

        Args:
            outputs: dict of model outputs; must contain "pred_masks"
                (predicted mask logits, indexed per batch/query).
            targets: list of per-sample target dicts; each must provide a
                "masks" tensor (padded together via NestedTensor below).
            indices: per-sample (src, tgt) matching indices from the matcher.
            num_boxes: normalization factor shared by both losses.

        Returns:
            dict with "loss_mask" (sigmoid focal loss) and "loss_dice".
        """
        assert "pred_masks" in outputs

        src_idx = self._get_src_permutation_idx(indices)
        tgt_idx = self._get_tgt_permutation_idx(indices)

        src_masks = outputs["pred_masks"]

        # TODO use `valid` to mask invalid areas due to padding in the loss
        target_masks, valid = NestedTensor.from_tensor_list(
            [t["masks"] for t in targets]).decompose()
        # Match device/dtype of the predictions.
        target_masks = target_masks.to(src_masks)

        # Keep only the predictions matched to a target, then upsample them
        # to the target resolution; "trilinear" + shape[-3:] implies the
        # masks are 3-D volumes (depth, height, width).
        src_masks = src_masks[src_idx]
        src_masks = misc_ops.interpolate(src_masks[:, None],
                                         size=target_masks.shape[-3:],
                                         mode="trilinear",
                                         align_corners=False)
        src_masks = src_masks[:, 0].flatten(1)

        target_masks = target_masks[tgt_idx].flatten(1)

        losses = {
            "loss_mask": sigmoid_focal_loss(src_masks, target_masks,
                                            num_boxes),
            "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
        }
        return losses
Example no. 2
0
    def forward(self, samples: NestedTensor):
        """Run the wrapped DETR model, then predict a segmentation mask
        for every query on top of the detection outputs.

        Returns a dict with "pred_logits", "pred_boxes", "pred_masks" and,
        when aux losses are enabled, per-layer "aux_outputs".
        """
        if not isinstance(samples, NestedTensor):
            samples = NestedTensor.from_tensor_list(samples)

        features, pos = self.detr.backbone(samples)
        batch_size = features[-1].tensors.shape[0]

        src, mask = features[-1].decompose()
        src_proj = self.detr.input_proj(src)
        hs, memory = self.detr.transformer(src_proj, mask,
                                           self.detr.query_embed.weight,
                                           pos[-1])

        outputs_class = self.detr.class_embed(hs)
        outputs_coord = self.detr.bbox_embed(hs).sigmoid()

        out = {"pred_logits": outputs_class[-1],
               "pred_boxes": outputs_coord[-1]}
        if self.detr.aux_loss:
            aux = []
            for cls_lvl, box_lvl in zip(outputs_class[:-1],
                                        outputs_coord[:-1]):
                aux.append({"pred_logits": cls_lvl, "pred_boxes": box_lvl})
            out["aux_outputs"] = aux

        # FIXME h_boxes takes the last one computed, keep this in mind
        bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask)
        seg_masks = self.mask_head(src_proj, bbox_mask,
                                   [features[-1].tensors])
        out["pred_masks"] = seg_masks.view(batch_size,
                                           self.detr.num_queries,
                                           *seg_masks.shape[-3:])
        return out
Example no. 3
0
 def forward(self, samples: NestedTensor):
     """DETR forward pass: backbone -> transformer -> class/box heads.

     Args:
         samples: a NestedTensor, or any input accepted by
             NestedTensor.from_tensor_list (it is wrapped if needed).

     Returns:
         dict with "pred_logits", "pred_boxes" for the last decoder layer,
         plus "aux_outputs" (one dict per intermediate layer) when
         self.aux_loss is set.
     """
     # NOTE: the old debug print of samples.tensors.shape ran BEFORE this
     # conversion and crashed on non-NestedTensor inputs; prints removed.
     if not isinstance(samples, NestedTensor):
         samples = NestedTensor.from_tensor_list(samples)
     features, pos = self.backbone(samples)
     src, mask = features[-1].decompose()
     # hs: (num_decoder_layers, bs, num_queries, hidden_dim)
     hs = self.transformer(self.input_proj(src), mask,
                           self.query_embed.weight, pos[-1])[0]
     outputs_class = self.class_embed(hs)
     outputs_coord = self.bbox_embed(hs).sigmoid()
     out = {
         "pred_logits": outputs_class[-1],
         "pred_boxes": outputs_coord[-1]
     }
     if self.aux_loss:
         out["aux_outputs"] = [{
             "pred_logits": a,
             "pred_boxes": b
         } for a, b in zip(outputs_class[:-1], outputs_coord[:-1])]
     return out