def loss_masks(self, outputs, targets, indices, num_boxes):
    """Compute the focal and dice losses for the matched mask predictions.

    Args:
        outputs: Mapping that must contain the key ``"pred_masks"``.
        targets: Iterable of dicts; each one provides a ``"masks"`` entry.
        indices: Matching passed to ``_get_src_permutation_idx`` and
            ``_get_tgt_permutation_idx`` to select predictions and targets.
        num_boxes: Forwarded unchanged to both loss functions (presumably
            a normalisation factor -- confirm against their definitions).

    Returns:
        A dict with the entries ``"loss_mask"`` and ``"loss_dice"``.
    """
    assert "pred_masks" in outputs

    pred_sel = self._get_src_permutation_idx(indices)
    gt_sel = self._get_tgt_permutation_idx(indices)
    all_pred_masks = outputs["pred_masks"]

    # TODO use valid to mask invalid areas due to padding in loss
    per_target_masks = [entry["masks"] for entry in targets]
    gt_masks, _valid = NestedTensor.from_tensor_list(per_target_masks).decompose()
    # Align the targets with the predictions via Tensor.to(other).
    gt_masks = gt_masks.to(all_pred_masks)

    # Keep only the matched predictions, then resize them to the last three
    # dimensions of the batched target masks. A singleton axis is inserted
    # for the interpolation call and removed again right after.
    matched_pred = all_pred_masks[pred_sel]
    resized_pred = misc_ops.interpolate(
        matched_pred[:, None],
        size=gt_masks.shape[-3:],
        mode="trilinear",
        align_corners=False,
    )
    flat_pred = resized_pred[:, 0].flatten(1)
    flat_gt = gt_masks[gt_sel].flatten(1)

    return {
        "loss_mask": sigmoid_focal_loss(flat_pred, flat_gt, num_boxes),
        "loss_dice": dice_loss(flat_pred, flat_gt, num_boxes),
    }
def forward(self, samples: NestedTensor):
    """Run the wrapped detector and add per-query segmentation masks.

    Args:
        samples: A ``NestedTensor``; any other input is first converted
            with ``NestedTensor.from_tensor_list``.

    Returns:
        A dict with ``"pred_logits"``, ``"pred_boxes"`` and
        ``"pred_masks"``; ``"aux_outputs"`` is added when
        ``self.detr.aux_loss`` is truthy.
    """
    if not isinstance(samples, NestedTensor):
        samples = NestedTensor.from_tensor_list(samples)

    detr = self.detr
    features, pos = detr.backbone(samples)
    last_level = features[-1]
    batch_size = last_level.tensors.shape[0]

    src, mask = last_level.decompose()
    src_proj = detr.input_proj(src)
    hs, memory = detr.transformer(src_proj, mask, detr.query_embed.weight, pos[-1])

    outputs_class = detr.class_embed(hs)
    outputs_coord = detr.bbox_embed(hs).sigmoid()

    # The main predictions come from the last entry along the first axis.
    out = {
        "pred_logits": outputs_class[-1],
        "pred_boxes": outputs_coord[-1],
    }
    if detr.aux_loss:
        # Every earlier entry is exposed as an auxiliary output.
        out["aux_outputs"] = [
            {"pred_logits": logits, "pred_boxes": boxes}
            for logits, boxes in zip(outputs_class[:-1], outputs_coord[:-1])
        ]

    # FIXME h_boxes takes the last one computed, keep this in mind
    bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask)
    seg_masks = self.mask_head(src_proj, bbox_mask, [last_level.tensors])

    # Reshape to (batch, num_queries, <last three dims of seg_masks>).
    out["pred_masks"] = seg_masks.view(
        batch_size, detr.num_queries, *seg_masks.shape[-3:]
    )
    return out
def forward(self, samples: NestedTensor):
    """Run the detector and return class logits and box predictions.

    Args:
        samples: A ``NestedTensor``; any other input is converted with
            ``NestedTensor.from_tensor_list`` before it is used.

    Returns:
        A dict with ``"pred_logits"`` and ``"pred_boxes"`` taken from the
        last entry of the transformer output; ``"aux_outputs"`` (one dict
        per earlier entry) is added when ``self.aux_loss`` is truthy.
    """
    # Convert first: reading ``samples.tensors`` on a raw, non-NestedTensor
    # input would raise AttributeError before the conversion could run.
    if not isinstance(samples, NestedTensor):
        samples = NestedTensor.from_tensor_list(samples)

    features, pos = self.backbone(samples)
    src, mask = features[-1].decompose()

    # Original author's note: hs is (6, bs, num_queries, hidden_dim) --
    # presumably one slice per decoder layer; TODO confirm.
    hs = self.transformer(
        self.input_proj(src), mask, self.query_embed.weight, pos[-1]
    )[0]

    outputs_class = self.class_embed(hs)
    outputs_coord = self.bbox_embed(hs).sigmoid()

    out = {
        "pred_logits": outputs_class[-1],
        "pred_boxes": outputs_coord[-1],
    }
    if self.aux_loss:
        # Every entry except the last is exposed as an auxiliary output.
        out["aux_outputs"] = [
            {"pred_logits": a, "pred_boxes": b}
            for a, b in zip(outputs_class[:-1], outputs_coord[:-1])
        ]
    return out