Example 1
    def forward(self, samples: NestedTensor):
        """ The forward expects a NestedTensor, which consists of:
               - samples.tensor: batched images, of shape [batch_size x 3 x H x W]
               - samples.mask: a binary mask of shape [batch_size x H x W], containing 1 on padded pixels

            It returns a dict with the following elements:
               - "pred_logits": the classification logits (including no-object) for all queries.
                                Shape= [batch_size x num_queries x (num_classes + 1)]
               - "pred_boxes": The normalized boxes coordinates for all queries, represented as
                               (center_x, center_y, height, width). These values are normalized in [0, 1],
                               relative to the size of each individual image (disregarding possible padding).
                               See PostProcess for information on how to retrieve the unnormalized bounding box.
               - "aux_outputs": Optional, only returned when auxilary losses are activated. It is a list of
                                dictionnaries containing the two above keys for each decoder layer.
        """
        if isinstance(samples, (list, torch.Tensor)):
            samples = nested_tensor_from_tensor_list(samples)
        features, pos = self.backbone(samples)

        src, mask = features[-1].decompose()
        assert mask is not None
        hs = self.transformer(self.input_proj(src), mask,
                              self.query_embed.weight, pos[-1])[0]

        outputs_class = self.class_embed(hs)
        outputs_coord = self.bbox_embed(hs).sigmoid()
        out = {
            'pred_logits': outputs_class[-1],
            'pred_boxes': outputs_coord[-1]
        }
        if self.aux_loss:
            out['aux_outputs'] = self._set_aux_loss(outputs_class,
                                                    outputs_coord)
        return out
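The docstring above defers to PostProcess for recovering absolute boxes. Below is a minimal sketch of that conversion, assuming normalized (center_x, center_y, width, height) predictions and per-image (height, width) sizes; the function name and argument layout are illustrative, not the repository's exact API.

import torch

def boxes_to_absolute_xyxy(pred_boxes, image_sizes):
    # pred_boxes: [batch_size, num_queries, 4] in normalized (cx, cy, w, h)
    # image_sizes: [batch_size, 2] holding (height, width) per image
    cx, cy, w, h = pred_boxes.unbind(-1)
    # center/size to corner coordinates, still normalized to [0, 1]
    boxes = torch.stack([cx - 0.5 * w, cy - 0.5 * h,
                         cx + 0.5 * w, cy + 0.5 * h], dim=-1)
    # scale each box by (img_w, img_h, img_w, img_h) to get pixel coordinates
    img_h, img_w = image_sizes.unbind(-1)
    scale = torch.stack([img_w, img_h, img_w, img_h], dim=-1)
    return boxes * scale[:, None, :]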
Example 2
    def forward(self, samples: NestedTensor):
        if isinstance(samples, (list, torch.Tensor)):
            samples = nested_tensor_from_tensor_list(samples)
        features, pos = self.detr.backbone(samples)

        bs = features[-1].tensors.shape[0]

        src, mask = features[-1].decompose()
        assert mask is not None
        src_proj = self.detr.input_proj(src)
        hs, memory = self.detr.transformer(
            src_proj, mask, self.detr.query_embed.weight, pos[-1]
        )

        outputs_class = self.detr.class_embed(hs)
        outputs_coord = self.detr.bbox_embed(hs).sigmoid()
        out = {"pred_logits": outputs_class[-1], "pred_boxes": outputs_coord[-1]}
        if self.detr.aux_loss:
            out["aux_outputs"] = self.detr._set_aux_loss(outputs_class, outputs_coord)

        # FIXME h_boxes takes the last one computed, keep this in mind
        bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask)

        seg_masks = self.mask_head(
            src_proj,
            bbox_mask,
            [features[2].tensors, features[1].tensors, features[0].tensors],
        )
        outputs_seg_masks = seg_masks.view(
            bs, self.detr.num_queries, seg_masks.shape[-2], seg_masks.shape[-1]
        )

        out["pred_masks"] = outputs_seg_masks
        return out
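The segmentation forward above returns "pred_masks" as low-resolution logits per query. A minimal post-processing sketch, assuming bilinear upsampling to the padded input size and a 0.5 threshold (both are assumptions, not taken verbatim from the repository):

import torch.nn.functional as F

def masks_to_binary(pred_masks, target_size, threshold=0.5):
    # pred_masks: [batch_size, num_queries, h, w] raw mask logits
    upsampled = F.interpolate(pred_masks, size=target_size,
                              mode="bilinear", align_corners=False)
    # sigmoid turns logits into probabilities; thresholding gives binary masks
    return upsampled.sigmoid() > threshold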
Example 3
    def run_model(self,
                  model,
                  inputs_list,
                  tolerate_small_mismatch=False,
                  do_constant_folding=True,
                  dynamic_axes=None,
                  output_names=None,
                  input_names=None):
        model.eval()

        onnx_io = io.BytesIO()
        # export to onnx with the first input
        torch.onnx.export(model,
                          inputs_list[0],
                          onnx_io,
                          do_constant_folding=do_constant_folding,
                          opset_version=12,
                          dynamic_axes=dynamic_axes,
                          input_names=input_names,
                          output_names=output_names)
        # validate the exported model with onnx runtime
        for test_inputs in inputs_list:
            with torch.no_grad():
                if isinstance(test_inputs, (torch.Tensor, list)):
                    test_inputs = (
                        nested_tensor_from_tensor_list(test_inputs), )
                test_outputs = model(*test_inputs)
                if isinstance(test_outputs, torch.Tensor):
                    test_outputs = (test_outputs, )
            self.ort_validate(onnx_io, test_inputs, test_outputs,
                              tolerate_small_mismatch)
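A usage sketch for the helper above; the test method name, the detr_resnet50 constructor call, and the dynamic axis names are illustrative assumptions:

    def test_model_onnx_export_sketch(self):
        # hypothetical test method exercising run_model with two input sizes
        model = detr_resnet50(pretrained=False).eval()
        self.run_model(
            model,
            [(torch.rand(1, 3, 750, 800),), (torch.rand(1, 3, 800, 750),)],
            input_names=["inputs"],
            output_names=["pred_logits", "pred_boxes"],
            dynamic_axes={"inputs": {0: "batch", 2: "height", 3: "width"}},
            tolerate_small_mismatch=True,
        )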
Example 4
    def loss_masks(self, outputs, targets, indices, num_boxes):
        """Compute the losses related to the masks: the focal loss and the dice loss.
        targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w]
        """
        assert "pred_masks" in outputs

        src_idx = self._get_src_permutation_idx(indices)
        tgt_idx = self._get_tgt_permutation_idx(indices)
        src_masks = outputs["pred_masks"]
        src_masks = src_masks[src_idx]
        masks = [t["masks"] for t in targets]
        target_masks, valid = nested_tensor_from_tensor_list(masks).decompose()
        target_masks = target_masks.to(src_masks)
        target_masks = target_masks[tgt_idx]

        # upsample predictions to the target size
        src_masks = interpolate(
            src_masks[:, None],
            size=target_masks.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        src_masks = src_masks[:, 0].flatten(1)

        target_masks = target_masks.flatten(1)
        target_masks = target_masks.view(src_masks.shape)
        losses = {
            "loss_mask": sigmoid_focal_loss(src_masks, target_masks,
                                            num_boxes),
            "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
        }
        return losses
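loss_masks relies on sigmoid_focal_loss and dice_loss helpers that are not shown here. Below is a minimal sketch of a standard dice loss with the same (inputs, targets, num_boxes) signature; it is a common formulation, not necessarily the repository's exact implementation:

def dice_loss_sketch(inputs, targets, num_boxes, eps=1.0):
    # inputs: [num_matched, H*W] raw logits; targets: same shape, binary {0, 1}
    probs = inputs.sigmoid()
    numerator = 2 * (probs * targets).sum(1)
    denominator = probs.sum(1) + targets.sum(1)
    loss = 1 - (numerator + eps) / (denominator + eps)
    # normalize by the number of boxes, like the other losses above
    return loss.sum() / num_boxes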
Example 5
 def _assert_model_output(self, model, scripted_model):
     x = nested_tensor_from_tensor_list(
         [torch.rand(3, 200, 200),
          torch.rand(3, 200, 250)])
     out = model(x)
     out_script = scripted_model(x)
     self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"]))
     self.assertTrue(out["pred_boxes"].equal(out_script["pred_boxes"]))
Example 6
 def test_model_script_detection(self):
     model = detr_resnet50(pretrained=False).eval()
     scripted_model = torch.jit.script(model)
     x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)])
     out = model(x)
     out_script = scripted_model(x)
     self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"]))
     self.assertTrue(out["pred_boxes"].equal(out_script["pred_boxes"]))
Example 7
 def test_model_detection_different_inputs(self):
     model = detr_resnet50(pretrained=False).eval()
     # support NestedTensor
     x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)])
     out = model(x)
     self.assertIn('pred_logits', out)
     # and 4d Tensor
     x = torch.rand(1, 3, 200, 200)
     out = model(x)
     self.assertIn('pred_logits', out)
     # and List[Tensor[C, H, W]]
     x = torch.rand(3, 200, 200)
     out = model([x])
     self.assertIn('pred_logits', out)
Example 8
def unwrap_collate_fn(batch):
    batch = list(zip(*batch))
    batch[0] = nested_tensor_from_tensor_list(batch[0])
    batch[0] = {"tensors": batch[0].tensors, "mask": batch[0].mask}
    return tuple(batch)
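A usage sketch for the collate function above, assuming a dataset that yields (image_tensor, target_dict) pairs; my_detection_dataset is a placeholder, not an object from the repository:

from torch.utils.data import DataLoader

loader = DataLoader(my_detection_dataset, batch_size=2,
                    collate_fn=unwrap_collate_fn)
images, targets = next(iter(loader))
# images is a plain dict with "tensors" ([B, 3, H, W]) and "mask" ([B, H, W])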
Example 9
    def forward(self, samples: NestedTensor):
        """ The forward expects a NestedTensor, which consists of:
               - samples.tensor: batched images, of shape [batch_size x 3 x H x W]
               - samples.mask: a binary mask of shape [batch_size x H x W], containing 1 on padded pixels

            It returns a dict with the following elements:
               - "pred_logits": the classification logits (including no-object) for all queries.
                                Shape= [batch_size x num_queries x (num_classes + 1)]
               - "pred_boxes": The normalized boxes coordinates for all queries, represented as
                               (center_x, center_y, height, width). These values are normalized in [0, 1],
                               relative to the size of each individual image (disregarding possible padding).
                               See PostProcess for information on how to retrieve the unnormalized bounding box.
               - "aux_outputs": Optional, only returned when auxilary losses are activated. It is a list of
                                dictionnaries containing the two above keys for each decoder layer.
        """

        # Global weight norm.
        #params = [p for n, p in self.named_parameters() if ("backbone" not in n and "global_scale" not in n) and p.requires_grad]
        #params = [p for p in self.parameters()]
        #contiguous = torch.cat([p.view(-1) for p in params])
        #mean = contiguous.mean()
        #std = contiguous.std()
        # Initial std: 0.0392
        #print("\n\nstd", std, "\n\n")
        #scale_parameter_member(self, 0.022 / std, mean)
        #scale_parameter_member(self, 0.022 / std)
        #scale_parameter_member(self, self.global_scale / std)
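        # One-time rescaling, applied lazily on the first forward pass: scale every
        # parameter outside the ignore list so their pooled std becomes 0.022.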
        if not self.scaled:
            self.scaled = True
            ignore = [
                "backbone", "global_scale", "bbox_embed.layers.2",
                "query_embed.weight", "class_embed."
            ]
            named_params = [(n, p) for n, p in self.named_parameters()
                            if all(i not in n
                                   for i in ignore) and p.requires_grad]
            names, params = zip(*named_params)
            contiguous = torch.cat([p.view(-1) for p in params])
            std = contiguous.std()
            with torch.no_grad():
                for param in params:
                    param.data.mul_(0.022 / std)
        # scaled_params = multitensor_scaling((0.022 / std), *params)

        # # param_dict = dict(named_params)
        # # scaled_params = []
        # # for (n, p) in named_params:
        # #     # Replace bias with weight for fan in computation.
        # #     fanin_name = n
        # #     if n.endswith("bias"):
        # #         scaled_params.append(p)
        # #         fanin_name = n[:-4] + "weight"
        # #     fanin_shape = param_dict[fanin_name].shape
        # #     fanin = fanin_shape.numel() / fanin_shape[0]
        # #     scale = sqrt(1/fanin) / std
        # #     print(fanin, float(std), sqrt(1/fanin), float(scale), float((p*scale).std()))
        # #     scaled_params.append(p * scale)

        # #scaled_params = multitensor_scaling((self.global_scale.squeeze() / std), *params)
        # scaled_param_dict = dict(zip(names, scaled_params))

        # # a_names, attention_params = zip(*[(n, p) for n, p in named_params if "in_proj" in n])
        # # rest_names, rest_params = zip(*[(n, p) for n, p in named_params if "in_proj" not in n])

        # # #a_scaled_params = multitensor_scaling(sqrt(0.022) / std.sqrt(), *attention_params)
        # # a_scaled_params = multitensor_scaling((0.022 / std).sqrt(), *attention_params)
        # # rest_scaled_params = multitensor_scaling(0.022 / std, *rest_params)

        # # scaled_param_dict = dict(zip(a_names, a_scaled_params))
        # # scaled_param_dict.update(dict(zip(rest_names, rest_scaled_params)))
        # assign_scaled_parameter(self, scaled_param_dict)

        if isinstance(samples, (list, torch.Tensor)):
            samples = nested_tensor_from_tensor_list(samples)
        features, pos = self.backbone(samples)

        if self.high_def:
            # Use the middle feature map as reference
            reference = 1
            pos = pos[reference]
            src, mask = features[reference].decompose()
            src = self.projections[reference](src)
            for i, (projection,
                    fmap) in enumerate(zip(self.projections, features)):
                if i == reference:
                    continue
                fmap = projection(fmap.tensors)
                src += torch.nn.functional.interpolate(fmap,
                                                       src.shape[2:4],
                                                       mode="bilinear",
                                                       align_corners=True)
        else:
            pos = pos[-1]
            src, mask = features[-1].decompose()
            assert mask is not None
            src = self.input_proj(src)

        if self.bootstrap_steps and self.training:
            stride = sum([self.epoch < e for e in self.bootstrap_steps]) + 1
            self.stride = stride
            if stride > 1:
                x_i, y_i = random.choices(range(stride), k=2)
                src = src[:, :, y_i::stride, x_i::stride]
                mask = mask[:, y_i::stride, x_i::stride]
                pos = pos[:, :, y_i::stride, x_i::stride]

        hs = self.transformer(src, mask, self.query_embed.weight, pos)[0]
        #hs = hs[:, :, :self.num_queries]
        outputs_class = self.class_embed(hs)
        #outputs_coord = self.bbox_embed(hs).sigmoid()
        outputs_coord = self.bbox_embed(hs)
        outputs_coord[..., 2:] = torch.exp(outputs_coord[..., 2:])
        out = {
            'pred_logits': outputs_class[-1],
            'pred_boxes': outputs_coord[-1]
        }
        if self.aux_loss:
            out['aux_outputs'] = self._set_aux_loss(outputs_class,
                                                    outputs_coord)

        #restore_params(self)
        return out
Example 10
 def forward(self, inputs: List[Tensor]):
     sample = nested_tensor_from_tensor_list(inputs)
     return self.model(sample)
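A usage sketch for the wrapper above; ModelWrapper is an assumed name for the class that defines this forward, and its constructor signature is an assumption:

model = ModelWrapper(detr_resnet50(pretrained=False).eval())
out = model([torch.rand(3, 480, 640), torch.rand(3, 512, 512)])
print(out["pred_logits"].shape, out["pred_boxes"].shape)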