def paste_mask_in_image(mask, box, im_h, im_w):
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = misc_nn_ops.interpolate(mask,
                                   size=(h, w),
                                   mode='bilinear',
                                   align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]):(y_1 - box[1]),
                                     (x_0 - box[0]):(x_1 - box[0])]
    return im_mask
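A minimal usage sketch for the helper above, assuming paste_mask_in_image and its misc_nn_ops dependency (torchvision.ops.misc) are in scope:

import torch

mask = torch.rand(28, 28)               # per-ROI mask probabilities in [0, 1]
box = torch.tensor([50, 60, 150, 200])  # x1, y1, x2, y2 in image coordinates
full = paste_mask_in_image(mask, box, im_h=480, im_w=640)
print(full.shape)                       # torch.Size([480, 640])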
Example #2
    def resize(self, image, target):
        h, w = image.shape[-2:]
        min_size = float(min(image.shape[-2:]))
        max_size = float(max(image.shape[-2:]))
        if self.training:
            size = random.choice(self.min_size)
        else:
            # FIXME assume for now that testing uses the largest scale
            size = self.min_size[-1]
        scale_factor = size / min_size
        if max_size * scale_factor > self.max_size:
            scale_factor = self.max_size / max_size
        image = torch.nn.functional.interpolate(image[None],
                                                scale_factor=scale_factor,
                                                mode='bilinear',
                                                align_corners=False)[0]

        if target is None:
            return image, target

        bbox = target["boxes"]
        bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
        target["boxes"] = bbox

        if "masks" in target:
            mask = target["masks"]
            mask = misc_nn_ops.interpolate(
                mask[None].float(), scale_factor=scale_factor)[0].byte()
            target["masks"] = mask

        if "keypoints" in target:
            keypoints = target["keypoints"]
            keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
            target["keypoints"] = keypoints
        return image, target
Example #3
    def resize(self, image, target):
        # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
        h, w = image.shape[-2:]
        im_shape = torch.tensor(image.shape[-2:])
        min_size = float(torch.min(im_shape))
        max_size = float(torch.max(im_shape))
        if self.training:
            size = float(self.torch_choice(self.min_size))
        else:
            # FIXME assume for now that testing uses the largest scale
            size = float(self.min_size[-1])
        scale_factor = size / min_size
        if max_size * scale_factor > self.max_size:
            scale_factor = self.max_size / max_size
        image = torch.nn.functional.interpolate(image[None],
                                                scale_factor=scale_factor,
                                                mode='bilinear',
                                                align_corners=False)[0]

        if target is None:
            return image, target

        bbox = target["boxes"]
        bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
        target["boxes"] = bbox

        if "masks" in target:
            mask = target["masks"]
            mask = misc_nn_ops.interpolate(
                mask[None].float(), scale_factor=scale_factor)[0].byte()
            target["masks"] = mask
        return image, target
Example #4
    def forward(self, x):
        x = self.kps_score_lowres(x)
        # Upsample the low-resolution keypoint score maps to the ROI size
        x = misc_nn_ops.interpolate(x,
                                    scale_factor=self.up_scale,
                                    mode="bilinear",
                                    align_corners=False)
        return x
Example #5
    def loss_masks(self, outputs, targets, indices, num_boxes):
        assert "pred_masks" in outputs

        src_idx = self._get_src_permutation_idx(indices)
        tgt_idx = self._get_tgt_permutation_idx(indices)

        src_masks = outputs["pred_masks"]

        # TODO use valid to mask invalid areas due to padding in loss
        target_masks, valid = NestedTensor.from_tensor_list(
            [t["masks"] for t in targets]).decompose()
        target_masks = target_masks.to(src_masks)

        src_masks = src_masks[src_idx]
        # The masks here have three trailing dims (e.g. T x H x W), hence trilinear
        src_masks = misc_ops.interpolate(src_masks[:, None],
                                         size=target_masks.shape[-3:],
                                         mode="trilinear",
                                         align_corners=False)
        src_masks = src_masks[:, 0].flatten(1)

        target_masks = target_masks[tgt_idx].flatten(1)

        losses = {
            "loss_mask": sigmoid_focal_loss(src_masks, target_masks,
                                            num_boxes),
            "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
        }
        return losses
Example #6
def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    """
    :param mask: Tensor [28, 28], values in 0~1, not yet binarized
    :param box: Tensor [1, 4], already rescaled to the original input image size
    :param im_h: int, height of the original input image
    :param im_w: int, width of the original input image
    :return: Tensor [im_h, im_w]
    """

    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = misc_nn_ops.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[
        (y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])
    ]
    return im_mask
Example #7
def unmold_maskV2(mask, bbox, image_shape, origin_shape=None):
    """Converts a mask generated by the neural network to a format similar
    to its original shape.
    mask: [height, width] of type float. A small, typically 28x28 mask;
        sigmoid() has already been applied.
    bbox: [x1, y1, x2, y2]. The box to fit the mask in.
    Returns a float mask (no thresholding applied) with the same size as
    the original image.
    """
    x1, y1, x2, y2 = bbox
    # Resize the soft mask to the box size; threshold later, if needed
    mask = misc_nn_ops.interpolate(mask[None, None].float(),
                                   size=(y2 - y1, x2 - x1),
                                   mode="bilinear",
                                   align_corners=False)[0, 0]

    # Put the mask in the right location.
    full_mask = torch.zeros(image_shape, dtype=torch.float32, device=mask.device)
    full_mask[y1:y2, x1:x2] = mask

    if origin_shape is not None:
        full_mask = misc_nn_ops.interpolate(full_mask[None, None].float(),
                                            size=origin_shape,
                                            mode="bilinear",
                                            align_corners=False)[0, 0]

    return full_mask  # values in 0.0~1.0
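A short usage sketch, assuming plain integer box coordinates so that (y2 - y1, x2 - x1) is a valid size argument for interpolate:

import torch

mask28 = torch.rand(28, 28)    # sigmoid output of a mask head
bbox = (50, 60, 150, 200)      # x1, y1, x2, y2 as ints
full = unmold_maskV2(mask28, bbox, image_shape=(480, 640))
binary = full >= 0.5           # threshold only at the very end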
Example #8
    def forward(self, x):
        x = self.kps_score_lowres(x)
        x = misc_nn_ops.interpolate(x,
                                    scale_factor=self.up_scale,
                                    mode="bilinear",
                                    align_corners=False)

        # Log-softmax over each keypoint map; used in the loss function, following
        # https://www.robots.ox.ac.uk/~vgg/publications/2018/Neumann18a/neumann18a.pdf
        n_batch, n_channels, h, w = x.shape
        hh = x.contiguous().view(n_batch, n_channels, -1)
        x_logits = nn.functional.log_softmax(hh, dim=2)
        x_logits = x_logits.view(n_batch, n_channels, h, w)

        return x_logits
Example #9
def unmold_mask(mask, bbox, image_shape, origin_shape=None):
    """Converts a mask generated by the neural network to a format similar
    to its original shape.
    mask: [height, width] of type float. A small, typically 28x28 mask;
        sigmoid() has already been applied.
    bbox: [x1, y1, x2, y2]. The box to fit the mask in.
    Returns a binary mask with the same size as the original image.

    For binary values (e.g. 0/1), use "nearest" interpolation;
    for continuous values (e.g. 0.0~1.0 or 0, 1, ..., 255), use "bilinear".
    """
    threshold = 0.5
    mask = mask >= threshold

    x1, y1, x2, y2 = bbox
    # Binarize first, then resize with "nearest" to keep the mask binary
    mask = misc_nn_ops.interpolate(mask[None, None].float(),
                                   size=(y2 - y1, x2 - x1),
                                   mode="nearest")[0, 0]

    # Put the mask in the right location.
    full_mask = torch.zeros(image_shape, dtype=torch.bool, device=mask.device)
    full_mask[y1:y2, x1:x2] = mask

    if origin_shape is not None:
        full_mask = misc_nn_ops.interpolate(full_mask[None, None].float(),
                                            size=origin_shape,
                                            mode="nearest")[0, 0]

    return full_mask
Example #10
    def __call__(self, img, target):
        """
        :param img: PIL image
        :param target: dict of Tensors
        :return:
                img: PIL image
                target: dict of Tensors
        """
        img = np.asarray(img)
        img_h, img_w = img.shape[:2]

        target["original_size"] = torch.as_tensor((img_h, img_w), dtype=torch.float32)

        # Scale according to the shorter side, capped by the longer side
        min_size = min(img_w, img_h)
        max_size = max(img_w, img_h)
        scale = self.min_size / min_size
        if max_size * scale > self.max_size:
            scale = self.max_size / max_size

        new_w = int(scale * img_w)
        new_h = int(scale * img_h)

        target["resize"] = torch.as_tensor((new_h, new_w, scale), dtype=torch.float32)

        img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

        if "boxes" in target:
            boxes = target["boxes"]
            boxes = resize_boxes(boxes, (img_h, img_w), (new_h, new_w))
            target["boxes"] = boxes

        if "masks" in target and target["masks"] is not None:
            target["masks"] = misc_nn_ops.interpolate(target["masks"][None].float(),
                                                      size=(new_h, new_w),
                                                      mode="nearest")[0].byte()

        if "keypoints" in target:
            keypoints = target["keypoints"]
            keypoints = resize_keypoints(keypoints, (img_h, img_w), (new_h, new_w))
            target["keypoints"] = keypoints

        return PIL.Image.fromarray(img), target
Example #11
def _resize_image_and_masks(image, self_min_size, self_max_size, target):
    # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    scale_factor = self_min_size / min_size
    if max_size * scale_factor > self_max_size:
        scale_factor = self_max_size / max_size
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear',
        align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    return image, target
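A quick sketch of calling the helper above on dummy data (assuming misc_nn_ops is torchvision.ops.misc, as in the snippet):

import torch

image = torch.rand(3, 600, 800)                      # C x H x W
target = {"masks": torch.zeros(2, 600, 800, dtype=torch.uint8)}
image, target = _resize_image_and_masks(image, 800.0, 1333.0, target)
print(image.shape[-2:], target["masks"].shape[-2:])  # both scaled by 800 / 600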
Example #12
    def __call__(self, img, target):
        """
        :param img: PIL image
        :param target: dict of Tensors
        :return:
                img: PIL image
                target: dict of Tensors
        """
        if self.multi_scale:
            # Pick a square size at random for multi-scale training
            choice_size = random.choice(self.multi_scale_size)
            self.size = (choice_size, choice_size)

        img = np.asarray(img)
        original_size = img.shape[:2]
        target["resize"] = torch.as_tensor(self.size, dtype=torch.float32)

        img = cv2.resize(img, self.size, interpolation=cv2.INTER_CUBIC)

        if "masks" in target and target["masks"] is not None:
            target["masks"] = misc_nn_ops.interpolate(target["masks"][None].float(),
                                                      size=self.size,
                                                      mode="nearest")[0].byte()

        if "boxes" in target:
            boxes = target["boxes"]
            boxes = resize_boxes(boxes, original_size, self.size)
            target["boxes"] = boxes

        if "keypoints" in target:
            keypoints = target["keypoints"]
            keypoints = resize_keypoints(keypoints, original_size, self.size)
            target["keypoints"] = keypoints

        return PIL.Image.fromarray(img), target
Example #13
def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target):
    # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size,
                             self_max_size / max_size)

    image = torch.nn.functional.interpolate(image[None],
                                            scale_factor=scale_factor,
                                            mode='bilinear',
                                            align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(),
                                       scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    return image, target
Example #14
    def loss_masks(self, outputs, targets, indices, num_boxes):
        """
        Compute the losses related to the masks: the focal loss and the dice loss.
        targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w]
        """
        # assert "pred_masks" in outputs

        src_idx = self._get_src_permutation_idx(indices)
        tgt_idx = self._get_tgt_permutation_idx(indices)

        src_masks = outputs["pred_masks"]

        # TODO use valid to mask invalid areas due to padding in loss
        target_masks, valid = nested_tensor_from_tensor_list(
            [t["masks"] for t in targets]).decompose()
        target_masks = target_masks.to(src_masks)

        src_masks = src_masks[src_idx]
        # upsample predictions to the target size
        src_masks = interpolate(
            src_masks[:, None],
            size=target_masks.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        src_masks = src_masks[:, 0].flatten(1)

        target_masks = target_masks[tgt_idx].flatten(1)
        focal_loss = sigmoid_focal_loss(src_masks, target_masks)
        box_norm_focal_loss = focal_loss.mean(1).sum() / num_boxes
        norm_dice_loss = dice_loss(src_masks, target_masks) / num_boxes
        losses = {
            "loss_mask": box_norm_focal_loss,
            "loss_dice": norm_dice_loss,
        }
        return losses
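For reference, a minimal dice loss matching the dice_loss(src_masks, target_masks) call above; this is a hedged sketch, not necessarily the repo's own definition:

import torch

def dice_loss(inputs, targets):
    # inputs: [N, HW] raw logits; targets: [N, HW] binary masks.
    # Returns a sum over instances; the caller divides by num_boxes.
    probs = inputs.sigmoid()
    numerator = 2 * (probs * targets).sum(1)
    denominator = probs.sum(1) + targets.sum(1)
    return (1 - (numerator + 1) / (denominator + 1)).sum()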
Example #15
def resize(image, target, size, max_size=None):
    # size can be min_size (scalar) or (w, h) tuple

    def get_size_with_aspect_ratio(image_size, size, max_size=None):
        w, h = image_size
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(
                    round(max_size * min_original_size / max_original_size))

        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        return (oh, ow)

    def get_size(image_size, size, max_size=None):
        if isinstance(size, (list, tuple)):
            return size[::-1]
        else:
            return get_size_with_aspect_ratio(image_size, size, max_size)

    size = get_size(image.size, size, max_size)
    rescaled_image = F.resize(image, size)

    if target is None:
        return rescaled_image, None

    ratios = tuple(
        float(s) / float(s_orig)
        for s, s_orig in zip(rescaled_image.size, image.size))
    ratio_width, ratio_height = ratios

    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        scaled_boxes = boxes * torch.as_tensor(
            [ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = scaled_boxes

    if "area" in target:
        area = target["area"]
        scaled_area = area * (ratio_width * ratio_height)
        target["area"] = scaled_area

    h, w = size
    target["size"] = torch.tensor([h, w])

    if "masks" in target:
        target['masks'] = interpolate(
            target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5

    return rescaled_image, target
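A short usage sketch, assuming F is torchvision.transforms.functional as in the snippet:

import torch
from PIL import Image

img = Image.new("RGB", (800, 600))       # PIL size is (w, h)
target = {"boxes": torch.tensor([[10., 10., 100., 200.]]),
          "area": torch.tensor([17100.])}
img, target = resize(img, target, size=512, max_size=1333)
print(img.size, target["size"])          # shorter side rescaled to 512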