def paste_mask_in_image(mask, box, im_h, im_w):
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = misc_nn_ops.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])]
    return im_mask
def resize(self, image, target):
    h, w = image.shape[-2:]
    min_size = float(min(image.shape[-2:]))
    max_size = float(max(image.shape[-2:]))
    if self.training:
        size = random.choice(self.min_size)
    else:
        # FIXME assume for now that testing uses the largest scale
        size = self.min_size[-1]
    scale_factor = size / min_size
    if max_size * scale_factor > self.max_size:
        scale_factor = self.max_size / max_size
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]

    if target is None:
        return image, target

    bbox = target["boxes"]
    bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
    target["boxes"] = bbox

    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
        target["masks"] = mask

    if "keypoints" in target:
        keypoints = target["keypoints"]
        keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
        target["keypoints"] = keypoints
    return image, target
def resize(self, image, target):
    # type: (Tensor, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    h, w = image.shape[-2:]
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    if self.training:
        size = float(self.torch_choice(self.min_size))
    else:
        # FIXME assume for now that testing uses the largest scale
        size = float(self.min_size[-1])
    scale_factor = size / min_size
    if max_size * scale_factor > self.max_size:
        scale_factor = self.max_size / max_size
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]

    if target is None:
        return image, target

    bbox = target["boxes"]
    bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
    target["boxes"] = bbox

    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    return image, target
def forward(self, x):
    x = self.kps_score_lowres(x)
    # Upsample the low-resolution keypoint score maps back to heatmap resolution
    x = misc_nn_ops.interpolate(
        x, scale_factor=self.up_scale, mode="bilinear", align_corners=False)
    return x
def loss_masks(self, outputs, targets, indices, num_boxes):
    assert "pred_masks" in outputs

    src_idx = self._get_src_permutation_idx(indices)
    tgt_idx = self._get_tgt_permutation_idx(indices)

    src_masks = outputs["pred_masks"]

    # TODO use valid to mask invalid areas due to padding in loss
    target_masks, valid = NestedTensor.from_tensor_list(
        [t["masks"] for t in targets]).decompose()
    target_masks = target_masks.to(src_masks)

    src_masks = src_masks[src_idx]
    src_masks = misc_ops.interpolate(
        src_masks[:, None], size=target_masks.shape[-3:],
        mode="trilinear", align_corners=False)
    src_masks = src_masks[:, 0].flatten(1)

    target_masks = target_masks[tgt_idx].flatten(1)

    losses = {
        "loss_mask": sigmoid_focal_loss(src_masks, target_masks, num_boxes),
        "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
    }
    return losses
def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    """
    :param mask: Tensor [28, 28], values in 0~1, not yet thresholded to binary
    :param box: Tensor [4], coordinates already scaled to the original input image size
    :param im_h: int, height of the original input image
    :param im_w: int, width of the original input image
    :return: Tensor [im_h, im_w]
    """
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = misc_nn_ops.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[
        (y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])
    ]
    return im_mask
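# Usage sketch (illustrative, not part of the original sources): paste one 28x28 soft mask
# back into a 480x640 canvas with the function above. Assumes `misc_nn_ops` resolves to
# torchvision.ops.misc and that `box` holds integer xyxy coordinates in image space.
import torch

mask = torch.rand(28, 28)                       # soft mask values in 0~1 (after sigmoid)
box = torch.tensor([40, 60, 120, 180])          # x1, y1, x2, y2, already scaled to the image
im_mask = paste_mask_in_image(mask, box, im_h=480, im_w=640)
binary_mask = im_mask >= 0.5                    # threshold afterwards if a hard mask is needed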
def unmold_maskV2(mask, bbox, image_shape, origin_shape=None):
    """Converts a mask generated by the neural network to a format similar
    to its original shape.
    mask: [height, width] of type float. A small, typically 28x28 mask.
        sigmoid() has already been applied.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.

    Returns a soft mask (values in 0~1) with the same size as the original
    image; thresholding is left to the caller.
    """
    x1, y1, x2, y2 = bbox
    mask = misc_nn_ops.interpolate(
        mask[None, None].float(), size=(y2 - y1, x2 - x1),
        mode="bilinear", align_corners=False)[0, 0]

    # Put the mask in the right location.
    full_mask = torch.zeros(image_shape, dtype=torch.float32, device=mask.device)
    full_mask[y1:y2, x1:x2] = mask

    if origin_shape is not None:
        full_mask = misc_nn_ops.interpolate(
            full_mask[None, None].float(), size=origin_shape,
            mode="bilinear", align_corners=False)[0, 0]

    return full_mask  # values in 0.~1.
def forward(self, x): x = self.kps_score_lowres(x) x = misc_nn_ops.interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) #Softmax around each map. I will use this in the loss function based on https://www.robots.ox.ac.uk/~vgg/publications/2018/Neumann18a/neumann18a.pdf n_batch, n_channels, w, h = x.shape hh = x.contiguous().view(n_batch, n_channels, -1) x_logits = nn.functional.log_softmax(hh, dim=2) x_logits = x_logits.view(n_batch, n_channels, w, h) return x_logits
def unmold_mask(mask, bbox, image_shape, origin_shape=None):
    """Converts a mask generated by the neural network to a format similar
    to its original shape.
    mask: [height, width] of type float. A small, typically 28x28 mask.
        sigmoid() has already been applied.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.

    Returns a binary mask with the same size as the original image.

    If the mask is binary (e.g. 0/1), use "nearest" interpolation;
    if it holds continuous values (e.g. 0.0~1.0 or 0, 1, 2, ..., 255),
    use "bilinear" interpolation.
    """
    threshold = 0.5
    mask = mask >= threshold
    x1, y1, x2, y2 = bbox
    mask = misc_nn_ops.interpolate(
        mask[None, None].float(), size=(y2 - y1, x2 - x1), mode="nearest")[0, 0]

    # Put the mask in the right location.
    full_mask = torch.zeros(image_shape, dtype=torch.bool, device=mask.device)
    full_mask[y1:y2, x1:x2] = mask

    if origin_shape is not None:
        # cast back to a binary mask after resizing to the original resolution
        full_mask = misc_nn_ops.interpolate(
            full_mask[None, None].float(), size=origin_shape, mode="nearest")[0, 0].bool()
    return full_mask
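# Usage sketch (illustrative, not from the original sources): expand a 28x28 soft mask to
# the padded training resolution and then to the original image resolution. `bbox` is
# assumed to hold plain Python ints in (x1, y1, x2, y2) order, matching the unpacking above.
import torch

small_mask = torch.rand(28, 28)                  # sigmoid outputs in 0~1
full = unmold_mask(small_mask, bbox=(30, 50, 130, 210),
                   image_shape=(512, 512), origin_shape=(480, 640))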
def __call__(self, img, target):
    """
    :param img: PIL image
    :param target: dict of Tensors
    :return:
        image: PIL image
        target: dict of Tensors
    """
    img = np.asarray(img)
    img_h, img_w = img.shape[:2]
    target["original_size"] = torch.as_tensor((img_h, img_w), dtype=torch.float32)

    # Scale so the shorter side matches min_size, capped by max_size on the longer side
    min_size = min(img_w, img_h)
    max_size = max(img_w, img_h)
    scale = self.min_size / min_size
    if max_size * scale > self.max_size:
        scale = self.max_size / max_size

    new_w = int(scale * img_w)
    new_h = int(scale * img_h)
    target["resize"] = torch.as_tensor((new_h, new_w, scale), dtype=torch.float32)

    img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

    if "boxes" in target:
        boxes = target["boxes"]
        boxes = resize_boxes(boxes, (img_h, img_w), (new_h, new_w))
        target["boxes"] = boxes

    if "masks" in target and target["masks"] is not None:
        target["masks"] = misc_nn_ops.interpolate(
            target["masks"][None].float(), size=(new_h, new_w), mode="nearest")[0].byte()

    if "keypoints" in target:
        keypoints = target["keypoints"]
        keypoints = resize_keypoints(keypoints, (img_h, img_w), (new_h, new_w))
        target["keypoints"] = keypoints

    return PIL.Image.fromarray(img), target
def _resize_image_and_masks(image, self_min_size, self_max_size, target):
    # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    scale_factor = self_min_size / min_size
    if max_size * scale_factor > self_max_size:
        scale_factor = self_max_size / max_size
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    return image, target
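# Usage sketch (illustrative): resize a single CHW image and its instance masks so the
# shorter side is about 800 px while the longer side stays under 1333 px, mirroring the
# defaults of torchvision's GeneralizedRCNNTransform. The tensor values are placeholders.
import torch

image = torch.rand(3, 600, 900)
target = {"masks": torch.zeros(2, 600, 900, dtype=torch.uint8)}
image, target = _resize_image_and_masks(image, self_min_size=800.0,
                                        self_max_size=1333.0, target=target)
# image.shape[-2:] is now roughly (800, 1200); target["masks"] is resized the same way.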
def __call__(self, img, target):
    """
    :param img: PIL image
    :param target: dict of Tensors
    :return:
        image: PIL image
        target: dict of Tensors
    """
    if self.multi_scale:
        choice_size = random.choice(self.multi_scale_size)
        self.size = (choice_size, choice_size)

    img = np.asarray(img)
    original_size = img.shape[:2]
    target["resize"] = torch.as_tensor(self.size, dtype=torch.float32)

    img = cv2.resize(img, self.size, interpolation=cv2.INTER_CUBIC)

    if "masks" in target and target["masks"] is not None:
        target["masks"] = misc_nn_ops.interpolate(
            target["masks"][None].float(), size=self.size, mode="nearest")[0].byte()

    if "boxes" in target:
        boxes = target["boxes"]
        boxes = resize_boxes(boxes, original_size, self.size)
        target["boxes"] = boxes

    if "keypoints" in target:
        keypoints = target["keypoints"]
        keypoints = resize_keypoints(keypoints, original_size, self.size)
        target["keypoints"] = keypoints

    return PIL.Image.fromarray(img), target
def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target):
    # type: (Tensor, float, float, Optional[Dict[str, Tensor]]) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]
    from torch.onnx import operators
    im_shape = operators.shape_as_tensor(image)[-2:]
    min_size = torch.min(im_shape).to(dtype=torch.float32)
    max_size = torch.max(im_shape).to(dtype=torch.float32)
    scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size)

    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]

    if target is None:
        return image, target

    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    return image, target
def loss_masks(self, outputs, targets, indices, num_boxes):
    """Compute the losses related to the masks: the focal loss and the dice loss.
    targets dicts must contain the key "masks" containing a tensor of dim
    [nb_target_boxes, h, w]
    """
    # assert "pred_masks" in outputs
    src_idx = self._get_src_permutation_idx(indices)
    tgt_idx = self._get_tgt_permutation_idx(indices)

    src_masks = outputs["pred_masks"]

    # TODO use valid to mask invalid areas due to padding in loss
    target_masks, valid = nested_tensor_from_tensor_list(
        [t["masks"] for t in targets]).decompose()
    target_masks = target_masks.to(src_masks)

    src_masks = src_masks[src_idx]
    # upsample predictions to the target size
    src_masks = interpolate(
        src_masks[:, None], size=target_masks.shape[-2:],
        mode="bilinear", align_corners=False,
    )
    src_masks = src_masks[:, 0].flatten(1)

    target_masks = target_masks[tgt_idx].flatten(1)

    focal_loss = sigmoid_focal_loss(src_masks, target_masks)
    box_norm_focal_loss = focal_loss.mean(1).sum() / num_boxes
    norm_dice_loss = dice_loss(src_masks, target_masks) / num_boxes

    losses = {
        "loss_mask": box_norm_focal_loss,
        "loss_dice": norm_dice_loss,
    }
    return losses
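# `dice_loss` is referenced above but not defined in this snippet. A minimal sketch that is
# consistent with the two-argument call (the division by num_boxes happens at the call site)
# and that assumes `inputs` are raw logits, in the spirit of DETR's segmentation losses:
def dice_loss(inputs, targets):
    # inputs/targets: [num_matched, H*W]; soft dice computed per mask, then summed over masks
    inputs = inputs.sigmoid()
    numerator = 2 * (inputs * targets).sum(1)
    denominator = inputs.sum(1) + targets.sum(1)
    loss = 1 - (numerator + 1) / (denominator + 1)
    return loss.sum()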
def resize(image, target, size, max_size=None):
    # size can be min_size (scalar) or (w, h) tuple

    def get_size_with_aspect_ratio(image_size, size, max_size=None):
        w, h = image_size
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(round(max_size * min_original_size / max_original_size))

        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        return (oh, ow)

    def get_size(image_size, size, max_size=None):
        if isinstance(size, (list, tuple)):
            return size[::-1]
        else:
            return get_size_with_aspect_ratio(image_size, size, max_size)

    size = get_size(image.size, size, max_size)
    rescaled_image = F.resize(image, size)

    if target is None:
        return rescaled_image, None

    ratios = tuple(
        float(s) / float(s_orig)
        for s, s_orig in zip(rescaled_image.size, image.size))
    ratio_width, ratio_height = ratios

    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        scaled_boxes = boxes * torch.as_tensor(
            [ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = scaled_boxes

    if "area" in target:
        area = target["area"]
        scaled_area = area * (ratio_width * ratio_height)
        target["area"] = scaled_area

    h, w = size
    target["size"] = torch.tensor([h, w])

    if "masks" in target:
        target['masks'] = interpolate(
            target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5

    return rescaled_image, target
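# Usage sketch (illustrative): DETR-style resize of a PIL image plus its target dict.
# Assumes `F` is torchvision.transforms.functional and `interpolate` is the wrapper used
# alongside this function; the image and the target values are placeholders for the demo.
from PIL import Image
import torch

image = Image.new("RGB", (640, 480))
target = {
    "boxes": torch.tensor([[10.0, 20.0, 200.0, 220.0]]),   # xyxy, absolute pixels
    "area": torch.tensor([38000.0]),
    "masks": torch.zeros(1, 480, 640),
}
rescaled_image, rescaled_target = resize(image, target, size=800, max_size=1333)
# rescaled_image.size is (1066, 800); boxes, area, and masks are rescaled consistently.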