Esempio n. 1
    def forward(self, x, boxes):
        """
        Arguments:
            x (Tensor): the mask logits
            boxes (list[BoxList]): bounding boxes that are used as
                reference, one for each image

        Returns:
            results (list[BoxList]): one BoxList for each image, containing
                the extra field mask
        """
        mask_prob = x.sigmoid()

        # select masks corresponding to the predicted classes
        num_masks = x.shape[0]
        labels = [bbox.get_field("labels") for bbox in boxes]
        # one flat label vector covering every box of every image
        labels = torch.cat(labels)
        index = torch.arange(num_masks, device=labels.device)
        # keep, for each mask, only the channel of its predicted class and
        # re-insert a singleton channel dimension
        mask_prob = mask_prob[index, labels][:, None]

        # split the flat batch back into per-image chunks
        boxes_per_image = [len(box) for box in boxes]
        mask_prob = mask_prob.split(boxes_per_image, dim=0)

        if self.masker:
            mask_prob = self.masker(mask_prob, boxes)

        results = []
        for prob, box in zip(mask_prob, boxes):
            # copy the box and all of its fields, then attach the mask
            bbox = BoxList(box.bbox, box.size, mode="xyxy")
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            bbox.add_field("mask", prob)
            results.append(bbox)

        return results
Esempio n. 2
    def get_groundtruth(self, index):
        """Build the ground-truth BoxList for the sample at position ``index``.

        The annotation XML found at ``self._annopath % img_id`` is parsed and
        passed through ``self._preprocess_annotation``; the resulting boxes
        are wrapped in an "xyxy" BoxList carrying the "labels" and
        "difficult" fields.
        """
        image_id = self.ids[index]
        xml_root = ET.parse(self._annopath % image_id).getroot()
        parsed = self._preprocess_annotation(xml_root)

        # "im_info" unpacks as (height, width); the BoxList is built with
        # (width, height).
        im_height, im_width = parsed["im_info"]
        gt = BoxList(parsed["boxes"], (im_width, im_height), mode="xyxy")
        for field_name in ("labels", "difficult"):
            gt.add_field(field_name, parsed[field_name])
        return gt
Esempio n. 3
    def get_groundtruth(self, filename, width, height, aug_det):
        """Assemble the ground-truth target for ``filename``.

        The stored annotation ``self.ann_info[filename]`` is run through
        ``self._preprocess_annotation`` together with ``aug_det``; its
        "boxes", "labels" and "masks" entries are packed into a BoxList in
        "xyxy" mode whose image size is ``(width, height)``.
        """
        size = (width, height)
        record = self._preprocess_annotation(self.ann_info[filename], aug_det)

        gt = BoxList(record["boxes"], size, mode="xyxy")
        gt.add_field("labels", record["labels"])
        gt.add_field(
            "masks",
            SegmentationMask(record["masks"], size, type=self.mask_type),
        )
        return gt
    def get_groundtruth(self, filename):
        """Load the image for ``filename`` and build its ground-truth target.

        The image is read with OpenCV and passed through
        ``self.img_resize_keep_aspect_ratio_with_padding``. Each annotation's
        mask file is turned into one bounding box and a set of mask points,
        both mapped with the same ``ratio``/``pad`` as the image.

        Arguments:
            filename: key into ``self.img_dict`` and ``self.ann_info``

        Returns:
            (img, target): the augmented image and a BoxList in "xyxy" mode
                carrying the "labels" and "masks" fields
        """
        # No colour conversion is applied after cv2.imread.
        img = cv2.imread(self.img_dict[filename], cv2.IMREAD_COLOR)
        img, ratio, pad = self.img_resize_keep_aspect_ratio_with_padding(img)
        height, width = img.shape[:2]

        boxes = []
        masks = []
        gt_classes = []

        for cls_num, mask_file in self.ann_info[filename]:
            mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)

            # Find the bounding box that encloses all the separate
            # segmentation fragments of a single class.
            bbox_points, mask_points = self.find_bounding_square(mask)
            bbox_points, mask_points = self.apply_bb_pts_ratio_pad(
                bbox_points, mask_points, ratio, pad)
            mask_points = self.simple_pts(mask_points)

            x1, y1, x2, y2 = self.getBiggestBoundbox(bbox_points)

            # NOTE(review): the accumulation lines were missing from the
            # reviewed source; they are restored from how `boxes`, `masks`
            # and `gt_classes` are consumed below -- confirm.
            boxes.append([x1, y1, x2, y2])
            masks.append(mask_points)
            gt_classes.append(cls_num)

        img, boxes, masks = img_and_key_point_augmentation(
            self.augmentation, img, boxes, masks)

        anno = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "masks": masks,
            "labels": torch.tensor(gt_classes),
        }

        target = BoxList(anno["boxes"], (width, height), mode="xyxy")
        target.add_field("labels", anno["labels"])

        # NOTE(review): `type=self.mask_type` mirrors the sibling
        # get_groundtruth implementations -- confirm for this class.
        masks = SegmentationMask(anno["masks"], (width, height),
                                 type=self.mask_type)
        target.add_field("masks", masks)

        return img, target
Esempio n. 5
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W

        Returns:
            result (list[BoxList]): one BoxList of proposals per image, with
                the extra field "objectness"
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        # never ask topk for more candidates than there are anchors
        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1, sorted=True)

        # column vector so it broadcasts against topk_idx (N, pre_nms_top_n)
        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            # NOTE(review): the NMS arguments were missing from the reviewed
            # source; `self.nms_thresh` / `self.post_nms_top_n` follow the
            # upstream maskrcnn-benchmark RPN post-processor -- confirm.
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
Esempio n. 6
    def __getitem__(self, idx):
        """Return the ``idx``-th sample as ``(img, target, idx)``.

        The image is read with OpenCV and converted to RGB. Every annotation
        contributes one box taken from its first four values and a
        four-corner polygon built from that box, which is used as the
        instance mask.

        Returns:
            img: the image (a PIL image before ``self.transforms`` is applied)
            target (BoxList): "xyxy" boxes with the fields "labels" and
                "masks", clipped to the image with empty boxes removed
            idx: the index that was requested
        """
        filename = self.img_key_list[idx]

        img = cv2.imread(self.img_dict[filename], cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        height, width = img.shape[:2]

        # target settings
        boxes = []
        temp_masks = []
        gt_classes = []

        for ann_info in self.ann_info[filename]:
            x1, y1, x2, y2 = ann_info[:4]

            # NOTE(review): the accumulation lines were missing from the
            # reviewed source and are restored from how the lists are used
            # below; the class is assumed to be the last annotation entry
            # -- confirm.
            boxes.append([x1, y1, x2, y2])
            # the box corners, flattened into a single polygon
            temp_masks.append([[x1, y1, x1, y2, x2, y2, x2, y1]])
            # If the classes are stored as names rather than numbers, map
            # them through self.class_to_ind[ann_info[-1]] instead.
            gt_classes.append(ann_info[-1])

        img, boxes, temp_masks = img_and_key_point_augmentation(
            self.augmentation, img, boxes, temp_masks)
        img = Image.fromarray(img, mode="RGB")

        target = BoxList(torch.tensor(boxes, dtype=torch.float32),
                         (width, height),
                         mode="xyxy")
        target.add_field("labels", torch.tensor(gt_classes))

        # NOTE(review): `type=self.mask_type` mirrors the sibling
        # get_groundtruth implementations -- confirm for this class.
        seg_masks = SegmentationMask(temp_masks, (width, height),
                                     type=self.mask_type)
        target.add_field("masks", seg_masks)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
Esempio n. 7
 def prepare_boxlist(self, boxes, scores, image_shape):
     """
     Returns BoxList from `boxes` and adds probability scores information
     as an extra field
     `boxes` has shape (#detections, 4 * #classes), where each row represents
     a list of predicted bounding boxes for each of the object classes in the
     dataset (including the background class). The detections in each row
     originate from the same object proposal.
     `scores` has shape (#detection, #classes), where each row represents a list
     of object detection confidence scores for each of the object classes in the
     dataset (including the background class). `scores[i, j]` corresponds to the
     box at `boxes[i, j * 4:(j + 1) * 4]`.
     """
     # flatten to one row per (detection, class) pair so that boxes and
     # scores line up element for element
     boxes = boxes.reshape(-1, 4)
     scores = scores.reshape(-1)
     boxlist = BoxList(boxes, image_shape, mode="xyxy")
     boxlist.add_field("scores", scores)
     return boxlist
Esempio n. 8
    def select_over_all_levels(self, boxlists):
        """Apply per-class NMS to every image and cap the detection count.

        Arguments:
            boxlists (list[BoxList]): one BoxList per image, carrying the
                fields "scores" and "labels"

        Returns:
            results (list[BoxList]): one BoxList per image holding the
                detections kept after NMS, limited to
                ``self.fpn_post_nms_top_n`` when that value is positive
        """
        results = []
        for boxlist in boxlists:
            scores = boxlist.get_field("scores")
            labels = boxlist.get_field("labels")
            boxes = boxlist.bbox
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)

                scores_j = scores[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                # NOTE(review): the NMS arguments were missing from the
                # reviewed source; `self.nms_thresh` follows the upstream
                # maskrcnn-benchmark RetinaNet post-processor -- confirm.
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                self.nms_thresh,
                                                score_field="scores")
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ),
                               j,
                               dtype=torch.int64,
                               device=scores.device))
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                # the k-th smallest score with k = n - top_n + 1 is the
                # top_n-th largest, i.e. the score threshold to keep
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
Esempio n. 9
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).

        Arguments:
            boxlist (BoxList): boxes with a "scores" field; boxes and scores
                are reshaped to ``num_classes`` entries per detection
            num_classes (int): number of classes, background (index 0)
                included

        Returns:
            result (BoxList): the kept detections with the fields "scores"
                and "labels", limited to ``self.detections_per_img`` when
                that value is positive
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms
            )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            # the k-th smallest score with k = n - max + 1 is the max-th
            # largest, i.e. the score threshold to keep
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
Esempio n. 10
    def get_groundtruth(self, filename):
        """Load the image for ``filename`` and build its ground-truth target.

        The image and every annotation mask go through
        ``self.resize_keep_aspect_ratio_with_padding``; each mask yields one
        bounding box and a set of mask points.

        Arguments:
            filename: key into ``self.img_dict`` and ``self.ann_info``

        Returns:
            (img, target): the augmented image and a BoxList in "xyxy" mode
                carrying the "labels" and "masks" fields
        """
        # No colour conversion is applied after cv2.imread.
        img = cv2.imread(self.img_dict[filename], cv2.IMREAD_COLOR)
        img = self.resize_keep_aspect_ratio_with_padding(img)
        height, width = img.shape[:2]

        boxes = []
        masks = []
        gt_classes = []

        for cls_num, mask_file in self.ann_info[filename]:
            mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
            mask = self.resize_keep_aspect_ratio_with_padding(mask)

            # Find the bounding box that encloses all the separate
            # segmentation fragments of a single class.
            bbox_points, mask_points = self.find_bounding_square(mask)

            x1, y1, x2, y2 = self.getBiggestBoundbox(bbox_points)

            # NOTE(review): the accumulation lines were missing from the
            # reviewed source; they are restored from how `boxes`, `masks`
            # and `gt_classes` are consumed below -- confirm.
            boxes.append([x1, y1, x2, y2])
            masks.append(mask_points)
            gt_classes.append(cls_num)

        img, boxes, masks = img_and_key_point_augmentation(self.augmentation, img, boxes, masks)

        anno = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "masks": masks,
            "labels": torch.tensor(gt_classes),
        }

        target = BoxList(anno["boxes"], (width, height), mode="xyxy")
        target.add_field("labels", anno["labels"])

        masks = SegmentationMask(anno["masks"], (width, height), type=self.mask_type)
        target.add_field("masks", masks)

        return img, target
Esempio n. 11
    def __getitem__(self, idx):
        """Return ``(img, target, idx)`` for the sample at position ``idx``.

        The parent dataset supplies the image and its raw annotation list.
        Crowd annotations are discarded; the remaining ones are packed into
        an "xyxy" BoxList with the fields "labels" and "masks", plus
        "keypoints" when the first annotation has that key. The target is
        clipped to the image, empty boxes are removed, and
        ``self.transforms`` is applied when it is set.
        """
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        raw_boxes = torch.as_tensor([obj["bbox"] for obj in anno])
        # the reshape guards against the case with no boxes
        target = BoxList(raw_boxes.reshape(-1, 4), img.size, mode="xywh").convert("xyxy")

        # translate dataset category ids into contiguous label ids
        label_ids = torch.tensor(
            [self.json_category_id_to_contiguous_id[obj["category_id"]] for obj in anno]
        )
        target.add_field("labels", label_ids)

        polygons = [obj["segmentation"] for obj in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size))

        if anno and "keypoints" in anno[0]:
            raw_keypoints = [obj["keypoints"] for obj in anno]
            target.add_field("keypoints", PersonKeypoints(raw_keypoints, img.size))

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
Esempio n. 12
    def forward(self, x, boxes):
        """Attach keypoint predictions to the detected boxes.

        Arguments:
            x: the keypoint head output for all boxes
            boxes (list[BoxList]): reference boxes; exactly one image is
                supported

        Returns:
            results (list[BoxList]): one BoxList per image, a copy of the
                input boxes and fields with the extra field "keypoints"
                (a PersonKeypoints carrying the field "logits")
        """
        mask_prob = x

        scores = None
        if self.keypointer:
            mask_prob, scores = self.keypointer(x, boxes)

        assert len(boxes) == 1, "Only non-batched inference supported for now"
        boxes_per_image = [box.bbox.size(0) for box in boxes]
        mask_prob = mask_prob.split(boxes_per_image, dim=0)
        # NOTE: `scores` is still None when self.keypointer is falsy, so this
        # call requires a keypointer to be configured.
        scores = scores.split(boxes_per_image, dim=0)

        results = []
        for prob, box, score in zip(mask_prob, boxes, scores):
            # copy the box and all of its fields, then attach the keypoints
            bbox = BoxList(box.bbox, box.size, mode="xyxy")
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            prob = PersonKeypoints(prob, box.size)
            prob.add_field("logits", score)
            bbox.add_field("keypoints", prob)
            results.append(bbox)

        return results
Esempio n. 13
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W

        Returns:
            results (list[BoxList]): one BoxList per image with the fields
                "labels" and "scores"
        """
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        box_regression = box_regression.reshape(N, -1, 4)

        candidate_inds = box_cls > self.pre_nms_thresh

        # per-image number of candidates, capped at self.pre_nms_top_n
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors in zip(
                box_cls,
                box_regression,
                pre_nms_top_n,
                candidate_inds,
                anchors):

            # Sort and select TopN
            # TODO most of this can be made out of the loop for
            # all images.
            # TODO:Yang: Not easy to do. Because the numbers of detections are
            # different in each image. Therefore, this part needs to be done
            # per image.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            # column 0 indexes the anchor location, column 1 the class
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            # shift class indices by one so that labels start at 1
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results