Example #1
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, _, H, W = box_cls.shape
        A = box_regression.shape[1] // 4
        C = box_cls.shape[1] // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        candidate_inds = box_cls > self.pre_nms_thresh

        pre_nms_top_n = candidate_inds.reshape(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max_v=self.pre_nms_top_n)

        results = []
        for i in range(box_cls.shape[0]):
            per_box_cls, per_box_regression, per_pre_nms_top_n, per_candidate_inds, per_anchors = \
                box_cls[i], box_regression[i], pre_nms_top_n[i], candidate_inds[i], anchors[i]

            # Sort and select top-N
            # TODO: most of this could be moved out of the loop and done for
            # all images at once.
            # TODO (Yang): not easy to do, because the number of detections
            # differs per image, so this part has to be done per image.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                per_box_cls.topk(per_pre_nms_top_n.item(), sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            if per_class.numel() > 0:
                per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
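Both Example #1 and Example #3 rely on a permute_and_flatten helper that is not shown here. Below is a minimal sketch of what such a helper typically looks like (modeled on the maskrcnn-benchmark convention; treat the exact body as an assumption), written with PyTorch, though Jittor Vars expose the same view/permute/reshape methods:

import torch

def permute_and_flatten(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C),
    # so per-location predictions line up with the flattened anchor order.
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    layer = layer.reshape(N, -1, C)
    return layer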
Example #2
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size, mode='poly')
        target.add_field("masks", masks)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
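The target above is built in COCO's xywh layout and immediately converted to xyxy corners. The basic arithmetic of that conversion is just the following (note that maskrcnn-benchmark-style BoxList.convert additionally subtracts a legacy TO_REMOVE = 1 pixel from the max corner; this sketch omits that detail):

# COCO box [x, y, w, h] -> corner box [x1, y1, x2, y2]
x, y, w, h = 10.0, 20.0, 30.0, 40.0
box_xyxy = (x, y, x + w, y + h)
print(box_xyxy)  # (10.0, 20.0, 40.0, 60.0)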
Example #3
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.view(-1, 4),
                                          concat_anchors.view(-1, 4))

        proposals = proposals.view(N, -1, 4)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
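Example #3 delegates the actual box arithmetic to self.box_coder.decode. A simplified sketch of the standard Faster R-CNN decoding that such a coder implements (the real BoxCoder also divides the deltas by configurable weights and clamps dw/dh before the exp; this is an illustration, not the exact implementation):

import torch

def decode_deltas(rel_codes, anchors):
    # anchors: (K, 4) in xyxy; rel_codes: (K, 4) as (dx, dy, dw, dh)
    widths = anchors[:, 2] - anchors[:, 0] + 1
    heights = anchors[:, 3] - anchors[:, 1] + 1
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    dx, dy, dw, dh = rel_codes.unbind(dim=1)
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    return torch.stack([
        pred_ctr_x - 0.5 * pred_w,
        pred_ctr_y - 0.5 * pred_h,
        pred_ctr_x + 0.5 * pred_w - 1,
        pred_ctr_y + 0.5 * pred_h - 1,
    ], dim=1)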
Example #4
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)
        if not self.is_train:
            target = None
            if self._transforms is not None:
                img, target = self._transforms(img, None)
            return img, target, idx
        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = np.array([obj["bbox"] for obj in anno], dtype=np.float32)
        boxes = boxes.reshape(-1, 4)
        target = BoxList(boxes, img.size, mode="xywh", to_jittor=False)
        target = target.convert("xyxy")
        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = np.array(classes, dtype=np.int32)
        target.add_field("labels", classes)

        if self.with_masks and anno and "segmentation" in anno[0]:
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks,
                                     img.size,
                                     mode='poly',
                                     to_jittor=False)
            target.add_field("masks", masks)

        if self.with_masks and anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = PersonKeypoints(keypoints, img.size, to_jittor=False)
            target.add_field("keypoints", keypoints)

        target = target.clip_to_image(remove_empty=True)
        if self._transforms is not None:
            img, target = self._transforms(img, target)
        img = img.astype(np.float32)
        return img, target, idx
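Examples #2 and #4 both remap COCO's sparse category_id values to contiguous labels via json_category_id_to_contiguous_id. A sketch of how such a mapping is commonly built from the COCO API (the construction below is an assumption, not code from the examples):

from pycocotools.coco import COCO

coco = COCO("annotations/instances_train2017.json")
# COCO category ids are sparse (1..90 with gaps); map them to 1..80,
# reserving 0 for the background class.
json_category_id_to_contiguous_id = {
    v: i + 1 for i, v in enumerate(coco.getCatIds())
}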
Example #5
    def forward_for_single_feature_map(self, locations, box_cls,
                                       box_regression, centerness,
                                       proposal_embed, proposal_margin,
                                       image_sizes, level):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).transpose(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).transpose(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        centerness = centerness.view(N, 1, H, W).transpose(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()

        proposal_embed = proposal_embed.view(N, -1, H, W).transpose(0, 2, 3, 1)
        proposal_embed = proposal_embed.reshape(N, H * W, -1)
        proposal_margin = proposal_margin.view(N, 1, H, W).transpose(0, 2, 3, 1)
        proposal_margin = proposal_margin.reshape(N, -1)

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max_v=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness.unsqueeze(2)

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            if per_candidate_nonzeros.numel() > 0:
                per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_proposal_embed = proposal_embed[i]
            per_proposal_embed = per_proposal_embed[per_box_loc, :]
            per_proposal_margin = proposal_margin[i][per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n.item(), sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]
                per_proposal_embed = per_proposal_embed[top_k_indices]
                per_proposal_margin = per_proposal_margin[top_k_indices]

            detections = jt.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ], dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist.add_field("proposal_embed", per_proposal_embed)
            boxlist.add_field("proposal_margin", per_proposal_margin)
            if boxlist.bbox.numel() > 0:
                boxlist = boxlist.clip_to_image(remove_empty=False)
                boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
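Unlike the anchor-based examples, this head decodes boxes directly from per-pixel locations: each location (x, y) predicts distances (l, t, r, b) to the four box sides, giving (x - l, y - t, x + r, y + b). A tiny worked illustration with made-up numbers:

import jittor as jt

locations = jt.array([[100.0, 80.0]])        # one location (x, y)
ltrb = jt.array([[10.0, 20.0, 30.0, 40.0]])  # predicted (l, t, r, b)
box = jt.stack([
    locations[:, 0] - ltrb[:, 0],
    locations[:, 1] - ltrb[:, 1],
    locations[:, 0] + ltrb[:, 2],
    locations[:, 1] + ltrb[:, 3],
], dim=1)
print(box)  # [[ 90.  60. 130. 120.]]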
Example #6
    def forward_for_single_feature_map(self, locations, box_cls,
                                       box_regression, centerness,
                                       image_sizes):
        """
        Arguments:
            locations: tensor of size H * W * dense_points, 2
            box_cls: tensor of size N, C, H, W
            box_regression: tensor of size N, dense_points * 4, H, W
            centerness: tensor of size N, dense_points, H, W
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, self.num_classes - 1).sigmoid()
        box_regression = box_regression.view(N, self.dense_points * 4, H, W)
        box_regression = box_regression.permute(0, 2, 3, 1).reshape(N, -1, 4)
        centerness = centerness.view(N, self.dense_points, H, W)
        centerness = centerness.permute(0, 2, 3, 1).reshape(N, -1).sigmoid()

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max_v=self.pre_nms_top_n)

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness.unsqueeze(2)
        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n.item(), sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = jt.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ], dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            if self.is_sqrt:
                boxlist.add_field("scores", per_box_cls.sqrt())
            else:
                boxlist.add_field("scores", per_box_cls)
            if boxlist.bbox.numel() > 0:
                boxlist = boxlist.clip_to_image(remove_empty=False)
                boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
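When is_sqrt is set, the score stored above is sqrt(cls * centerness): the geometric mean of the two probabilities rather than their product, which keeps the combined score on the same scale as either factor. In plain numbers:

cls_score, centerness = 0.81, 0.49
combined = cls_score * centerness  # 0.3969 -- the product shrinks the score
rescaled = combined ** 0.5         # 0.63   -- geometric mean of 0.81 and 0.49
print(combined, rescaled)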
Example #7
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).reshape(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)

        batch_idx = jt.arange(N).unsqueeze(1)

        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = jt.contrib.concat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(box_regression.reshape(-1, 4),
                                          concat_anchors.reshape(-1, 4))

        proposals = proposals.reshape(N, -1, 4)

        result = []
        for i in range(len(image_shapes)):
            proposal, score, im_shape = proposals[i], objectness[i], image_shapes[i]
            boxlist = BoxList(proposal, im_shape, mode="xyxy")
            boxlist.add_field("objectness", score)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            boxlist = boxlist_nms(
                boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            result.append(boxlist)
        return result
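All three proposal-generating examples finish with boxlist_nms. A minimal sketch of what that wrapper typically does (modeled on the maskrcnn-benchmark version, here using torchvision's NMS as a stand-in for the backend call; the Jittor port's internals are an assumption):

import torch
from torchvision.ops import nms

def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"):
    # Suppress overlapping boxes by IoU, ranked by the given score field;
    # keep at most max_proposals survivors (-1 means keep all).
    if nms_thresh <= 0:
        return boxlist
    mode = boxlist.mode
    boxlist = boxlist.convert("xyxy")
    keep = nms(boxlist.bbox, boxlist.get_field(score_field), nms_thresh)
    if max_proposals > 0:
        keep = keep[:max_proposals]
    return boxlist[keep].convert(mode)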