def json_to_d2(height, width, json):
    # "json" here is expected to be a dict of per-instance fields (the name
    # shadows the stdlib json module); each key/value pair becomes a field on
    # the returned Instances.
    inst = Instances((height, width), **json)
    print(inst)
    print(type(inst))
    return inst
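# Hedged usage sketch for json_to_d2 (not from the original repo): it assumes
# detectron2 is installed and that the "json" argument is a dict of equal-length
# per-instance fields, which Instances accepts as keyword arguments.
def _example_json_to_d2():
    import torch
    fields = {"scores": torch.tensor([0.9, 0.4]), "pred_classes": torch.tensor([1, 3])}
    inst = json_to_d2(480, 640, fields)
    assert len(inst) == 2 and inst.image_size == (480, 640)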
def find_top_bua_rpn_proposals( proposals, pred_objectness_logits, images, nms_thresh, pre_nms_topk, post_nms_topk, min_box_side_len, training, ): """ For each feature map, select the `pre_nms_topk` highest scoring proposals, apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` highest scoring proposals among all the feature maps if `training` is True, otherwise, returns the highest `post_nms_topk` scoring proposals for each feature map. Args: proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). All proposal predictions on the feature maps. pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). images (ImageList): Input images as an :class:`ImageList`. nms_thresh (float): IoU threshold to use for NMS pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. When RPN is run on multiple feature maps (as in FPN) this number is per feature map. post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. When RPN is run on multiple feature maps (as in FPN) this number is total, over all feature maps. min_box_side_len (float): minimum proposal box side length in pixels (absolute units wrt input images). training (bool): True if proposals are to be used in training, otherwise False. This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." comment. Returns: proposals (list[Instances]): list of N Instances. The i-th Instances stores post_nms_topk object proposals for image i. """ image_sizes = images.image_sizes # in (h, w) order image_scales = images.image_scales device = proposals[0].device # 1. Concat all levels together all_scores = [] all_proposals = [] level_ids = [] for level_id, proposals_i, logits_i in zip(itertools.count(), proposals, pred_objectness_logits): Hi_Wi_A = logits_i.shape[1] all_proposals.append(proposals_i) all_scores.append(logits_i) level_ids.append( torch.full((Hi_Wi_A, ), level_id, dtype=torch.int64, device=device)) all_scores = cat(all_scores, dim=1) all_proposals = cat(all_proposals, dim=1) level_ids = cat(level_ids, dim=0) # 2. For each image, run a choose pre_nms_topk proposal ,per-level NMS, and choose post_nms_topk results. results = [] for n, image_size in enumerate(image_sizes): boxes = BUABoxes(all_proposals[n]) scores_per_img = all_scores[n] boxes.clip(image_size) keep = boxes.filter_boxes() boxes = boxes[keep] scores_per_img = scores_per_img[keep] lvl = level_ids[keep] # filter empty boxes keep = boxes.nonempty(threshold=min_box_side_len * image_scales[n]) if keep.sum().item() != len(boxes): boxes, scores_per_img, lvl = boxes[keep], scores_per_img[ keep], lvl[keep] # choose pre_nms_topk proposal Hi_Wi_A = scores_per_img.shape[0] num_proposals_i = min(pre_nms_topk, Hi_Wi_A) scores_per_img, idx = scores_per_img.sort(descending=True, dim=0) topk_scores_i = scores_per_img[:num_proposals_i] topk_idx = idx[:num_proposals_i] topk_boxes_i = boxes[topk_idx, :] lvl_i = lvl[topk_idx] keep = batched_nms(topk_boxes_i.tensor, topk_scores_i, lvl_i, nms_thresh) # In Detectron1, there was different behavior during training vs. testing. # (https://github.com/facebookresearch/Detectron/issues/459) # During training, topk is over the proposals from *all* images in the training batch. # During testing, it is over the proposals for each image separately. # As a result, the training behavior becomes batch-dependent, # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. 
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = topk_boxes_i[keep]
        res.objectness_logits = topk_scores_i[keep]
        results.append(res)
    return results
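# Illustrative sketch of the per-level NMS trick used above (assumes
# torchvision is available and its batched_nms behaves like the batched_nms
# imported by this module): passing the FPN level id as the "category" index
# keeps NMS per-level, so overlapping boxes from different levels never
# suppress each other.
def _example_per_level_nms():
    import torch
    from torchvision.ops import batched_nms
    boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 10., 10.]])
    scores = torch.tensor([0.9, 0.8])
    levels = torch.tensor([0, 1])  # heavily overlapping boxes on different levels
    keep = batched_nms(boxes, scores, levels, iou_threshold=0.5)
    assert keep.numel() == 2  # neither box is suppressed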
def inference_single_image(self, box_cls, box_delta, anchors, image_size): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Arguments: box_cls (list[Tensor]): list of #feature levels. Each entry contains tensor of size (H x W x A, K) box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. anchors (list[Boxes]): list of #feature levels. Each entry contains a Boxes object, which contains all the anchors for that image in that feature level. image_size (tuple(H, W)): a tuple of the image height and width. Returns: Same as `inference`, but for only one image. """ boxes_all = [] scores_all = [] class_idxs_all = [] # Iterate over every feature level for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): # (HxWxAxK,) box_cls_i = box_cls_i.flatten().sigmoid_() # Keep top k top scoring indices only. num_topk = min(self.topk_candidates, box_reg_i.size(0)) # torch.sort is actually faster than .topk (at least on GPUs) predicted_prob, topk_idxs = box_cls_i.sort(descending=True) predicted_prob = predicted_prob[:num_topk] topk_idxs = topk_idxs[:num_topk] # filter out the proposals with low confidence score keep_idxs = predicted_prob > self.score_threshold predicted_prob = predicted_prob[keep_idxs] topk_idxs = topk_idxs[keep_idxs] anchor_idxs = topk_idxs // self.num_classes classes_idxs = topk_idxs % self.num_classes box_reg_i = box_reg_i[anchor_idxs] anchors_i = anchors_i[anchor_idxs] # predict boxes predicted_boxes = self.box2box_transform.apply_deltas( box_reg_i, anchors_i.tensor) boxes_all.append(predicted_boxes) scores_all.append(predicted_prob) class_idxs_all.append(classes_idxs) boxes_all, scores_all, class_idxs_all = [ cat(x) for x in [boxes_all, scores_all, class_idxs_all] ] keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold) keep = keep[:self.max_detections_per_image] result = Instances(image_size) result.pred_boxes = Boxes(boxes_all[keep]) result.scores = scores_all[keep] result.pred_classes = class_idxs_all[keep] return result
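# Small worked example of the index arithmetic used above (nothing beyond
# plain PyTorch is assumed): flattened (anchor, class) score indices are split
# back into an anchor index and a class index with // and %.
def _example_topk_index_decomposition():
    import torch
    num_classes = 80
    topk_idxs = torch.tensor([0, 79, 80, 161])
    anchor_idxs = topk_idxs // num_classes
    class_idxs = topk_idxs % num_classes
    assert anchor_idxs.tolist() == [0, 0, 1, 2]
    assert class_idxs.tolist() == [0, 79, 0, 1]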
def _original_call(self, dataset_dict): """ Modified from detectron2's original __call__ in DatasetMapper """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below image = self._read_image(dataset_dict, format=self.img_format) if not self.backfill_size: utils.check_image_size(dataset_dict, image) if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image) else: # Crop around an instance if there are instances in the image. # USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w dataset_dict["image"] = torch.as_tensor( image.transpose(2, 0, 1).astype("float32")) # Can use uint8 if it turns out to be slow some day assert not self.load_proposals, "Not supported!" if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) # Convert dataset_dict["annotations"] to dataset_dict["instances"] annotations = [ obj for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] # Convert either rotated box or horizontal box to XYWHA_ABS format original_boxes = [ BoxMode.convert( box=obj["bbox"], from_mode=obj["bbox_mode"], to_mode=BoxMode.XYWHA_ABS, ) for obj in annotations ] transformed_boxes = transforms.apply_rotated_box( np.array(original_boxes, dtype=np.float64)) instances = Instances(image_shape) instances.gt_classes = torch.tensor( [obj["category_id"] for obj in annotations], dtype=torch.int64) instances.gt_boxes = RotatedBoxes(transformed_boxes) instances.gt_boxes.clip(image_shape) dataset_dict["instances"] = instances[ instances.gt_boxes.nonempty()] return dataset_dict
def annotations_to_instances(annos, image_size, mask_format="polygon"): """ Create an :class:`Instances` object used by the models, from instance annotations in the dataset dict. Args: annos (list[dict]): a list of instance annotations in one image, each element for one instance. image_size (tuple): height, width Returns: Instances: It will contain fields "gt_boxes", "gt_classes", "gt_masks", "gt_keypoints", if they can be obtained from `annos`. This is the format that builtin models expect. """ boxes = [ BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos ] target = Instances(image_size) boxes = target.gt_boxes = Boxes(boxes) boxes.clip(image_size) classes = [obj["category_id"] for obj in annos] classes = torch.tensor(classes, dtype=torch.int64) target.gt_classes = classes if len(annos) and "segmentation" in annos[0]: segms = [obj["segmentation"] for obj in annos] if mask_format == "polygon": masks = PolygonMasks(segms) else: assert mask_format == "bitmask", mask_format masks = [] for segm in segms: if isinstance(segm, list): # polygon masks.append(polygons_to_bitmask(segm, *image_size)) elif isinstance(segm, dict): # COCO RLE masks.append(mask_util.decode(segm)) elif isinstance(segm, np.ndarray): assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( segm.ndim) # mask array masks.append(segm) else: raise ValueError( "Cannot convert segmentation of type '{}' to BitMasks!" "Supported types are: polygons as list[list[float] or ndarray]," " COCO-style RLE as a dict, or a full-image segmentation mask " "as a 2D ndarray.".format(type(segm))) # torch.from_numpy does not support array with negative stride. masks = BitMasks( torch.stack([ torch.from_numpy(np.ascontiguousarray(x)) for x in masks ])) target.gt_masks = masks if len(annos) and "keypoints" in annos[0]: kpts = [obj.get("keypoints", []) for obj in annos] target.gt_keypoints = Keypoints(kpts) return target
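# Hedged usage sketch for annotations_to_instances (assumes detectron2's
# BoxMode and the function above being in scope): a single XYWH_ABS annotation
# is converted to XYXY_ABS, clipped, and packed into gt_boxes / gt_classes.
def _example_annotations_to_instances():
    from detectron2.structures import BoxMode
    annos = [{"bbox": [10, 10, 20, 30], "bbox_mode": BoxMode.XYWH_ABS, "category_id": 0}]
    target = annotations_to_instances(annos, image_size=(100, 100))
    assert target.gt_boxes.tensor.tolist() == [[10.0, 10.0, 30.0, 40.0]]
    assert target.gt_classes.tolist() == [0]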
def forward_for_single_feature_map(self, locations, logits_pred, reg_pred, ctrness_pred, image_sizes, top_feat=None): N, C, H, W = logits_pred.shape # put in the same format as locations logits_pred = logits_pred.view(N, C, H, W).permute(0, 2, 3, 1) logits_pred = logits_pred.reshape(N, -1, C).sigmoid() box_regression = reg_pred.view(N, 4, H, W).permute(0, 2, 3, 1) box_regression = box_regression.reshape(N, -1, 4) ctrness_pred = ctrness_pred.view(N, 1, H, W).permute(0, 2, 3, 1) ctrness_pred = ctrness_pred.reshape(N, -1).sigmoid() if top_feat is not None: top_feat = top_feat.view(N, -1, H, W).permute(0, 2, 3, 1) top_feat = top_feat.reshape(N, H * W, -1) # if self.thresh_with_ctr is True, we multiply the classification # scores with centerness scores before applying the threshold. if self.thresh_with_ctr: logits_pred = logits_pred * ctrness_pred[:, :, None] candidate_inds = logits_pred > self.pre_nms_thresh pre_nms_top_n = candidate_inds.view(N, -1).sum(1) pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_topk) if not self.thresh_with_ctr: logits_pred = logits_pred * ctrness_pred[:, :, None] results = [] for i in range(N): per_box_cls = logits_pred[i] per_candidate_inds = candidate_inds[i] per_box_cls = per_box_cls[per_candidate_inds] per_candidate_nonzeros = per_candidate_inds.nonzero() per_box_loc = per_candidate_nonzeros[:, 0] per_class = per_candidate_nonzeros[:, 1] per_box_regression = box_regression[i] per_box_regression = per_box_regression[per_box_loc] per_locations = locations[per_box_loc] if top_feat is not None: per_top_feat = top_feat[i] per_top_feat = per_top_feat[per_box_loc] per_pre_nms_top_n = pre_nms_top_n[i] if per_candidate_inds.sum().item() > per_pre_nms_top_n.item(): per_box_cls, top_k_indices = \ per_box_cls.topk(per_pre_nms_top_n, sorted=False) per_class = per_class[top_k_indices] per_box_regression = per_box_regression[top_k_indices] per_locations = per_locations[top_k_indices] if top_feat is not None: per_top_feat = per_top_feat[top_k_indices] detections = torch.stack([ per_locations[:, 0] - per_box_regression[:, 0], per_locations[:, 1] - per_box_regression[:, 1], per_locations[:, 0] + per_box_regression[:, 2], per_locations[:, 1] + per_box_regression[:, 3], ], dim=1) boxlist = Instances(image_sizes[i]) boxlist.pred_boxes = Boxes(detections) boxlist.scores = torch.sqrt(per_box_cls) boxlist.pred_classes = per_class boxlist.locations = per_locations if top_feat is not None: boxlist.top_feat = per_top_feat results.append(boxlist) return results
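# The score used above is sqrt(classification_prob * centerness), i.e. the
# geometric mean of the two probabilities; a tiny numeric check using only
# plain PyTorch:
def _example_ctrness_score():
    import torch
    cls_prob, ctrness = torch.tensor(0.64), torch.tensor(0.25)
    assert torch.isclose(torch.sqrt(cls_prob * ctrness), torch.tensor(0.4))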
def apply_late_fusion_and_evaluate(cfg, evaluator, det_1, det_2, method): evaluator.reset() img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/' num_img = len(det_2['image']) count_1 = 0 count_2 = 0 count_fusion = 0 print('Method: ', method) for i in range(num_img): info_1 = {} info_1['img_name'] = det_1['image'][i] info_1['bbox'] = det_1['boxes'][i] info_1['score'] = det_1['scores'][i] info_1['class'] = det_1['classes'][i] info_2 = {} info_2['img_name'] = det_2['image'][i].split('.')[0] + '.jpeg' info_2['bbox'] = det_2['boxes'][i] info_2['score'] = det_2['scores'][i] info_2['class'] = det_2['classes'][i] #pdb.set_trace() if len(info_1['bbox']) == 0 or len(info_2['bbox']) == 0: if (len(info_1['bbox']) > 0): out_boxes = np.array(info_1['bbox']) out_class = torch.Tensor(info_1['class']) out_scores = torch.Tensor(info_1['score']) elif (len(info_2['bbox']) > 0): out_boxes = np.array(info_2['bbox']) out_class = torch.Tensor(info_2['class']) out_scores = torch.Tensor(info_2['score']) else: out_boxes = np.array(info_2['bbox']) out_class = torch.Tensor(info_2['class']) out_scores = torch.Tensor(info_2['score']) else: if method == 'nms': out_boxes, out_scores, out_class = nms_1(info_1, info_2) elif method == 'pooling': in_boxes, in_scores, in_class = prepare_data(info_1, info_2) out_boxes = in_boxes out_scores = torch.Tensor(in_scores) out_class = torch.Tensor(in_class) elif method == 'baysian' or method == 'baysian_avg_bbox' or method == 'avg_score' or method == 'baysian_wt_score_box': threshold = 0.5 in_boxes, in_scores, in_class = prepare_data(info_1, info_2) keep, out_scores, out_boxes, out_class = nms_2( in_boxes, in_scores, in_class, threshold, method) count_1 += len(info_1['bbox']) count_2 += len(info_2['bbox']) count_fusion += len(out_boxes) file_name = img_folder + info_1['img_name'].split('.')[0] + '.jpeg' img = cv2.imread(file_name) H, W, _ = img.shape # Handle inputs inputs = [] input_info = {} input_info['file_name'] = file_name input_info['height'] = H input_info['width'] = W input_info['image_id'] = det_2['image_id'][i] input_info['image'] = torch.Tensor(img) inputs.append(input_info) # Handle outputs outputs = [] out_info = {} proposals = Instances([H, W]) proposals.pred_boxes = Boxes(out_boxes) proposals.scores = out_scores proposals.pred_classes = out_class out_info['instances'] = proposals outputs.append(out_info) evaluator.process(inputs, outputs) img = draw_box(img, out_boxes, (0, 255, 0)) out_img_name = 'out_img_baysian_fusion/' + file_name.split( 'thermal_8_bit/')[1].split('.')[0] + '_baysian_avg_bbox.jpg' #cv2.imwrite(out_img_name, img) #pdb.set_trace() """ if '09115' in file_name: out_img_name = 'out_img_baysian_fusion/' + file_name.split('thermal_8_bit/')[1].split('.')[0]+'_baysian_avg_bbox.jpg' pdb.set_trace() cv2.imwrite(out_img_name, img) """ results = evaluator.evaluate(out_eval_path='FLIR_pooling_.out') if results is None: results = {} avgRGB = count_1 / num_img avgThermal = count_2 / num_img avgNMS = count_fusion / num_img print('Avg bbox for RGB:', avgRGB, "average count thermal:", avgThermal, 'average count nms:', avgNMS) return results
def forward(self, scores, proposal_boxes):
    instances = Instances((10, 10))
    instances.proposal_boxes = Boxes(proposal_boxes)
    return self._output_layer.predict_probs((scores, None), [instances])
def inference_single_image( self, anchors: List[Boxes], box_cls: List[Tensor], box_delta: List[Tensor], oks_delta: List[Tensor], image_size: Tuple[int, int], ): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Arguments: anchors (list[Boxes]): list of #feature levels. Each entry contains a Boxes object, which contains all the anchors in that feature level. box_cls (list[Tensor]): list of #feature levels. Each entry contains tensor of size (H x W x A, K) box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. image_size (tuple(H, W)): a tuple of the image height and width. Returns: Same as `inference`, but for only one image. """ boxes_all, keypoint_all, scores_all, class_idxs_all = [[] for _ in range(4)] # Iterate over every feature level for box_cls_i, box_reg_i, oks_reg_i, anchors_i in zip(box_cls, box_delta, oks_delta, anchors): # HxWxAxK, predicted_prob = box_cls_i.flatten().sigmoid_() # Apply two filtering below to make NMS faster. # 1. Keep boxes with confidence score higher than threshold keep_idxs = predicted_prob > self.test_score_thresh predicted_prob = predicted_prob[keep_idxs] topk_idxs = nonzero_tuple(keep_idxs)[0] # 2. Keep top k top scoring boxes only num_topk = min(self.test_topk_candidates, topk_idxs.size(0)) # torch.sort is actually faster than .topk (at least on GPUs) predicted_prob, idxs = predicted_prob.sort(descending=True) predicted_prob = predicted_prob[:num_topk] topk_idxs = topk_idxs[idxs[:num_topk]] anchor_idxs = topk_idxs // self.num_classes classes_idxs = topk_idxs % self.num_classes box_reg_i = box_reg_i[anchor_idxs] oks_reg_i = oks_reg_i[anchor_idxs] anchors_i = anchors_i[anchor_idxs] # predict boxes predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor) predicted_marks = self.mark2mark_transform.apply_deltas(oks_reg_i, anchors_i.tensor) boxes_all.append(predicted_boxes) keypoint_all.append(predicted_marks) scores_all.append(predicted_prob) class_idxs_all.append(classes_idxs) boxes_all, keypoint_all, scores_all, class_idxs_all = [ cat(x) for x in [boxes_all, keypoint_all, scores_all, class_idxs_all] ] keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.test_nms_thresh) keep = keep[: self.max_detections_per_image] result = Instances(image_size) result.pred_boxes = Boxes(boxes_all[keep]) keypoints_all = keypoint_all[keep].reshape(-1, self.num_landmark, 2) keypoints_all = torch.cat( (keypoints_all, 2 * torch.ones(keypoints_all.shape[0], self.num_landmark, 1).to(self.device)), dim=2) result.pred_keypoints = keypoints_all result.scores = scores_all[keep] result.pred_classes = class_idxs_all[keep] return result
def test_StandardROIHeads_scriptability(self): cfg = get_cfg() cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) cfg.MODEL.MASK_ON = True cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.01 cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01 num_images = 2 images_tensor = torch.rand(num_images, 20, 30) image_sizes = [(10, 10), (20, 30)] images = ImageList(images_tensor, image_sizes) num_channels = 1024 features = {"res4": torch.rand(num_images, num_channels, 1, 2)} feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)} roi_heads = StandardROIHeads(cfg, feature_shape).eval() proposal0 = Instances(image_sizes[0]) proposal_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) proposal0.proposal_boxes = Boxes(proposal_boxes0) proposal0.objectness_logits = torch.tensor([0.5, 0.7], dtype=torch.float32) proposal1 = Instances(image_sizes[1]) proposal_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) proposal1.proposal_boxes = Boxes(proposal_boxes1) proposal1.objectness_logits = torch.tensor([0.1, 0.9], dtype=torch.float32) proposals = [proposal0, proposal1] pred_instances, _ = roi_heads(images, features, proposals) fields = { "objectness_logits": "Tensor", "proposal_boxes": "Boxes", "pred_classes": "Tensor", "scores": "Tensor", "pred_masks": "Tensor", "pred_boxes": "Boxes", "pred_keypoints": "Tensor", "pred_keypoint_heatmaps": "Tensor", } with patch_instances(fields) as new_instances: proposal0 = new_instances.from_instances(proposal0) proposal1 = new_instances.from_instances(proposal1) proposals = [proposal0, proposal1] scripted_rot_heads = torch.jit.script(roi_heads) scripted_pred_instances, _ = scripted_rot_heads( images, features, proposals) for instance, scripted_instance in zip(pred_instances, scripted_pred_instances): self.assertEqual(instance.image_size, scripted_instance.image_size) self.assertTrue( torch.equal(instance.pred_boxes.tensor, scripted_instance.pred_boxes.tensor)) self.assertTrue( torch.equal(instance.scores, scripted_instance.scores)) self.assertTrue( torch.equal(instance.pred_classes, scripted_instance.pred_classes)) self.assertTrue( torch.equal(instance.pred_masks, scripted_instance.pred_masks))
def forward(self, proposal_deltas, proposal_boxes):
    instances = Instances((10, 10))
    instances.proposal_boxes = Boxes(proposal_boxes)
    return self._output_layer.predict_boxes((None, proposal_deltas), [instances])
def evaluate(cfg, evaluator, det_1, det_2, predictor, method, bayesian=False): evaluator.reset() img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/' num_img = len(det_2['image']) count_1 = 0 count_2 = 0 count_fusion = 0 print('Method: ', method) img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/' num_img = len(det_2['image']) count_1 = 0 count_2 = 0 count_fusion = 0 X = None Y = np.array([]) cnt = 0 for i in range(num_img): info_1 = {} info_1['img_name'] = det_1['image'][i] info_1['bbox'] = det_1['boxes'][i] info_1['score'] = det_1['scores'][i] info_1['class'] = det_1['classes'][i] info_1['class_logits'] = det_1['class_logits'][i] if 'probs' in det_1.keys(): info_1['prob'] = det_1['probs'][i] info_2 = {} info_2['img_name'] = det_2['image'][i].split('.')[0] + '.jpeg' info_2['bbox'] = det_2['boxes'][i] info_2['score'] = det_2['scores'][i] info_2['class'] = det_2['classes'][i] info_2['class_logits'] = det_2['class_logits'][i] if 'probs' in det_2.keys(): info_2['prob'] = det_2['probs'][i] #img_id = int(info_1['img_name'].split('.')[0].split('_')[1]) - 1 img_id = det_1['image_id'][i] # If no any detection in two results if len(info_1['bbox']) == 0 and len(info_2['bbox']) == 0: continue # If no detection in 1st model: elif len(info_1['bbox']) == 0: print('model 1 miss detected') in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt_1_det( info_2) elif len(info_2['bbox']) == 0: print('model 2 miss detected') in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt_1_det( info_1) else: in_boxes, in_scores, in_class, in_logits, in_prob, num_det = prepare_data_gt( info_1, info_2) score_results, box_results, class_results = nms_multiple_box_eval( in_boxes, in_scores, in_class, in_logits, in_prob, 0.5, num_det, method) if bayesian: # summing logits sum_logits = score_results[:, :4] + score_results[:, 4:] pred_prob_multiclass = F.softmax(torch.Tensor(sum_logits)).tolist() out_scores = np.max(pred_prob_multiclass, axis=1) out_class = np.argmax(pred_prob_multiclass, axis=1) else: pred_prob_multiclass = predictor.predict_proba(score_results) out_scores = np.max(pred_prob_multiclass, axis=1) out_class = np.argmax(pred_prob_multiclass, axis=1) #pdb.set_trace() """ Send information to evaluator """ # Image info file_name = img_folder + info_1['img_name'].split('.')[0] + '.jpeg' img = cv2.imread(file_name) H, W, _ = img.shape # Handle inputs inputs = [] input_info = {} input_info['file_name'] = file_name input_info['height'] = H input_info['width'] = W input_info['image_id'] = det_1['image_id'][i] input_info['image'] = torch.Tensor(img) inputs.append(input_info) # Handle outputs outputs = [] out_info = {} proposals = Instances([H, W]) proposals.pred_boxes = Boxes(box_results) proposals.scores = torch.Tensor(out_scores) proposals.pred_classes = torch.Tensor(out_class) out_info['instances'] = proposals outputs.append(out_info) evaluator.process(inputs, outputs) results = evaluator.evaluate(out_eval_path='FLIR_pooling_.out') if results is None: results = {} avgRGB = count_1 / num_img avgThermal = count_2 / num_img avgNMS = count_fusion / num_img print('Avg bbox for RGB:', avgRGB, "average count thermal:", avgThermal, 'average count nms:', avgNMS) return results
def apply_late_fusion_and_evaluate(cfg, evaluator, det_1, det_2, method, predictor, det_3='', bayesian=False): evaluator.reset() img_folder = '../../../Datasets/FLIR/val/thermal_8_bit/' num_img = len(det_2['image']) count_1 = 0 count_2 = 0 count_fusion = 0 print('Method: ', method) for i in range(num_img): info_1 = {} info_1['img_name'] = det_1['image'][i] info_1['bbox'] = det_1['boxes'][i] info_1['score'] = det_1['scores'][i] info_1['class'] = det_1['classes'][i] info_1['class_logits'] = det_1['class_logits'][i] if 'probs' in det_1.keys(): info_1['prob'] = det_1['probs'][i] info_2 = {} info_2['img_name'] = det_2['image'][i].split('.')[0] + '.jpeg' info_2['bbox'] = det_2['boxes'][i] info_2['score'] = det_2['scores'][i] info_2['class'] = det_2['classes'][i] info_2['class_logits'] = det_2['class_logits'][i] if 'probs' in det_2.keys(): info_2['prob'] = det_2['probs'][i] if len(info_1['bbox']) > 0: num_1 = 1 else: num_1 = 0 if len(info_2['bbox']) > 0: num_2 = 1 else: num_2 = 0 num_detections = num_1 + num_2 if det_3: info_3 = {} info_3['img_name'] = det_3['image'][i].split('.')[0] + '.jpeg' info_3['bbox'] = det_3['boxes'][i] info_3['score'] = det_3['scores'][i] info_3['class'] = det_3['classes'][i] info_3['class_logits'] = det_3['class_logits'][i] if 'probs' in det_3.keys(): info_3['prob'] = det_3['probs'][i] if len(info_3['bbox']) > 0: num_3 = 1 else: num_3 = 0 num_detections += num_3 # No detections if num_detections == 0: continue # Only 1 model detection elif num_detections == 1: if len(info_1['bbox']) > 0: out_boxes = np.array(info_1['bbox']) out_class = torch.Tensor(info_1['class']) out_scores = torch.Tensor(info_1['score']) num_det_1 = len(info_1['class_logits']) out_logits = np.zeros((num_det_1, 8)) for k in range(num_det_1): out_logits[k, :4] = info_1['class_logits'][k] elif len(info_2['bbox']) > 0: out_boxes = np.array(info_2['bbox']) out_class = torch.Tensor(info_2['class']) out_scores = torch.Tensor(info_2['score']) num_det_2 = len(info_1['class_logits']) out_logits = np.zeros((num_det_2, 8)) for k in range(num_det_1): out_logits[k, 4:] = info_1['class_logits'][k] else: if det_3: out_boxes = np.array(info_3['bbox']) out_class = torch.Tensor(info_3['class']) out_scores = torch.Tensor(info_3['score']) # Only two models with detections elif num_detections == 2: #pdb.set_trace() if not det_3: if method == 'learned_fusion' or method == 'logRegression': out_boxes, out_scores, out_class, out_logits, _, _ = fusion( method, info_1, info_2, predictor=predictor) else: out_boxes, out_scores, out_class = fusion( method, info_1, info_2) else: if len(info_1['bbox']) == 0: out_boxes, out_scores, out_class = fusion( method, info_2, info_3) elif len(info_2['bbox']) == 0: out_boxes, out_scores, out_class = fusion( method, info_1, info_3) else: out_boxes, out_scores, out_class = fusion( method, info_1, info_2) # All 3 models detected things else: out_boxes, out_scores, out_class = fusion(method, info_1, info_2, info_3=info_3) if bayesian: sum_logits = out_logits[:, :4] + out_logits[:, 4:] pred_prob_multiclass = F.softmax(torch.Tensor(sum_logits)).tolist() out_scores = np.max(pred_prob_multiclass, axis=1) out_class = np.argmax(pred_prob_multiclass, axis=1) elif method == 'learned_fusion': pred_logits = predictor(torch.Tensor(out_logits).cuda(0)) pred_prob_multiclass = F.softmax(pred_logits, dim=1).tolist() out_scores = np.max(pred_prob_multiclass, axis=1) out_class = np.argmax(pred_prob_multiclass, axis=1) elif method == 'logRegression': pred_prob_multiclass = predictor.predict_proba(out_logits) 
            out_scores = np.max(pred_prob_multiclass, axis=1)
            out_class = np.argmax(pred_prob_multiclass, axis=1)

        file_name = img_folder + info_1['img_name'].split('.')[0] + '.jpeg'
        img = cv2.imread(file_name)
        if img is None:
            raise FileNotFoundError('Could not read image: ' + file_name)
        H, W, _ = img.shape

        # Handle inputs
        inputs = []
        input_info = {}
        input_info['file_name'] = file_name
        input_info['height'] = H
        input_info['width'] = W
        input_info['image_id'] = det_2['image_id'][i]
        input_info['image'] = torch.Tensor(img)
        inputs.append(input_info)

        # Handle outputs
        outputs = []
        out_info = {}
        proposals = Instances([H, W])
        proposals.pred_boxes = Boxes(out_boxes)
        proposals.scores = out_scores
        proposals.pred_classes = out_class
        out_info['instances'] = proposals
        outputs.append(out_info)
        evaluator.process(inputs, outputs)

    results = evaluator.evaluate(out_eval_path='FLIR_pooling_.out')
    if results is None:
        results = {}
    return results
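# The logit-summing fusion step used above, shown in isolation (plain PyTorch,
# dummy values): per-class logits from the two detectors are summed and a
# softmax over the sum gives the fused class probabilities, from which the
# final score and class are taken.
def _example_logit_sum_fusion():
    import torch
    import torch.nn.functional as F
    logits_rgb = torch.tensor([[2.0, 0.5, 0.1, 0.0]])
    logits_thermal = torch.tensor([[1.0, 1.5, 0.2, 0.0]])
    fused = F.softmax(logits_rgb + logits_thermal, dim=1)
    score, cls = fused.max(dim=1)
    assert cls.item() == 0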
def forward(self, features, pred_instances=None, targets=None): if self.edge_on: with timer.env("pfpn_back"): for i, f in enumerate(self.in_features): if i == 0: x = self.scale_heads[i](features[f]) else: x = x + self.scale_heads[i](features[f]) if self.edge_on: with timer.env("edge"): pred_logits = self.predictor(x) pred_edge = pred_logits.sigmoid() if self.attention: # print('pred edge', pred_edge) att_map = self.attender( 1 - pred_edge ) # regions that need evolution if self.training: edge_target = targets[0] if self.edge_in: edge_prior = targets[0].unsqueeze(1).float().clone() # (B, 1, H, W) edge_prior[edge_prior == self.ignore_value] = 0 # remove ignore value edge_prior = self.mean_filter(edge_prior) edge_prior = F.interpolate( edge_prior, scale_factor=1 / self.common_stride, mode="bilinear", align_corners=False, ) edge_prior[edge_prior > 0] = 1 if self.strong_feat: snake_input = torch.cat([edge_prior, x], dim=1) else: snake_input = torch.cat([edge_prior, features["p2"]], dim=1) else: if self.strong_feat: snake_input = x else: snake_input = features["p2"] if self.edge_on: pred_edge_full = F.interpolate( pred_edge, scale_factor=self.common_stride, mode="bilinear", align_corners=False, ) if self.selective_refine: edge_prior = targets[0].unsqueeze(1).float().clone() # (B, 1, H, W) edge_prior[edge_prior == self.ignore_value] = 0 # remove ignore value edge_prior = self.dilate_filter(edge_prior) # edge_prior = self.dilate_filter(edge_prior) # edge_target = edge_prior.clone() edge_prior[edge_prior > 0] = 1 edge_prior = F.interpolate( edge_prior, scale_factor=1 / self.common_stride, mode="bilinear", align_corners=False, ) if self.strong_feat: snake_input = torch.cat([edge_prior, x], dim=1) else: if self.pred_edge: snake_input = torch.cat( [edge_prior, pred_logits, features["p2"]], dim=1 ) else: snake_input = torch.cat([edge_prior, features["p2"]], dim=1) if self.attention: if self.strong_feat: snake_input = torch.cat([att_map, x], dim=1) else: # dont cater pred_edge option now snake_input = torch.cat([att_map, features["p2"]], dim=1) ### Quick fix for batches that do not have poly after filtering _, poly_loss = self.refine_head(snake_input, None, targets[1]) if self.edge_on: edge_loss = self.loss(pred_edge_full, edge_target) * self.loss_weight poly_loss.update( { "loss_edge_det": edge_loss, } ) return [], poly_loss, [] else: if self.edge_in or self.selective_refine: if self.edge_map_thre > 0: pred_edge = (pred_edge > self.edge_map_thre).float() if "edge" in self.gt_input: assert targets[0] is not None pred_edge = targets[0].unsqueeze(1).float().clone() pred_edge[pred_edge == self.ignore_value] = 0 # remove ignore value if self.selective_refine: pred_edge = self.dilate_filter(pred_edge) # pred_edge = self.dilate_filter(pred_edge) pred_edge = F.interpolate( pred_edge, scale_factor=1 / self.common_stride, mode="bilinear", align_corners=False, ) pred_edge[pred_edge > 0] = 1 if self.strong_feat: snake_input = torch.cat([pred_edge, x], dim=1) else: snake_input = torch.cat([pred_edge, features["p2"]], dim=1) else: if self.strong_feat: snake_input = x else: snake_input = features["p2"] if self.attention: if self.strong_feat: snake_input = torch.cat([att_map, x], dim=1) else: # dont cater pred_edge option now snake_input = torch.cat([att_map, features["p2"]], dim=1) if "instance" in self.gt_input: assert targets[1][0] is not None for im_i in range(len(targets[1][0])): gt_instances_per_im = targets[1][0][im_i] bboxes = gt_instances_per_im.gt_boxes.tensor instances_per_im = 
Instances(pred_instances[im_i]._image_size) instances_per_im.pred_boxes = Boxes(bboxes) instances_per_im.pred_classes = gt_instances_per_im.gt_classes instances_per_im.scores = torch.ones_like( gt_instances_per_im.gt_classes, device=bboxes.device ) if gt_instances_per_im.has("gt_masks"): gt_masks = gt_instances_per_im.gt_masks ext_pts_off = self.refine_head.get_simple_extreme_points( gt_masks.polygons ).to(bboxes.device) ex_t = torch.stack( [ext_pts_off[:, None, 0], bboxes[:, None, 1]], dim=2 ) ex_l = torch.stack( [bboxes[:, None, 0], ext_pts_off[:, None, 1]], dim=2 ) ex_b = torch.stack( [ext_pts_off[:, None, 2], bboxes[:, None, 3]], dim=2 ) ex_r = torch.stack( [bboxes[:, None, 2], ext_pts_off[:, None, 3]], dim=2 ) instances_per_im.ext_points = ExtremePoints( torch.cat([ex_t, ex_l, ex_b, ex_r], dim=1) ) # TODO: NOTE: Test for theoretic limit. ##### # contours = self.refine_head.get_simple_contour(gt_masks) # poly_sample_targets = [] # for i, cnt in enumerate(contours): # if cnt is None: # xmin, ymin = bboxes[:, 0], bboxes[:, 1] # (n,) # xmax, ymax = bboxes[:, 2], bboxes[:, 3] # (n,) # box = [ # xmax, ymin, xmin, ymin, xmin, ymax, xmax, ymax # ] # box = torch.stack(box, dim=1).view(-1, 4, 2) # sampled_box = self.refine_head.uniform_upsample(box[None], # self.refine_head.num_sampling) # poly_sample_targets.append(sampled_box[i]) # # print(sampled_box.shape) # continue # # # 1) uniform-sample # oct_sampled_targets = self.refine_head.uniform_sample(cnt, # len(cnt) * self.refine_head.num_sampling) # (big, 2) # tt_idx = np.random.randint(len(oct_sampled_targets)) # oct_sampled_targets = np.roll(oct_sampled_targets, -tt_idx, axis=0)[::len(cnt)] # oct_sampled_targets = torch.tensor(oct_sampled_targets, device=bboxes.device) # poly_sample_targets.append(oct_sampled_targets) # # print(oct_sampled_targets.shape) # # # 2) polar-sample # # ... # poly_sample_targets = torch.stack(poly_sample_targets, dim=0) # instances_per_im.pred_polys = PolygonPoints(poly_sample_targets) # TODO: NOTE: Test for theoretic limit. ##### pred_instances[im_i] = instances_per_im new_instances, _ = self.refine_head(snake_input, pred_instances, None) # new_instances = pred_instances if not self.edge_on: pred_edge = torch.rand(1, 1, 5, 5, device=snake_input.device) if self.attention: pred_edge = att_map return pred_edge, {}, new_instances
def fast_rcnn_inference_single_image_with_anchor(proposals, boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Args: Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes per image. Returns: Same as `fast_rcnn_inference`, but for only one image. """ anchors = proposals.get_fields()['anchor_boxes'].tensor proposals = proposals.get_fields()['proposal_boxes'].tensor valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all( dim=1) if not valid_mask.all(): boxes = boxes[valid_mask] scores = scores[valid_mask] anchors = anchors[valid_mask] proposals = proposals[valid_mask] scores = scores[:, :-1] num_bbox_reg_classes = boxes.shape[1] // 4 # Convert to Boxes to use the `clip` function ... boxes = Boxes(boxes.reshape(-1, 4)) boxes.clip(image_shape) boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 anchors = Boxes(anchors) proposals = Boxes(proposals) anchors.clip(image_shape) proposals.clip(image_shape) anchors = anchors.tensor proposals = proposals.tensor # Filter results based on detection scores filter_mask = scores > score_thresh # R x K # R' x 2. First column contains indices of the R predictions; # Second column contains indices of classes. filter_inds = filter_mask.nonzero() if num_bbox_reg_classes == 1: boxes = boxes[filter_inds[:, 0], 0] else: boxes = boxes[filter_mask] scores = scores[filter_mask] anchors = anchors[filter_inds[:, 0]] proposals = proposals[filter_inds[:, 0]] # Apply per-class NMS keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) if topk_per_image >= 0: keep = keep[:topk_per_image] boxes, scores, filter_inds, anchors, proposals = boxes[keep], scores[keep], filter_inds[keep], anchors[keep], \ proposals[keep] result = Instances(image_shape) result.pred_boxes = Boxes(boxes) result.scores = scores result.pred_classes = filter_inds[:, 1] result.anchors = Boxes(anchors) result.proposals = Boxes(proposals) return result, filter_inds[:, 0]
def _desc_to_example(desc: Dict): # Detectron2 Model Input Format: # image: Tensor[C, H, W]; # height, width: output height and width; # instances: Instances Object to training, with the following fields: # "gt_boxes": # "gt_classes": # "gt_masks": a PolygonMasks or BitMasks object storing N masks, one for each instance. desc = copy.deepcopy(desc) # it will be modified by code below image_path = os.path.join(images_dir, f'{desc["image_id"]}.jpg') # shape: [H, W, C] origin_image = detection_utils.read_image(image_path, format="BGR") oh, ow, oc = origin_height, origin_width, origin_channels = origin_image.shape if augmentations is not None: aug_input = T.AugInput(origin_image) transforms = augmentations(aug_input) auged_image = aug_input.image else: auged_image = origin_image ah, aw, ac = auged_height, auged_width, auged_channels = auged_image.shape if not is_train: return { "image_id": desc['image_id'], # COCOEvaluator.process() need it. # expected shape: [C, H, W] "image": torch.as_tensor( np.ascontiguousarray(auged_image.transpose(2, 0, 1))), "height": auged_height, "width": auged_width, } target = Instances(image_size=(ah, aw)) if 'fill gt_boxes': # shape: n_box, 4 boxes_abs = np.array( [anno['bbox'] for anno in desc['annotations']]) if augmentations is not None: # clip transformed bbox to image size boxes_auged = transforms.apply_box( np.array(boxes_abs)).clip(min=0) boxes_auged = np.minimum( boxes_auged, np.array([aw, ah, aw, ah])[np.newaxis, :]) else: boxes_auged = boxes_abs target.gt_boxes = Boxes(boxes_auged) if 'fill gt_classes': classes = [anno['category_id'] for anno in desc['annotations']] classes = torch.tensor(classes, dtype=torch.int64) target.gt_classes = classes if 'fill gt_masks': mask_paths = [ os.path.join(masks_dir, f'{anno["mask_id"]}.png') for anno in desc['annotations'] ] masks = np.array( list( map( lambda p: cv2.resize(cv2.imread( p, flags=cv2.IMREAD_GRAYSCALE), dsize=(ow, oh)), mask_paths))) if augmentations is not None: masks_auged = np.array( list(map(lambda x: transforms.apply_segmentation(x), masks))) else: masks_auged = masks masks_auged = masks_auged > MASK_THRESHOLD masks_auged = BitMasks( torch.stack([ torch.from_numpy(np.ascontiguousarray(x)) for x in masks_auged ])) target.gt_masks = masks_auged return { "image_id": desc['image_id'], # COCOEvaluator.process() need it. # expected shape: [C, H, W] "image": torch.as_tensor( np.ascontiguousarray(auged_image.transpose(2, 0, 1))), "height": auged_height, "width": auged_width, "instances": target, # refer: annotations_to_instances() }
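# Hedged sketch of the Detectron2 "model input format" dict that
# _desc_to_example builds (field names follow the comment block at the top of
# that function; the values here are dummies and detectron2 structures are
# assumed to be available):
def _example_model_input_dict():
    import torch
    from detectron2.structures import Boxes, Instances
    target = Instances((480, 640),
                       gt_boxes=Boxes(torch.zeros(0, 4)),
                       gt_classes=torch.zeros(0, dtype=torch.int64))
    return {
        "image_id": "0001",
        "image": torch.zeros(3, 480, 640),  # C, H, W
        "height": 480,
        "width": 640,
        "instances": target,
    }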
def losses(self, logits_pred, reg_pred, ctrness_pred, locations, gt_instances, top_feats=None): """ Return the losses from a set of FCOS predictions and their associated ground-truth. Returns: dict[loss name -> loss value]: A dict mapping from loss name to loss value. """ training_targets = self._get_ground_truth(locations, gt_instances) # Collect all logits and regression predictions over feature maps # and images to arrive at the same shape as the labels and targets # The final ordering is L, N, H, W from slowest to fastest axis. instances = Instances((0, 0)) instances.labels = cat( [ # Reshape: (N, 1, Hi, Wi) -> (N*Hi*Wi,) x.reshape(-1) for x in training_targets["labels"] ], dim=0) instances.gt_inds = cat( [ # Reshape: (N, 1, Hi, Wi) -> (N*Hi*Wi,) x.reshape(-1) for x in training_targets["target_inds"] ], dim=0) instances.im_inds = cat( [x.reshape(-1) for x in training_targets["im_inds"]], dim=0) instances.reg_targets = cat( [ # Reshape: (N, Hi, Wi, 4) -> (N*Hi*Wi, 4) x.reshape(-1, 4) for x in training_targets["reg_targets"] ], dim=0, ) instances.locations = cat( [x.reshape(-1, 2) for x in training_targets["locations"]], dim=0) instances.fpn_levels = cat( [x.reshape(-1) for x in training_targets["fpn_levels"]], dim=0) instances.logits_pred = cat( [ # Reshape: (N, C, Hi, Wi) -> (N, Hi, Wi, C) -> (N*Hi*Wi, C) x.permute(0, 2, 3, 1).reshape(-1, self.num_classes) for x in logits_pred ], dim=0, ) instances.reg_pred = cat( [ # Reshape: (N, B, Hi, Wi) -> (N, Hi, Wi, B) -> (N*Hi*Wi, B) x.permute(0, 2, 3, 1).reshape(-1, 4) for x in reg_pred ], dim=0, ) instances.ctrness_pred = cat( [ # Reshape: (N, 1, Hi, Wi) -> (N*Hi*Wi,) x.permute(0, 2, 3, 1).reshape(-1) for x in ctrness_pred ], dim=0, ) if len(top_feats) > 0: instances.top_feats = cat( [ # Reshape: (N, -1, Hi, Wi) -> (N*Hi*Wi, -1) x.permute(0, 2, 3, 1).reshape(-1, x.size(1)) for x in top_feats ], dim=0, ) return self.fcos_losses(instances)
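# The reshaping pattern used repeatedly above, in isolation (plain PyTorch):
# per-level predictions of shape (N, C, Hi, Wi) become (N*Hi*Wi, C) rows so
# they align with the flattened per-location training targets.
def _example_flatten_predictions():
    import torch
    x = torch.arange(2 * 3 * 4 * 5, dtype=torch.float32).view(2, 3, 4, 5)  # N, C, H, W
    flat = x.permute(0, 2, 3, 1).reshape(-1, 3)
    assert flat.shape == (2 * 4 * 5, 3)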
def inference_single_image( self, pred_logits, pred_deltas, pred_masks, anchors, indexes, image_size ): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Arguments: pred_logits (list[Tensor]): list of #feature levels. Each entry contains tensor of size (AxHxW, K) pred_deltas (list[Tensor]): Same shape as 'pred_logits' except that K becomes 4. pred_masks (list[list[Tensor]]): List of #feature levels, each is a list of #anchors. Each entry contains tensor of size (M_i*M_i, H, W). `None` if mask_on=False. anchors (list[Boxes]): list of #feature levels. Each entry contains a Boxes object, which contains all the anchors for that image in that feature level. image_size (tuple(H, W)): a tuple of the image height and width. Returns: Same as `inference`, but for only one image. """ pred_logits = pred_logits.flatten().sigmoid_() # We get top locations across all levels to accelerate the inference speed, # which does not seem to affect the accuracy. # First select values above the threshold logits_top_idxs = torch.where(pred_logits > self.score_threshold)[0] # Then get the top values num_topk = min(self.topk_candidates, logits_top_idxs.shape[0]) pred_prob, topk_idxs = pred_logits[logits_top_idxs].sort(descending=True) # Keep top k scoring values pred_prob = pred_prob[:num_topk] # Keep top k values top_idxs = logits_top_idxs[topk_idxs[:num_topk]] # class index cls_idxs = top_idxs % self.num_classes # HWA index top_idxs //= self.num_classes # predict boxes pred_boxes = self.box2box_transform.apply_deltas( pred_deltas[top_idxs], anchors[top_idxs].tensor ) # apply caffe_nms keep = batched_nms(pred_boxes, pred_prob, cls_idxs, self.nms_threshold) # pick the top ones keep = keep[: self.detections_im] results = Instances(image_size) results.pred_boxes = Boxes(pred_boxes[keep]) results.scores = pred_prob[keep] results.pred_classes = cls_idxs[keep] # deal with masks result_masks, result_anchors = [], None if self.mask_on: # index and anchors, useful for masks top_indexes = indexes[top_idxs] top_anchors = anchors[top_idxs] result_indexes = top_indexes[keep] result_anchors = top_anchors[keep] # Get masks and do sigmoid for lvl, _, h, w, anc in result_indexes.tolist(): cur_size = self.mask_sizes[anc] * (2 ** lvl if self.bipyramid_on else 1) result_masks.append( torch.sigmoid(pred_masks[lvl][anc][:, h, w].view(1, cur_size, cur_size)) ) return results, (result_masks, result_anchors)
def _inference_one_image(self, input): """ Args: input (dict): one dataset dict Returns: dict: one output dict """ augmented_inputs = self.tta_mapper(input) do_hflip = [k.pop("horiz_flip", False) for k in augmented_inputs] heights = [k["height"] for k in augmented_inputs] widths = [k["width"] for k in augmented_inputs] assert ( len(set(heights)) == 1 and len(set(widths)) == 1 ), "Augmented version of the inputs should have the same original resolution!" height = heights[0] width = widths[0] # 1. Detect boxes from all augmented versions # 1.1: forward with all augmented images with self._turn_off_roi_head("mask_on"), self._turn_off_roi_head("keypoint_on"): # temporarily disable mask/keypoint head outputs = self._batch_inference(augmented_inputs, do_postprocess=False) # 1.2: union the results all_boxes = [] all_scores = [] all_classes = [] for idx, output in enumerate(outputs): rescaled_output = detector_postprocess(output, height, width) pred_boxes = rescaled_output.pred_boxes.tensor if do_hflip[idx]: pred_boxes[:, [0, 2]] = width - pred_boxes[:, [2, 0]] all_boxes.append(pred_boxes) all_scores.extend(rescaled_output.scores) all_classes.extend(rescaled_output.pred_classes) all_boxes = torch.cat(all_boxes, dim=0).cpu() num_boxes = len(all_boxes) # 1.3: select from the union of all results num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES # +1 because fast_rcnn_inference expects background scores as well all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) for idx, cls, score in zip(count(), all_classes, all_scores): all_scores_2d[idx, cls] = score merged_instances, _ = fast_rcnn_inference_single_image( all_boxes, all_scores_2d, (height, width), 1e-8, self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, self.cfg.TEST.DETECTIONS_PER_IMAGE, ) if not self.cfg.MODEL.MASK_ON: return {"instances": merged_instances} # 2. Use the detected boxes to obtain masks # 2.1: rescale the detected boxes augmented_instances = [] for idx, input in enumerate(augmented_inputs): actual_height, actual_width = input["image"].shape[1:3] scale_x = actual_width * 1.0 / width scale_y = actual_height * 1.0 / height pred_boxes = merged_instances.pred_boxes.clone() pred_boxes.tensor[:, 0::2] *= scale_x pred_boxes.tensor[:, 1::2] *= scale_y if do_hflip[idx]: pred_boxes.tensor[:, [0, 2]] = actual_width - pred_boxes.tensor[:, [2, 0]] aug_instances = Instances( image_size=(actual_height, actual_width), pred_boxes=pred_boxes, pred_classes=merged_instances.pred_classes, scores=merged_instances.scores, ) augmented_instances.append(aug_instances) # 2.2: run forward on the detected boxes outputs = self._batch_inference(augmented_inputs, augmented_instances, do_postprocess=False) for idx, output in enumerate(outputs): if do_hflip[idx]: output.pred_masks = output.pred_masks.flip(dims=[3]) # 2.3: average the predictions all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) avg_pred_masks = torch.mean(all_pred_masks, dim=0) output = outputs[0] output.pred_masks = avg_pred_masks output = detector_postprocess(output, height, width) return {"instances": output}
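# The horizontal-flip box un-mapping used above, shown on a single box (plain
# PyTorch): x coordinates are mirrored about the image width and swapped so
# that x1 <= x2 still holds after the flip.
def _example_hflip_boxes():
    import torch
    width = 100.0
    boxes = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
    boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
    assert boxes.tolist() == [[70.0, 20.0, 90.0, 40.0]]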
def get_empty_instance(h, w):
    inst = Instances((h, w))
    inst.gt_boxes = Boxes(torch.rand(0, 4))
    inst.gt_classes = torch.tensor([]).to(dtype=torch.int64)
    inst.gt_masks = BitMasks(torch.rand(0, h, w))
    return inst
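# Hedged usage sketch (assumes get_empty_instance above is in scope): an
# "empty" Instances is still a valid ground-truth target for an image with no
# objects; every field simply has length zero.
def _example_empty_instance():
    inst = get_empty_instance(32, 32)
    assert len(inst) == 0
    assert inst.gt_boxes.tensor.shape == (0, 4)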
def extract_feat_multigpu(split_idx, img_list, cfg, args, actor: ActorHandle): # NOTE ray num_images = len(img_list) print('Number of images on split{}: {}.'.format(split_idx, num_images)) model = DefaultTrainer.build_model(cfg) DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( cfg.MODEL.WEIGHTS, resume=args.resume ) model.eval() for im_file in (img_list): if os.path.exists(os.path.join(args.output_dir, im_file.split('.')[0]+'.npz')): actor.update.remote(1) # NOTE ray continue im = cv2.imread(os.path.join(args.image_dir, im_file)) if im is None: print(os.path.join(args.image_dir, im_file), "is illegal!") actor.update.remote(1) # NOTE ray continue dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN) # extract roi features if cfg.MODEL.BUA.EXTRACTOR.MODE == 1: attr_scores = None with torch.set_grad_enabled(False): if cfg.MODEL.BUA.ATTRIBUTE_ON: boxes, scores, features_pooled, attr_scores = model([dataset_dict]) else: boxes, scores, features_pooled = model([dataset_dict]) boxes = [box.tensor.cpu() for box in boxes] scores = [score.cpu() for score in scores] features_pooled = [feat.cpu() for feat in features_pooled] if not attr_scores is None: attr_scores = [attr_score.cpu() for attr_score in attr_scores] generate_npz(1, args, cfg, im_file, im, dataset_dict, boxes, scores, features_pooled, attr_scores) # extract bbox only elif cfg.MODEL.BUA.EXTRACTOR.MODE == 2: with torch.set_grad_enabled(False): boxes, scores = model([dataset_dict]) boxes = [box.cpu() for box in boxes] scores = [score.cpu() for score in scores] generate_npz(2, args, cfg, im_file, im, dataset_dict, boxes, scores) # extract roi features by bbox elif cfg.MODEL.BUA.EXTRACTOR.MODE == 3: if not os.path.exists(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz')): actor.update.remote(1) # NOTE ray continue bbox = torch.from_numpy(np.load(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz'))['bbox']) * dataset_dict['im_scale'] proposals = Instances(dataset_dict['image'].shape[-2:]) proposals.proposal_boxes = BUABoxes(bbox) dataset_dict['proposals'] = proposals attr_scores = None with torch.set_grad_enabled(False): if cfg.MODEL.BUA.ATTRIBUTE_ON: boxes, scores, features_pooled, attr_scores = model([dataset_dict]) else: boxes, scores, features_pooled = model([dataset_dict]) boxes = [box.tensor.cpu() for box in boxes] scores = [score.cpu() for score in scores] features_pooled = [feat.cpu() for feat in features_pooled] if not attr_scores is None: attr_scores = [attr_score.data.cpu() for attr_score in attr_scores] generate_npz(3, args, cfg, im_file, im, dataset_dict, boxes, scores, features_pooled, attr_scores) actor.update.remote(1) # NOTE ray
def find_top_rpn_proposals( proposals: List[torch.Tensor], pred_objectness_logits: List[torch.Tensor], image_sizes: List[Tuple[int, int]], nms_thresh: float, pre_nms_topk: int, post_nms_topk: int, min_box_size: int, training: bool, ): """ For each feature map, select the `pre_nms_topk` highest scoring proposals, apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` highest scoring proposals among all the feature maps for each image. Args: proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). All proposal predictions on the feature maps. pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). image_sizes (list[tuple]): sizes (h, w) for each image nms_thresh (float): IoU threshold to use for NMS pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. When RPN is run on multiple feature maps (as in FPN) this number is per feature map. post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. When RPN is run on multiple feature maps (as in FPN) this number is total, over all feature maps. min_box_size (float): minimum proposal box side length in pixels (absolute units wrt input images). training (bool): True if proposals are to be used in training, otherwise False. This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." comment. Returns: list[Instances]: list of N Instances. The i-th Instances stores post_nms_topk object proposals for image i, sorted by their objectness score in descending order. """ num_images = len(image_sizes) device = proposals[0].device # 1. Select top-k anchor for every level and every image topk_scores = [] # #lvl Tensor, each of shape N x topk topk_proposals = [] level_ids = [] # #lvl Tensor, each of shape (topk,) batch_idx = torch.arange(num_images, device=device) for level_id, proposals_i, logits_i in zip(itertools.count(), proposals, pred_objectness_logits): Hi_Wi_A = logits_i.shape[1] num_proposals_i = min(pre_nms_topk, Hi_Wi_A) # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) logits_i, idx = logits_i.sort(descending=True, dim=1) topk_scores_i = logits_i[batch_idx, :num_proposals_i] topk_idx = idx[batch_idx, :num_proposals_i] # each is N x topk topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 topk_proposals.append(topk_proposals_i) topk_scores.append(topk_scores_i) level_ids.append( torch.full((num_proposals_i, ), level_id, dtype=torch.int64, device=device)) # 2. Concat all levels together topk_scores = cat(topk_scores, dim=1) topk_proposals = cat(topk_proposals, dim=1) level_ids = cat(level_ids, dim=0) # 3. For each image, run a per-level NMS, and choose topk results. results = [] for n, image_size in enumerate(image_sizes): boxes = Boxes(topk_proposals[n]) scores_per_img = topk_scores[n] lvl = level_ids valid_mask = torch.isfinite( boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) if not valid_mask.all(): if training: raise FloatingPointError( "Predicted boxes or scores contain Inf/NaN. Training has diverged." 
                )
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
            lvl = lvl[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_size)
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep]

        keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]  # keep is already sorted

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
def construct_hopairs(self, instances: List[Instances]) -> List[Instances]: """ Prepare person-object pairs to be used to train HOI heads. At training, it returns union regions of person-object proposals and assigns training labels. It returns ``self.hoi_batch_size_per_image`` random samples from pesron-object pairs, with a fraction of positives that is no larger than ``self.hoi_positive_sample_fraction``. At inference, it returns union regions of predicted person boxes and object boxes. Args: instances (list[Instances]): At training, proposals_with_gt. See ``self.label_and_sample_proposals`` At inference, predicted box instances. See ``self._forward_box`` Returns: list[Instances]: length `N` list of `Instances`s containing the human-object pairs. Each `Instances` has the following fields: - union_boxes: the union region of person boxes and object boxes - person_boxes: person boxes in a matched sequences with union_boxes - object_boxes: object boxes in a matched sequences with union_boxes - gt_actions: the ground-truth actions that the pair is assigned. Used for training HOI head. - person_box_scores: person box scores from box instances. Used at inference. - object_box_scores: object box scores from box instances. Used at inference. - object_box_classes: predicted box classes from box instances. Used at inference. """ hopairs = [] for instances_per_image in instances: if self.training: # Proposals generated from person branch in HORPN will be seen as person boxes; # Proposals generated from object branch in HORPN will be object boxes. boxes = instances_per_image.proposal_boxes person_idxs = (instances_per_image.is_person == 1).nonzero().squeeze(1) object_idxs = (instances_per_image.is_person == 0).nonzero().squeeze(1) else: # At inference, split person/object boxes based on predicted classes by box head boxes = instances_per_image.pred_boxes person_idxs = torch.nonzero(instances_per_image.pred_classes == 0).squeeze(1) object_idxs = torch.nonzero(instances_per_image.pred_classes > 0).squeeze(1) if self.allow_person_to_person: # Allow person to person interactions. Then all boxes will be used. object_idxs = torch.arange(len(instances_per_image), device=object_idxs.device) num_pboxes, num_oboxes = person_idxs.numel(), object_idxs.numel() union_boxes = _pairwise_union_regions(boxes[person_idxs], boxes[object_idxs]) # Indexing person/object boxes in a matched order. person_idxs = person_idxs[:, None].repeat(1, num_oboxes).flatten() object_idxs = object_idxs[None, :].repeat(num_pboxes, 1).flatten() # Remove self-to-self interaction. keep = (person_idxs != object_idxs).nonzero().squeeze(1) union_boxes = union_boxes[keep] person_idxs = person_idxs[keep] object_idxs = object_idxs[keep] hopairs_per_image = Instances(instances_per_image.image_size) hopairs_per_image.union_boxes = union_boxes hopairs_per_image.person_boxes = boxes[person_idxs] hopairs_per_image.object_boxes = boxes[object_idxs] if self.training: # `person_idxs` and `object_idxs` are used in self.label_and_sample_hopairs() hopairs_per_image.person_idxs = person_idxs hopairs_per_image.object_idxs = object_idxs else: hopairs_per_image.person_box_scores = instances_per_image.scores[person_idxs] hopairs_per_image.object_box_scores = instances_per_image.scores[object_idxs] hopairs_per_image.object_box_classes = instances_per_image.pred_classes[object_idxs] hopairs.append(hopairs_per_image) if self.training: hopairs = self.label_and_sample_hopairs(hopairs, instances) return hopairs
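# The person-object index pairing used above, on toy indices (plain PyTorch):
# every person index is matched with every object index, then self-pairs
# (a person paired with itself) are removed.
def _example_pairwise_indices():
    import torch
    person_idxs = torch.tensor([0, 1])
    object_idxs = torch.tensor([1, 2])
    p = person_idxs[:, None].repeat(1, object_idxs.numel()).flatten()
    o = object_idxs[None, :].repeat(person_idxs.numel(), 1).flatten()
    keep = (p != o).nonzero().squeeze(1)
    pairs = list(zip(p[keep].tolist(), o[keep].tolist()))
    assert pairs == [(0, 1), (0, 2), (1, 2)]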
def test_rpn(self): torch.manual_seed(121) cfg = get_cfg() cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1) backbone = build_backbone(cfg) proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) num_images = 2 images_tensor = torch.rand(num_images, 20, 30) image_sizes = [(10, 10), (20, 30)] images = ImageList(images_tensor, image_sizes) image_shape = (15, 15) num_channels = 1024 features = {"res4": torch.rand(num_images, num_channels, 1, 2)} gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) gt_instances = Instances(image_shape) gt_instances.gt_boxes = Boxes(gt_boxes) with EventStorage(): # capture events in a new storage to discard them proposals, proposal_losses = proposal_generator( images, features, [gt_instances[0], gt_instances[1]] ) expected_losses = { "loss_rpn_cls": torch.tensor(0.0804563984), "loss_rpn_loc": torch.tensor(0.0990132466), } for name in expected_losses.keys(): err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( name, proposal_losses[name], expected_losses[name] ) self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) expected_proposal_boxes = [ Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])), Boxes( torch.tensor( [ [0, 0, 30, 20], [0, 0, 16.7862777710, 13.1362524033], [0, 0, 30, 13.3173446655], [0, 0, 10.8602609634, 20], [7.7165775299, 0, 27.3875980377, 20], ] ) ), ] expected_objectness_logits = [ torch.tensor([0.1225359365, -0.0133192837]), torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]), ] for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits ): self.assertEqual(len(proposal), len(expected_proposal_box)) self.assertEqual(proposal.image_size, im_size) self.assertTrue( torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor) ) self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit))
def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False):
    """
    A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor])
    into detectron2's format (i.e. a list of Instances).
    This only works when the model follows Caffe2 Detectron's naming convention.

    Args:
        image_sizes (List[List[int, int]]): [H, W] of every image.
        tensor_outputs (Dict[str, Tensor]): external_output to its tensor.
        force_mask_on (Bool): if true, makes sure there will be pred_masks even
            if the mask is not found in tensor_outputs (usually due to model crash)
    """
    results = [Instances(image_size) for image_size in image_sizes]

    batch_splits = tensor_outputs.get("batch_splits", None)
    if batch_splits:
        raise NotImplementedError()
    assert len(image_sizes) == 1
    result = results[0]

    bbox_nms = tensor_outputs["bbox_nms"]
    score_nms = tensor_outputs["score_nms"]
    class_nms = tensor_outputs["class_nms"]
    # Detection always succeeds because Conv supports 0-batch input.
    assert bbox_nms is not None
    assert score_nms is not None
    assert class_nms is not None
    if bbox_nms.shape[1] == 5:
        result.pred_boxes = RotatedBoxes(bbox_nms)
    else:
        result.pred_boxes = Boxes(bbox_nms)
    result.scores = score_nms
    result.pred_classes = class_nms.to(torch.int64)

    mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None)
    if mask_fcn_probs is not None:
        # finish the mask pred
        mask_probs_pred = mask_fcn_probs
        num_masks = mask_probs_pred.shape[0]
        class_pred = result.pred_classes
        indices = torch.arange(num_masks, device=class_pred.device)
        mask_probs_pred = mask_probs_pred[indices, class_pred][:, None]
        result.pred_masks = mask_probs_pred
    elif force_mask_on:
        # NOTE: there's no way to know the height/width of the mask here; it won't be
        # used anyway when the batch size is 0, so just set them to 0.
        result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8)

    keypoints_out = tensor_outputs.get("keypoints_out", None)
    kps_score = tensor_outputs.get("kps_score", None)
    if keypoints_out is not None:
        # keypoints_out: [N, 4, #keypoints], where 4 is in order of (x, y, score, prob)
        keypoints_tensor = keypoints_out
        # NOTE: it's possible that prob is not calculated if "should_output_softmax"
        # is set to False in HeatmapMaxKeypoint, so just use the raw score; it seems
        # this doesn't affect mAP. TODO: check more carefully.
        keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]]
        result.pred_keypoints = keypoint_xyp
    elif kps_score is not None:
        # keypoint heatmap to sparse data structure
        pred_keypoint_logits = kps_score
        keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result])

    return results
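# A small, self-contained illustration (illustrative tensors only) of the mask gathering step
# above: mask_fcn_probs has one channel per class, and the advanced indexing
# `probs[indices, class_pred]` keeps only the channel of each instance's predicted class,
# yielding an (N, 1, M, M) tensor suitable for pred_masks.
import torch

num_instances, num_classes, mask_size = 3, 5, 4
mask_fcn_probs = torch.rand(num_instances, num_classes, mask_size, mask_size)
pred_classes = torch.tensor([2, 0, 4])

indices = torch.arange(num_instances)
pred_masks = mask_fcn_probs[indices, pred_classes][:, None]
print(pred_masks.shape)                                      # torch.Size([3, 1, 4, 4])
print(torch.equal(pred_masks[0, 0], mask_fcn_probs[0, 2]))   # True: instance 0 keeps its class-2 channel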
def test_rrpn(self): torch.manual_seed(121) cfg = get_cfg() cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]] cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]] cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" backbone = build_backbone(cfg) proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) num_images = 2 images_tensor = torch.rand(num_images, 20, 30) image_sizes = [(10, 10), (20, 30)] images = ImageList(images_tensor, image_sizes) image_shape = (15, 15) num_channels = 1024 features = {"res4": torch.rand(num_images, num_channels, 1, 2)} gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) gt_instances = Instances(image_shape) gt_instances.gt_boxes = RotatedBoxes(gt_boxes) with EventStorage(): # capture events in a new storage to discard them proposals, proposal_losses = proposal_generator( images, features, [gt_instances[0], gt_instances[1]] ) expected_losses = { "loss_rpn_cls": torch.tensor(0.043263837695121765), "loss_rpn_loc": torch.tensor(0.14432406425476074), } for name in expected_losses.keys(): err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( name, proposal_losses[name], expected_losses[name] ) self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) expected_proposal_boxes = [ RotatedBoxes( torch.tensor( [ [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873], [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475], [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040], [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227], [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738], [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409], [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737], [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970], [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134], [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086], [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125], [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789], ] ) ), RotatedBoxes( torch.tensor( [ [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899], [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234], [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494], [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994], [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251], [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217], [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078], [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463], [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767], [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884], [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270], [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991], [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784], [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201], ] ) ), ] expected_objectness_logits = [ torch.tensor( [ 0.10111768, 0.09112845, 0.08466332, 0.07589971, 0.06650183, 0.06350251, 0.04299347, 0.01864817, 0.00986163, 0.00078543, -0.04573630, -0.04799230, ] ), torch.tensor( [ 0.11373727, 0.09377633, 0.05281663, 0.05143715, 
0.04040275, 0.03250912, 0.01307789, 0.01177734, 0.00038105, -0.00540255, -0.01194804, -0.01461012, -0.03061717, -0.03599222, ] ), ] torch.set_printoptions(precision=8, sci_mode=False) for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits ): self.assertEqual(len(proposal), len(expected_proposal_box)) self.assertEqual(proposal.image_size, im_size) # It seems that there's some randomness in the result across different machines: # This test can be run on a local machine for 100 times with exactly the same result, # However, a different machine might produce slightly different results, # thus the atol here. err_msg = "computed proposal boxes = {}, expected {}".format( proposal.proposal_boxes.tensor, expected_proposal_box.tensor ) self.assertTrue( torch.allclose( proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5 ), err_msg, ) err_msg = "computed objectness logits = {}, expected {}".format( proposal.objectness_logits, expected_objectness_logit ) self.assertTrue( torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5), err_msg, )
def fast_rcnn_inference_single_image( boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image ): """ Single-image inference. Return bounding-box detection results by thresholding on scores and applying non-maximum suppression (NMS). Args: Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes per image. Returns: Same as `fast_rcnn_inference`, but for only one image. """ all_scores = scores.clone() all_scores = torch.unsqueeze(all_scores, 0) all_boxes = boxes.clone() all_boxes = torch.unsqueeze(all_boxes, 0) pred_inds = torch.unsqueeze( torch.arange(scores.size(0), device=scores.device, dtype=torch.long), dim=1 ).repeat(1, scores.size(1)) valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) if not valid_mask.all(): boxes = boxes[valid_mask] scores = scores[valid_mask] pred_inds = pred_inds[valid_mask] scores = scores[:, :-1] num_bbox_reg_classes = boxes.shape[1] // 4 # Convert to Boxes to use the `clip` function ... boxes = Boxes(boxes.reshape(-1, 4)) boxes.clip(image_shape) boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 pred_inds = pred_inds[:, :-1] # Filter results based on detection scores filter_mask = scores > score_thresh # R x K # R' x 2. First column contains indices of the R predictions; # Second column contains indices of classes. filter_inds = filter_mask.nonzero() if num_bbox_reg_classes == 1: boxes = boxes[filter_inds[:, 0], 0] else: boxes = boxes[filter_mask] scores = scores[filter_mask] pred_inds = pred_inds[filter_mask] # Apply per-class NMS keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) if topk_per_image >= 0: keep = keep[:topk_per_image] boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] pred_inds = pred_inds[keep] result = Instances(image_shape) result.pred_boxes = Boxes(boxes) result.scores = scores result.pred_classes = filter_inds[:, 1] result.pred_inds = pred_inds return result, filter_inds[:, 0], all_scores, all_boxes
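# Illustrative-only tensors showing the score-thresholding step above: `filter_mask.nonzero()`
# returns an (R', 2) tensor whose first column indexes the prediction and whose second column
# indexes the class, and the same boolean mask flattens the (R, K) scores and the (R, K, 4)
# class-specific boxes consistently.
import torch

scores = torch.tensor([[0.9, 0.1],
                       [0.2, 0.8],
                       [0.3, 0.4]])     # R=3 predictions, K=2 classes
boxes = torch.rand(3, 2, 4)             # per-class regressed boxes
score_thresh = 0.5

filter_mask = scores > score_thresh     # (R, K) boolean
filter_inds = filter_mask.nonzero()
print(filter_inds)                # tensor([[0, 0], [1, 1]]) -> (prediction idx, class idx)
print(scores[filter_mask])        # tensor([0.9000, 0.8000])
print(boxes[filter_mask].shape)   # torch.Size([2, 4]) -- one box per surviving (prediction, class)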
def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): """ Resize the output instances. The input images are often resized when entering an object detector. As a result, we often need the outputs of the detector in a different resolution from its inputs. This function will resize the raw outputs of an R-CNN detector to produce outputs according to the desired output resolution. Args: results (Instances): the raw outputs from the detector. `results.image_size` contains the input image resolution the detector sees. This object might be modified in-place. output_height, output_width: the desired output resolution. Returns: Instances: the resized output from the model, based on the output resolution """ # Converts integer tensors to float temporaries # to ensure true division is performed when # computing scale_x and scale_y. if isinstance(output_width, torch.Tensor): output_width_tmp = output_width.float() else: output_width_tmp = output_width if isinstance(output_height, torch.Tensor): output_height_tmp = output_height.float() else: output_height_tmp = output_height scale_x, scale_y = ( output_width_tmp / results.image_size[1], output_height_tmp / results.image_size[0], ) results = Instances((output_height, output_width), **results.get_fields()) if results.has("pred_boxes"): output_boxes = results.pred_boxes elif results.has("proposal_boxes"): output_boxes = results.proposal_boxes output_boxes.scale(scale_x, scale_y) output_boxes.clip(results.image_size) results = results[output_boxes.nonempty()] if results.has("pred_masks") and results.has("no_paste"): results.pred_masks = F.interpolate( results.pred_masks, size=(output_height, output_width), mode="bilinear", align_corners=False, ) results.pred_masks = (results.pred_masks[:, 0, :, :] >= mask_threshold).to(dtype=torch.bool) elif results.has("pred_masks"): results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( results.pred_masks[:, 0, :, :], # N, 1, M, M results.pred_boxes, results.image_size, threshold=mask_threshold, ) if results.has("pred_keypoints"): results.pred_keypoints[:, :, 0] *= scale_x results.pred_keypoints[:, :, 1] *= scale_y return results
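# A minimal usage sketch for detector_postprocess with illustrative values (not from this
# codebase): the detector ran on a 480x640 resized image and we want boxes back at the original
# 960x1280 resolution, so scale_x = 1280 / 640 = 2 and scale_y = 960 / 480 = 2.
import torch
from detectron2.structures import Boxes, Instances

raw = Instances((480, 640))   # resolution the detector saw
raw.pred_boxes = Boxes(torch.tensor([[10., 20., 110., 220.]]))
raw.scores = torch.tensor([0.9])
raw.pred_classes = torch.tensor([0])

out = detector_postprocess(raw, output_height=960, output_width=1280)
print(out.pred_boxes.tensor)   # tensor([[ 20.,  40., 220., 440.]]) -- scaled and clipped
print(out.image_size)          # (960, 1280)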
def get_pgt(self, prev_pred_boxes, prev_pred_scores, proposals, suffix): if isinstance(prev_pred_scores, torch.Tensor): num_preds_per_image = [len(p) for p in proposals] prev_pred_scores = prev_pred_scores.split(num_preds_per_image, dim=0) else: assert isinstance(prev_pred_scores, list) assert isinstance(prev_pred_scores[0], torch.Tensor) prev_pred_scores = [ torch.index_select(prev_pred_score, 1, gt_int) for prev_pred_score, gt_int in zip(prev_pred_scores, self.gt_classes_img_int) ] pgt_scores_idxs = [ torch.max(prev_pred_score, dim=0) for prev_pred_score in prev_pred_scores ] pgt_scores = [item[0] for item in pgt_scores_idxs] pgt_idxs = [item[1] for item in pgt_scores_idxs] assert isinstance(prev_pred_boxes, tuple) or isinstance(prev_pred_boxes, list) if isinstance(prev_pred_boxes[0], Boxes): pgt_boxes = [ prev_pred_box[pgt_idx] for prev_pred_box, pgt_idx in zip(prev_pred_boxes, pgt_idxs) ] else: assert isinstance(prev_pred_boxes[0], torch.Tensor) if self.cls_agnostic_bbox_reg: num_preds = [prev_pred_box.size(0) for prev_pred_box in prev_pred_boxes] prev_pred_boxes = [ prev_pred_box.unsqueeze(1).expand(num_pred, self.num_classes, 4) for num_pred, prev_pred_box in zip(num_preds, prev_pred_boxes) ] prev_pred_boxes = [ prev_pred_box.view(-1, self.num_classes, 4) for prev_pred_box in prev_pred_boxes ] prev_pred_boxes = [ torch.index_select(prev_pred_box, 1, gt_int) for prev_pred_box, gt_int in zip(prev_pred_boxes, self.gt_classes_img_int) ] pgt_boxes = [ torch.index_select(prev_pred_box, 0, pgt_idx) for prev_pred_box, pgt_idx in zip(prev_pred_boxes, pgt_idxs) ] pgt_boxes = [pgt_box.view(-1, 4) for pgt_box in pgt_boxes] diags = [ torch.tensor( [i * gt_split.numel() + i for i in range(gt_split.numel())], dtype=torch.int64, device=pgt_boxes[0].device, ) for gt_split in self.gt_classes_img_int ] pgt_boxes = [ torch.index_select(pgt_box, 0, diag) for pgt_box, diag in zip(pgt_boxes, diags) ] pgt_boxes = [Boxes(pgt_box) for pgt_box in pgt_boxes] pgt_classes = self.gt_classes_img_int pgt_weights = [ torch.index_select(pred_logits, 1, pgt_class).reshape(-1) for pred_logits, pgt_class in zip( self.pred_class_img_logits.split(1, dim=0), pgt_classes ) ] targets = [ Instances( proposals[i].image_size, gt_boxes=pgt_box, gt_classes=pgt_class, gt_scores=pgt_score, gt_weights=pgt_weight, ) for i, (pgt_box, pgt_class, pgt_score, pgt_weight) in enumerate( zip(pgt_boxes, pgt_classes, pgt_scores, pgt_weights) ) ] self._vis_pgt(targets, "pgt", suffix) return targets
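# Illustrative-only tensors for the pseudo-ground-truth selection above: the scores are first
# restricted to the image-level ground-truth classes with index_select on dim=1, then torch.max
# over dim=0 picks, for each such class, the single highest-scoring proposal as that class's
# pseudo box.
import torch

prev_pred_scores = torch.tensor([[0.1, 0.7, 0.2],
                                 [0.6, 0.2, 0.9],
                                 [0.3, 0.4, 0.1]])   # 3 proposals x 3 classes
gt_classes_img_int = torch.tensor([0, 2])            # classes present in the image

scores_for_gt = torch.index_select(prev_pred_scores, 1, gt_classes_img_int)
pgt_scores, pgt_idxs = torch.max(scores_for_gt, dim=0)
print(pgt_scores)   # tensor([0.6000, 0.9000])
print(pgt_idxs)     # tensor([1, 1]) -> proposal 1 becomes the pseudo box for both classes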
HEIGHT = 480
WIDTH = 640
PREDICTIONS = {
    'scores': [0.9974532723426819, 0.9938008189201355, 0.988240122795105, 0.9850716590881348, 0.9844247102737427, 0.9810763597488403, 0.9800938963890076, 0.9720492362976074, 0.9688801765441895, 0.9674813747406006, 0.927189290523529, 0.9221163988113403, 0.9185774326324463, 0.9142449498176575, 0.8913487792015076, 0.8826121687889099, 0.8605493903160095, 0.8423078656196594, 0.8416911363601685, 0.8005133271217346, 0.7691927552223206, 0.7283533811569214, 0.7125754356384277, 0.6947720050811768, 0.6323946118354797, 0.5554373264312744, 0.502210259437561],
    'pred_classes': [0, 2, 2, 2, 9, 0, 2, 0, 2, 0, 0, 5, 24, 9, 2, 2, 9, 5, 9, 2, 2, 0, 2, 0, 9, 9, 9],
    'pred_boxes': [
        [309.5509033203125, 163.2444610595703, 434.3181457519531, 475.4096984863281],
        [3.7802395820617676, 242.15467834472656, 537.434814453125, 432.81524658203125],
        [14.279938697814941, 212.42967224121094, 45.97575759887695, 233.4514617919922],
        [421.2859191894531, 208.60498046875, 556.5548706054688, 309.9937744140625],
        [143.9969940185547, 143.22933959960938, 152.34913635253906, 159.94985961914062],
        [582.5836181640625, 198.43775939941406, 600.1054077148438, 266.8861389160156],
        [57.001808166503906, 211.5207061767578, 77.75701141357422, 229.96949768066406],
        [134.99818420410156, 208.92091369628906, 189.38192749023438, 278.38665771484375],
        [75.46967315673828, 209.60325622558594, 103.36743927001953, 230.1490478515625],
        [545.7862548828125, 202.4793243408203, 569.5499267578125, 269.63946533203125],
        [605.6588745117188, 201.08189392089844, 626.3668823242188, 269.6811218261719],
        [183.23878479003906, 177.90478515625, 242.53317260742188, 238.60890197753906],
        [129.665771484375, 228.85916137695312, 170.4459686279297, 264.7272644042969],
        [46.01143264770508, 137.7577362060547, 54.27482604980469, 154.9816436767578],
        [121.1451416015625, 205.1159210205078, 157.94473266601562, 229.43475341796875],
        [255.93080139160156, 214.98483276367188, 284.3371887207031, 243.56529235839844],
        [110.03988647460938, 133.47642517089844, 118.6834487915039, 152.92295837402344],
        [93.23741912841797, 186.40609741210938, 128.8490447998047, 226.1478271484375],
        [183.7700653076172, 141.0837860107422, 191.97706604003906, 156.7988739013672],
        [270.9613037109375, 206.3548583984375, 346.9297790527344, 241.0618438720703],
        [387.1550598144531, 269.3006286621094, 526.6027221679688, 415.30615234375],
        [0.49352559447288513, 323.8660583496094, 49.01905822753906, 359.7412414550781],
        [145.3698272705078, 207.95762634277344, 169.30557250976562, 229.77464294433594],
        [615.1426391601562, 207.83851623535156, 639.5967407226562, 354.58331298828125],
        [142.42550659179688, 176.94520568847656, 147.10646057128906, 185.7545166015625],
        [152.2357635498047, 165.64385986328125, 158.73641967773438, 175.6268768310547],
        [105.40318298339844, 177.60287475585938, 110.80204772949219, 182.9259796142578],
    ],
}
# Model-server console output: the same 27 predictions echoed as
# Instances(num_instances=27, image_height=480, image_width=640, fields=[scores, pred_classes, pred_boxes]).
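# A hypothetical sketch (not taken from this codebase) of one way to materialize the JSON-style
# PREDICTIONS dict above as a detectron2 Instances whose fields are tensors, so that pred_boxes
# supports Boxes operations such as clip() or area(). The helper name is an assumption.
import torch
from detectron2.structures import Boxes, Instances

def predictions_to_instances(height, width, preds):
    inst = Instances((height, width))
    inst.scores = torch.tensor(preds["scores"])
    inst.pred_classes = torch.tensor(preds["pred_classes"], dtype=torch.int64)
    inst.pred_boxes = Boxes(torch.tensor(preds["pred_boxes"]))
    return inst

inst = predictions_to_instances(HEIGHT, WIDTH, PREDICTIONS)
print(len(inst), inst.image_size)   # 27 (480, 640)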