Example no. 1
def show_annotated_image(img,
                         boxes,
                         labels,
                         scores,
                         class_ids,
                         score_threshold=0.0,
                         default_boxes=None,
                         transform_corners=None,
                         max_dets=None,
                         showfig=False,
                         image_id=None):
    good_ids = torch.nonzero(scores.float() > score_threshold).view(-1)
    if good_ids.numel() > 0:
        if max_dets is not None:
            _, ids = scores[good_ids].sort(descending=False)
            good_ids = good_ids[ids[-max_dets:]]
        boxes = boxes[good_ids].cpu()
        labels = labels[good_ids].cpu()
        scores = scores[good_ids].cpu()
        label_names = ["Cl " + str(l.item()) for l in labels]
        box_colors = ["yellow"] * len(boxes)
    else:
        boxes = BoxList.create_empty(boxes.image_size)
        labels = torch.LongTensor(0)
        scores = torch.FloatTensor(0)
        label_names = []
        box_colors = []

    # create visualizations of default boxes
    if default_boxes is not None:
        default_boxes = default_boxes[good_ids].cpu()

        # append boxes
        boxes = torch.cat([default_boxes.bbox_xyxy, boxes.bbox_xyxy], 0)
        labels = torch.cat(
            [torch.Tensor(len(default_boxes)).to(labels).zero_(), labels], 0)
        scores = torch.cat([
            torch.Tensor(len(default_boxes)).to(scores).fill_(float("nan")),
            scores
        ], 0)
        label_names = [""] * len(default_boxes) + label_names
        box_colors = ["cyan"] * len(default_boxes) + box_colors
    else:
        boxes = boxes.bbox_xyxy

    if transform_corners is not None:
        # draw polygons representing the corners of a transformation
        transform_corners = transform_corners[good_ids].cpu()

    vis_image(img,
              showfig=showfig,
              boxes=boxes,
              scores=scores,
              label_names=label_names,
              colors=box_colors,
              image_id=image_id,
              polygons=transform_corners)
    return
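
A minimal usage sketch for show_annotated_image; BoxList and FeatureMapSize come from the OS2D-style helpers used throughout these snippets, the import paths below are assumptions, and vis_image is assumed to be importable alongside the function.

# Hedged usage sketch: import paths are assumptions, data is dummy.
import torch
from PIL import Image
from os2d.structures.bounding_box import BoxList          # assumed path
from os2d.structures.feature_map import FeatureMapSize    # assumed path

img = Image.new("RGB", (640, 480))        # dummy image
img_size = FeatureMapSize(img=img)

# two dummy detections in xyxy format
boxes = BoxList(torch.tensor([[10.0, 20.0, 110.0, 220.0],
                              [50.0, 60.0, 150.0, 260.0]]),
                image_size=img_size, mode="xyxy")
labels = torch.tensor([3, 7])
scores = torch.tensor([0.90, 0.45])

show_annotated_image(img, boxes, labels, scores,
                     class_ids=None,
                     score_threshold=0.5,   # keeps only the first detection
                     max_dets=10,
                     showfig=True)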
Example no. 2
    def _transform_image_to_pyramid(self,
                                    image_id,
                                    boxes=None,
                                    do_augmentation=True,
                                    hflip=False,
                                    vflip=False,
                                    pyramid_scales=(1, ),
                                    mined_data=None):
        img = self._get_dataset_image_by_id(image_id)
        img_size = FeatureMapSize(img=img)

        do_augmentation = do_augmentation and self.data_augmentation is not None
        num_pyramid_levels = len(pyramid_scales)

        use_mined_crop = mined_data is not None
        if use_mined_crop:
            crop_position = mined_data["crop_position_xyxy"]

        if boxes is None:
            boxes = BoxList.create_empty(img_size)
        mask_cutoff_boxes = torch.zeros(len(boxes), dtype=torch.bool)
        mask_difficult_boxes = torch.zeros(len(boxes), dtype=torch.bool)

        box_inverse_transform = TransformList()
        # batch level data augmentation
        img, boxes = transforms_boxes.transpose(
            img,
            hflip=hflip,
            vflip=vflip,
            boxes=boxes,
            transform_list=box_inverse_transform)

        if use_mined_crop:
            # update crop_position_xyxy with the symmetries
            if hflip or vflip:
                _, crop_position = transforms_boxes.transpose(
                    img, hflip=hflip, vflip=vflip, boxes=crop_position)

        if do_augmentation:
            if self.data_augmentation.do_random_crop:
                if not use_mined_crop:
                    img, boxes, mask_cutoff_boxes, mask_difficult_boxes = \
                        self.data_augmentation.random_crop(img,
                                                           boxes=boxes,
                                                           transform_list=box_inverse_transform)
                else:
                    img, boxes, mask_cutoff_boxes, mask_difficult_boxes = \
                        self.data_augmentation.crop_image(img, crop_position,
                                                          boxes=boxes,
                                                          transform_list=box_inverse_transform)

                img, boxes = transforms_boxes.resize(
                    img,
                    target_size=self.data_augmentation.random_crop_size,
                    random_interpolation=self.data_augmentation.
                    random_interpolation,
                    boxes=boxes,
                    transform_list=box_inverse_transform)

            # color distortion
            img = self.data_augmentation.random_distort(img)

        random_interpolation = self.data_augmentation.random_interpolation if do_augmentation else False
        img_size = FeatureMapSize(img=img)
        pyramid_sizes = [
            FeatureMapSize(w=int(img_size.w * s), h=int(img_size.h * s))
            for s in pyramid_scales
        ]
        img_pyramid = []
        boxes_pyramid = []
        pyramid_box_inverse_transform = []
        for p_size in pyramid_sizes:
            box_inverse_transform_this_scale = copy.deepcopy(
                box_inverse_transform)
            p_img, p_boxes = transforms_boxes.resize(
                img,
                target_size=p_size,
                random_interpolation=random_interpolation,
                boxes=boxes,
                transform_list=box_inverse_transform_this_scale)

            pyramid_box_inverse_transform.append(
                box_inverse_transform_this_scale)
            img_pyramid.append(p_img)
            boxes_pyramid.append(p_boxes)

        transforms_th = [transforms.ToTensor()]
        if self.img_normalization is not None:
            transforms_th += [
                transforms.Normalize(self.img_normalization["mean"],
                                     self.img_normalization["std"])
            ]

        for i_p in range(num_pyramid_levels):
            img_pyramid[i_p] = transforms.Compose(transforms_th)(
                img_pyramid[i_p])

        return img_pyramid, boxes_pyramid, mask_cutoff_boxes, mask_difficult_boxes, pyramid_box_inverse_transform
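
The core of the method above is building rescaled copies of one image and applying ToTensor/Normalize to each pyramid level. A standalone sketch of just that part, using plain PIL and torchvision (names are illustrative; boxes, augmentation and inverse transforms are omitted):

# Simplified sketch of the pyramid construction only.
from PIL import Image
import torchvision.transforms as transforms

def build_image_pyramid(img, pyramid_scales=(0.5, 1.0, 2.0), normalization=None):
    w, h = img.size
    transform_list = [transforms.ToTensor()]
    if normalization is not None:
        transform_list.append(transforms.Normalize(normalization["mean"],
                                                   normalization["std"]))
    to_tensor = transforms.Compose(transform_list)

    pyramid = []
    for s in pyramid_scales:
        p_img = img.resize((int(w * s), int(h * s)), Image.BILINEAR)
        pyramid.append(to_tensor(p_img))
    return pyramid

pyramid = build_image_pyramid(Image.new("RGB", (320, 240)),
                              normalization={"mean": [0.485, 0.456, 0.406],
                                             "std": [0.229, 0.224, 0.225]})
print([tuple(t.shape) for t in pyramid])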
def make_iterator_extract_scores_from_images_batched(dataloader,
                                                     maskrcnn_model,
                                                     maskrcnn_config,
                                                     logger,
                                                     image_batch_size=None,
                                                     is_cuda=False):
    logger.info("Starting iterations over images")

    # get images of all classes
    class_images, class_aspect_ratios, class_ids = dataloader.get_all_class_images()
    num_classes = len(class_images)
    assert len(class_aspect_ratios) == num_classes
    assert len(class_ids) == num_classes
    query_img_sizes = [FeatureMapSize(img=img) for img in class_images]

    # loop over all images
    iterator_batches = dataloader.make_iterator_for_all_images(
        image_batch_size)
    for batch_ids, pyramids_batch, box_transforms_batch, initial_img_size_batch in iterator_batches:
        t_start_batch = time.time()
        # extract features at all pyramid levels
        batch_images_pyramid = []
        bboxes_xyxy = []
        labels = []
        scores = []
        num_pyramid_levels = len(pyramids_batch)
        for batch_images in pyramids_batch:
            if is_cuda:
                batch_images = batch_images.cuda()

            # print("Image size:", batch_images.size())

            batch_images = [
                dataloader.unnorm_image(img) for img in batch_images
            ]
            batch_images = torch.stack(batch_images, 0)

            bboxes_xyxy_, labels_, scores_ = run_maskrcnn_on_images(
                maskrcnn_model, maskrcnn_config, batch_images)

            bboxes_xyxy.append(bboxes_xyxy_)
            labels.append(labels_)
            scores.append(scores_)
            batch_images_pyramid.append(batch_images)

        for i_image_in_batch, image_id in enumerate(batch_ids):
            # get data from all pyramid levels
            bboxes_xyxy_p = []
            labels_p = []
            scores_p = []
            for i_p in range(num_pyramid_levels):
                bboxes_xyxy_p.append(bboxes_xyxy[i_p][i_image_in_batch])
                labels_p.append(labels[i_p][i_image_in_batch])
                scores_p.append(scores[i_p][i_image_in_batch])

            # get the pyramid of this single image
            one_image_pyramid = [
                p[i_image_in_batch] for p in batch_images_pyramid
            ]

            # extract the box transformations
            box_reverse_transforms = box_transforms_batch[i_image_in_batch]

            # get the boxes in the correct format
            bboxes_xyxy_p = [
                BoxList(bbox, FeatureMapSize(img=img), mode="xyxy")
                for bbox, img in zip(bboxes_xyxy_p, one_image_pyramid)
            ]
            bboxes_xyxy_p = [
                t(bb) for t, bb in zip(box_reverse_transforms, bboxes_xyxy_p)
            ]

            # add labels and scores into the box structure
            for bb, l, s in zip(bboxes_xyxy_p, labels_p, scores_p):
                bb.add_field("labels", l)
                bb.add_field("scores", s)

            # get the size of the initial image
            initial_img_size = initial_img_size_batch[i_image_in_batch]

            yield image_id, bboxes_xyxy_p, one_image_pyramid, query_img_sizes, class_ids, initial_img_size
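
The function above is a generator; a consumption sketch (dataloader, maskrcnn_model, maskrcnn_config and logger are placeholders assumed to be constructed elsewhere, the unpacking order follows the yield):

import torch

iterator = make_iterator_extract_scores_from_images_batched(dataloader,
                                                            maskrcnn_model,
                                                            maskrcnn_config,
                                                            logger,
                                                            image_batch_size=4,
                                                            is_cuda=torch.cuda.is_available())
for (image_id, boxes_per_level, image_pyramid,
     query_img_sizes, class_ids, initial_img_size) in iterator:
    # each element of boxes_per_level is a BoxList carrying "labels" and "scores" fields
    print(image_id, [len(b) for b in boxes_per_level])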
Example no. 4
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5, merge_classes_together=False):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.
    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.
   """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox_xyxy.cpu().numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox_xyxy.cpu().numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        if gt_boxlist.has_field("difficult"):
            gt_difficult = gt_boxlist.get_field("difficult").numpy()
        else:
            gt_difficult = np.zeros_like(gt_label)

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.image_size),
                BoxList(gt_bbox_l, gt_boxlist.image_size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    if merge_classes_together:
        n_pos = {0: sum(n_pos[i] for i in n_pos)}
        # merge lists together, copy to avoid rewriting the old lists
        old_score = copy.deepcopy(score)
        score = {0: sum((old_score[i] for i in old_score), [])}
        old_match = copy.deepcopy(match)
        match = {0: sum((old_match[i] for i in old_match), [])}

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec, n_pos
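
The per-class precision/recall curves returned above are usually reduced to average precision. A sketch of the standard all-point VOC interpolation (this helper is not part of the snippet; it only consumes its output):

import numpy as np

def voc_ap_from_prec_rec(prec, rec):
    """All-point interpolated AP per class; prec/rec are the lists returned
    by calc_detection_voc_prec_rec (None entries give NaN)."""
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for l in range(n_fg_class):
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue
        # pad the curve and take the monotonically decreasing precision envelope
        mpre = np.concatenate(([0.0], np.nan_to_num(prec[l]), [0.0]))
        mrec = np.concatenate(([0.0], rec[l], [1.0]))
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]
        # integrate precision over the points where recall changes
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap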
    def forward(self, feature_maps):
        """
        Args:
            feature_maps (Tensor[float], size b^A x d x h^A x w^A) - contains the feature map of the input image
            b^A - batch size
            d - feature dimensionality
            h^A - height of the feature map
            w^A - width of the feature map
​
        Returns:
                # here b^C is the class batch size, i.e., the number of class images contained in self.class_batch_size passed when creating this object
            output_localization (Tensor[float], size b^A x b^C x 4 x h^A x w^A) - the localization output w.r.t. the standard box encoding - computed by DetectionBoxCoder.build_loc_targets
            output_recognition (Tensor[float], size size b^A x b^C x 1 x h^A x w^A) - the recognition output for each of the classes - the correlation, linearly converted to [0, 1] segment, the higher the better match to the class
            output_recognition_transform_detached (Tensor[float], size b^A x b^C x 1 x h^A x w^A) - same to output_recognition, but with the computational graph detached from the transformation (for backward  that does not update the transofrmation - intended for the negatives)
            corner_coordinates (Tensor[float], size size b^A x b^C x 8 x h^A x w^A) - the corners of the default boxes after the transofrmation, datached from the computational graph, for visualisation only
        """
        # get dims
        batch_size = feature_maps.size(0)
        feature_dim = feature_maps.size(1)
        image_fm_size = FeatureMapSize(img=feature_maps)
        class_fm_size = FeatureMapSize(img=self.class_feature_maps)
        feature_dim_for_regression = class_fm_size.h * class_fm_size.w

        class_feature_dim = self.class_feature_maps.size(1)
        assert feature_dim == class_feature_dim, "Feature dimensionality of input ({0}) and class ({1}) feature maps has to be equal".format(
            feature_dim, class_feature_dim)

        # L2-normalize the feature map
        feature_maps = normalize_feature_map_L2(feature_maps, 1e-5)

        # get correlations all to all
        corr_maps = torch.einsum("bfhw,afxy->abwhxy", self.class_feature_maps,
                                 feature_maps)
        # TODO: try to optimize this with opt_einsum: https://optimized-einsum.readthedocs.io/en/latest/
        # CAUTION: note the switch of dimensions hw to wh. This is done for compatibility with the FeatureCorrelation class by Ignacio Rocco https://github.com/ignacio-rocco/ncnet/blob/master/lib/model.py (to be able to load their models)

        # reshape to have the correlation map of dimensions similar to the standard tensor for image feature maps
        corr_maps = corr_maps.contiguous().view(
            batch_size * self.class_batch_size, feature_dim_for_regression,
            image_fm_size.h, image_fm_size.w)

        # compute the grids to resample corr maps
        resampling_grids_local_coord = self.aligner(corr_maps)

        # build classifications outputs
        cor_maps_for_recognition = corr_maps.contiguous().view(
            batch_size, self.class_batch_size, feature_dim_for_regression,
            image_fm_size.h, image_fm_size.w)
        resampling_grids_local_coord = resampling_grids_local_coord.contiguous(
        ).view(batch_size, self.class_batch_size, image_fm_size.h,
               image_fm_size.w, self.aligner.out_grid_size.h,
               self.aligner.out_grid_size.w, 2)

        # need to recompute resampling_grids to [-1, 1] coordinates w.r.t. the feature maps to sample points with F.grid_sample
        # first get the list of boxes that corresponds to the receptive fields of the parameter regression network: box sizes are the receptive field sizes, stride is the network stride
        default_boxes_xyxy_wrt_fm = self.box_grid_generator_feature_map_level.create_strided_boxes_columnfirst(
            fm_size=image_fm_size)

        default_boxes_xyxy_wrt_fm = default_boxes_xyxy_wrt_fm.view(
            1, 1, image_fm_size.h, image_fm_size.w, 4)
        # 1 (to broadcast to batch_size) x 1 (to broadcast to class batch_size) x  box_grid_height x box_grid_width x 4
        default_boxes_xyxy_wrt_fm = default_boxes_xyxy_wrt_fm.to(
            resampling_grids_local_coord.device)
        resampling_grids_fm_coord = convert_box_coordinates_local_to_global(
            resampling_grids_local_coord, default_boxes_xyxy_wrt_fm)

        # convert to coordinates normalized to [-1, 1] (to be compatible with torch.nn.functional.grid_sample)
        resampling_grids_fm_coord_x = resampling_grids_fm_coord.narrow(
            -1, 0, 1)
        resampling_grids_fm_coord_y = resampling_grids_fm_coord.narrow(
            -1, 1, 1)
        resampling_grids_fm_coord_unit = torch.cat([
            resampling_grids_fm_coord_x / (image_fm_size.w - 1) * 2 - 1,
            resampling_grids_fm_coord_y / (image_fm_size.h - 1) * 2 - 1
        ],
                                                   dim=-1)
        # clamp to fit the image plane
        resampling_grids_fm_coord_unit = resampling_grids_fm_coord_unit.clamp(
            -1, 1)

        # extract and pool matches
        # # slower code:
        # matches_summed = self.resample_of_correlation_map_simple(cor_maps_for_recognition,
        #                                                          resampling_grids_fm_coord_unit,
        #                                                          self.class_pool_mask)

        # we use a faster but somewhat more obscure version
        matches_summed = self.resample_of_correlation_map_fast(
            cor_maps_for_recognition, resampling_grids_fm_coord_unit,
            self.class_pool_mask)
        if matches_summed.requires_grad:
            matches_summed_transform_detached = self.resample_of_correlation_map_fast(
                cor_maps_for_recognition,
                resampling_grids_fm_coord_unit.detach(), self.class_pool_mask)
        else:
            # Optimization to make eval faster
            matches_summed_transform_detached = matches_summed

        # build localization targets
        default_boxes_xyxy_wrt_image = self.box_grid_generator_image_level.create_strided_boxes_columnfirst(
            fm_size=image_fm_size)

        default_boxes_xyxy_wrt_image = default_boxes_xyxy_wrt_image.view(
            1, 1, image_fm_size.h, image_fm_size.w, 4)
        # 1 (to broadcast to batch_size) x 1 (to broadcast to class batch_size) x  box_grid_height x box_grid_width x 4
        default_boxes_xyxy_wrt_image = default_boxes_xyxy_wrt_image.to(
            resampling_grids_local_coord.device)
        resampling_grids_image_coord = convert_box_coordinates_local_to_global(
            resampling_grids_local_coord, default_boxes_xyxy_wrt_image)

        num_pooled_points = self.aligner.out_grid_size.w * self.aligner.out_grid_size.h
        resampling_grids_x = resampling_grids_image_coord.narrow(
            -1, 0, 1).contiguous().view(-1, num_pooled_points)
        resampling_grids_y = resampling_grids_image_coord.narrow(
            -1, 1, 1).contiguous().view(-1, num_pooled_points)
        class_boxes_xyxy = torch.stack([
            resampling_grids_x.min(dim=1)[0],
            resampling_grids_y.min(dim=1)[0],
            resampling_grids_x.max(dim=1)[0],
            resampling_grids_y.max(dim=1)[0]
        ], 1)

        # extract rectangle borders to draw complete boxes
        corner_coordinates = resampling_grids_image_coord[:, :, :, :, [0, -1]][:, :, :, :, :, [0, -1]]  # only the corners
        corner_coordinates = corner_coordinates.detach_()
        corner_coordinates = corner_coordinates.view(
            batch_size, self.class_batch_size, image_fm_size.h,
            image_fm_size.w,
            8)  # batch_size x label_batch_size x fm_height x fm_width x 8
        corner_coordinates = corner_coordinates.transpose(3, 4).transpose(
            2, 3)  # batch_size x label_batch_size x 8 x fm_height x fm_width

        class_boxes = BoxList(class_boxes_xyxy.view(-1, 4),
                              image_fm_size,
                              mode="xyxy")
        default_boxes_wrt_image = BoxList(default_boxes_xyxy_wrt_image.view(
            -1, 4),
                                          image_fm_size,
                                          mode="xyxy")
        default_boxes_with_image_batches = cat_boxlist(
            [default_boxes_wrt_image] * batch_size * self.class_batch_size)

        output_localization = Os2dBoxCoder.build_loc_targets(
            class_boxes, default_boxes_with_image_batches)  # num_boxes x 4
        output_localization = output_localization.view(
            batch_size, self.class_batch_size, image_fm_size.h,
            image_fm_size.w,
            4)  # batch_size x label_batch_size x fm_height x fm_width x 4
        output_localization = output_localization.transpose(3, 4).transpose(
            2, 3)  # batch_size x label_batch_size x 4 x fm_height x fm_width

        output_recognition = (matches_summed - 1.0) / 2.0
        output_recognition_transform_detached = (
            matches_summed_transform_detached - 1.0) / 2.0
        return output_localization, output_recognition, output_recognition_transform_detached, corner_coordinates
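
A toy shape check of the all-to-all correlation and of the [-1, 1] coordinate convention expected by F.grid_sample, the two least obvious steps of the forward pass above (random tensors, sizes are illustrative):

import torch
import torch.nn.functional as F

b_A, b_C, d = 2, 3, 8              # image batch, class batch, feature dim
h_A, w_A, h_C, w_C = 6, 7, 4, 5    # image / class feature-map sizes

image_fm = F.normalize(torch.randn(b_A, d, h_A, w_A), dim=1)   # L2-normalized over channels
class_fm = F.normalize(torch.randn(b_C, d, h_C, w_C), dim=1)

# all-to-all correlation; note the hw -> wh swap, as in the einsum above
corr = torch.einsum("bfhw,afxy->abwhxy", class_fm, image_fm)
assert corr.shape == (b_A, b_C, w_C, h_C, h_A, w_A)

# grid_sample expects sampling locations in [-1, 1] with the x coordinate first;
# an identity grid with align_corners=True reproduces the input exactly
ys, xs = torch.meshgrid(torch.arange(h_A, dtype=torch.float32),
                        torch.arange(w_A, dtype=torch.float32), indexing="ij")
grid = torch.stack([xs / (w_A - 1) * 2 - 1,
                    ys / (h_A - 1) * 2 - 1], dim=-1).expand(b_A, -1, -1, -1)
resampled = F.grid_sample(image_fm, grid, align_corners=True)
assert torch.allclose(resampled, image_fm, atol=1e-5)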
Example no. 6
    def get_boxes_from_image_dataframe(image_data, image_size):
        if not image_data.empty:
            # get the labels
            label_ids_global = torch.tensor(list(image_data["classid"]),
                                            dtype=torch.long)
            difficult_flag = torch.tensor(list(image_data["difficult"] == 1),
                                          dtype=torch.bool)

            # get the boxes
            boxes = image_data[["lx", "ty", "rx", "by"]].to_numpy()
            # renorm boxes using the image size
            boxes[:, 0] *= image_size.w
            boxes[:, 2] *= image_size.w
            boxes[:, 1] *= image_size.h
            boxes[:, 3] *= image_size.h
            boxes = torch.FloatTensor(boxes)

            boxes = BoxList(boxes, image_size=image_size, mode="xyxy")
        else:
            boxes = BoxList.create_empty(image_size)
            label_ids_global = torch.tensor([], dtype=torch.long)
            difficult_flag = torch.tensor([], dtype=torch.bool)

        boxes.add_field("labels", label_ids_global)
        boxes.add_field("difficult", difficult_flag)
        boxes.add_field("labels_original", label_ids_global)
        boxes.add_field("difficult_original", difficult_flag)
        return boxes
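
A hedged usage sketch with a two-row DataFrame: column names follow the snippet, box coordinates are stored normalized to [0, 1] (the function rescales them by the image size), and the FeatureMapSize import path is an assumption. In the repo this appears to be a (static) method of a dataset class; it is called as a plain function here.

import pandas as pd
from os2d.structures.feature_map import FeatureMapSize    # assumed path

image_data = pd.DataFrame({
    "classid":   [3, 7],
    "difficult": [0, 1],
    "lx": [0.10, 0.50], "ty": [0.20, 0.25],   # normalized left / top
    "rx": [0.40, 0.90], "by": [0.60, 0.75],   # normalized right / bottom
})
image_size = FeatureMapSize(w=640, h=480)

boxes = get_boxes_from_image_dataframe(image_data, image_size)
print(len(boxes), boxes.get_field("labels"), boxes.get_field("difficult"))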
def save_cropped_boxes(dataset, tgt_image_path, extension=".jpg", num_random_crops_per_image=0):
    # crop all the boxes
    db = {"cids":[], "cluster":[], "gtbboxid":[], "classid":[], "imageid":[], "difficult":[], "type":[], "size":[], "bbox":[]}

    for image_id in tqdm(dataset.image_ids):
        img = dataset._get_dataset_image_by_id(image_id)
        boxes = dataset.get_image_annotation_for_imageid(image_id)

        assert boxes.has_field("labels"), "GT boxes need a field 'labels'"
        # remove all fields except "labels" and "difficult"
        for f in boxes.fields():
            if f not in ["labels", "difficult"]:
                boxes.remove_field(f)
        if not boxes.has_field("difficult"):
            boxes.add_field("difficult", torch.zeros(len(boxes), dtype=torch.bool))

        num_gt_boxes = len(boxes)
        im_size = FeatureMapSize(img=img)
        assert im_size == boxes.image_size

        eval_scale = dataset.get_eval_scale()

        # sample random boxes if needed
        if num_random_crops_per_image > 0:
            boxes_random = torch.rand(num_random_crops_per_image, 4)
            x1 = torch.min(boxes_random[:, 0], boxes_random[:, 2]) * im_size.w
            x2 = torch.max(boxes_random[:, 0], boxes_random[:, 2]) * im_size.w
            y1 = torch.min(boxes_random[:, 1], boxes_random[:, 3]) * im_size.h
            y2 = torch.max(boxes_random[:, 1], boxes_random[:, 3]) * im_size.h
            boxes_random = torch.stack([x1, y1, x2, y2], 1).floor()

            # remove random boxes that are too small
            min_size = 10.0 / eval_scale * max(im_size.w, im_size.h)
            mask_bad_boxes = (boxes_random[:,0] + min_size > boxes_random[:,2]) | (boxes_random[:,1] + min_size > boxes_random[:,3])
            good_boxes = torch.nonzero(~mask_bad_boxes).view(-1)
            boxes_random = boxes_random[good_boxes]

            boxes_random = BoxList(boxes_random, im_size, mode="xyxy")

            boxes_random.add_field("labels", torch.full([len(boxes_random)], -1, dtype=torch.long))
            boxes_random.add_field("difficult", torch.zeros(len(boxes_random), dtype=torch.bool))
            boxes = cat_boxlist([boxes, boxes_random])

        if boxes is not None:
            for i_box in range(len(boxes)):
                # box format: left, top, right, bottom
                box = boxes[i_box].bbox_xyxy.view(-1)
                box = [b.item() for b in box]
                cropped_img = img.crop(box)

                if i_box < num_gt_boxes:
                    lbl = boxes[i_box].get_field("labels").item()
                    dif_flag = boxes[i_box].get_field("difficult").item()
                    box_id = i_box
                    box_type = "GT"
                else:
                    lbl = -1
                    dif_flag = 0
                    box_id = i_box
                    box_type = "RN"

                # create the file name to be used with cirtorch.datasets.datahelpers.cid2filename and their dataloader
                cid = "box{box_id:05d}_lbl{label:05d}_dif{dif:01d}_im{image_id:05d}{box_type}".format(box_id=box_id, image_id = image_id, label = lbl, dif = dif_flag, box_type=box_type)
                file_name = cid2filename(cid, prefix=tgt_image_path)

                # save the image
                image_path, _ = os.path.split(file_name)
                mkdir(image_path)
                if extension:
                    cropped_img.save("{}{}".format(file_name, extension))
                else:
                    # cirtorch uses files with empty extension for training for some reason, need to support that
                    cropped_img.save("{}".format(file_name), format="jpeg")

                # add to the db structure
                db["cids"].append(cid)
                db["cluster"].append(lbl)  # use labels as clusters not to sample negatives from the same object
                db["classid"].append(lbl)
                db["gtbboxid"].append(box_id)
                db["imageid"].append(image_id)
                db["difficult"].append(dif_flag)
                if i_box < num_gt_boxes:
                    db["type"].append("gtproposal")
                else:
                    db["type"].append("randomcrop")
                db["size"].append(cropped_img.size)
                db["bbox"].append(box)  # format (x1,y1,x2,y2)

    return db
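
The returned db is a dictionary of parallel lists with one entry per saved crop; a minimal sketch of calling the function and serializing the result (the dataset object and the paths are placeholders):

import pickle

# dataset is assumed to be an OS2D-style dataset providing the methods used above
# (image_ids, _get_dataset_image_by_id, get_image_annotation_for_imageid, get_eval_scale)
db = save_cropped_boxes(dataset,
                        tgt_image_path="data/crops",
                        extension=".jpg",
                        num_random_crops_per_image=2)

with open("data/crops_db.pkl", "wb") as f:     # illustrative file name
    pickle.dump(db, f)

print({k: len(v) for k, v in db.items()})      # all lists have the same length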
Example no. 8
    def evaluate_detections(self,
                            all_boxes,
                            output_dir,
                            mAP_iou_threshold=0.5):
        predictions = []
        gt_boxes = []
        roidb = self.roidb
        for i_image, roi in enumerate(roidb):
            image_size = FeatureMapSize(w=roi["width"], h=roi["height"])
            if roi["boxes"].size > 0:
                roi_gt_boxes = BoxList(roi["boxes"], image_size, mode="xyxy")
            else:
                roi_gt_boxes = BoxList.create_empty(image_size)
            roi_gt_boxes.add_field(
                "labels", torch.as_tensor(roi["gt_classes"],
                                          dtype=torch.int32))
            roi_gt_boxes.add_field(
                "difficult",
                torch.as_tensor(roi["gt_ishard"], dtype=torch.int32))

            gt_boxes.append(roi_gt_boxes)

            roi_detections = []
            for i_class, class_boxes in enumerate(all_boxes):
                assert len(class_boxes) == len(roidb), \
                    "Number of detections for class {0}, image {1} ({2}) is inconsistent with the length of roidb ({3})".format(i_class, i_image, len(class_boxes), len(roidb))
                boxes = class_boxes[i_image]
                if len(boxes) > 0:
                    assert boxes.shape[
                        1] == 5, "Detections should be of shape (:,5), but are {0} for class {1}, image {2}".format(
                            boxes.shape, i_class, i_image)
                    bbox = BoxList(boxes[:, :4], image_size, mode="xyxy")
                    scores = boxes[:, -1]
                    bbox.add_field(
                        "scores", torch.as_tensor(scores, dtype=torch.float32))
                    bbox.add_field(
                        "labels",
                        torch.full(scores.shape, i_class, dtype=torch.int32))
                    roi_detections.append(bbox)

            if roi_detections:
                roi_detections = cat_boxlist(roi_detections)
            else:
                roi_detections = BoxList.create_empty(image_size)
                roi_detections.add_field(
                    "scores", torch.zeros((0, ), dtype=torch.float32))
                roi_detections.add_field("labels",
                                         torch.zeros((0, ), dtype=torch.int32))
            predictions.append(roi_detections)

            if False:
                self.visualize_detections(i_image,
                                          gt=roi_gt_boxes,
                                          dets=roi_detections)

        ap_data = do_voc_evaluation(predictions,
                                    gt_boxes,
                                    iou_thresh=mAP_iou_threshold,
                                    use_07_metric=False)
        print("mAP@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["map"]))
        print("mAPw@{:0.2f}: {:0.4f}".format(mAP_iou_threshold,
                                             ap_data["map_weighted"]))
        print("recall@{:0.2f}: {:0.4f}".format(mAP_iou_threshold,
                                               ap_data["recall"]))

        return ap_data['map']
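
For reference, all_boxes follows the classic Fast/Faster R-CNN layout that the assertions above check: all_boxes[class][image] is an (N, 5) array with columns x1, y1, x2, y2, score. A sketch constructing such a structure (sizes are illustrative; the evaluation call is commented out because it needs a dataset with a real roidb):

import numpy as np

num_classes, num_images = 3, 10   # num_images must equal len(dataset.roidb)

all_boxes = [[np.zeros((0, 5), dtype=np.float32) for _ in range(num_images)]
             for _ in range(num_classes)]
# e.g. two detections of class 1 in image 0
all_boxes[1][0] = np.array([[10, 20, 110, 220, 0.9],
                            [15, 25, 120, 230, 0.3]], dtype=np.float32)

# mean_ap = dataset.evaluate_detections(all_boxes, output_dir="eval_output")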