def show_annotated_image(img, boxes, labels, scores, class_ids, score_threshold=0.0,
                         default_boxes=None, transform_corners=None,
                         max_dets=None, showfig=False, image_id=None):
    good_ids = torch.nonzero(scores.float() > score_threshold).view(-1)
    if good_ids.numel() > 0:
        if max_dets is not None:
            _, ids = scores[good_ids].sort(descending=False)
            good_ids = good_ids[ids[-max_dets:]]
        boxes = boxes[good_ids].cpu()
        labels = labels[good_ids].cpu()
        scores = scores[good_ids].cpu()
        label_names = ["Cl " + str(l.item()) for l in labels]
        box_colors = ["yellow"] * len(boxes)
    else:
        boxes = BoxList.create_empty(boxes.image_size)
        labels = torch.LongTensor(0)
        scores = torch.FloatTensor(0)
        label_names = []
        box_colors = []

    # create visualizations of default boxes
    if default_boxes is not None:
        default_boxes = default_boxes[good_ids].cpu()

        # append boxes
        boxes = torch.cat([default_boxes.bbox_xyxy, boxes.bbox_xyxy], 0)
        labels = torch.cat([torch.Tensor(len(default_boxes)).to(labels).zero_(), labels], 0)
        scores = torch.cat([torch.Tensor(len(default_boxes)).to(scores).fill_(float("nan")), scores], 0)
        label_names = [""] * len(default_boxes) + label_names
        box_colors = ["cyan"] * len(default_boxes) + box_colors
    else:
        boxes = boxes.bbox_xyxy

    if transform_corners is not None:
        # draw polygons representing the corners of a transformation
        transform_corners = transform_corners[good_ids].cpu()

    vis_image(img, showfig=showfig, boxes=boxes, scores=scores,
              label_names=label_names, colors=box_colors,
              image_id=image_id, polygons=transform_corners)
    return
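
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained illustration of the score filtering used above:
# keep detections above a threshold and, optionally, only the max_dets
# highest-scoring ones. Only torch is assumed; the helper name and the example
# tensors are made up for illustration.
import torch

def _example_select_top_detections(scores, score_threshold=0.5, max_dets=2):
    good_ids = torch.nonzero(scores > score_threshold).view(-1)
    if good_ids.numel() > 0 and max_dets is not None:
        # sort ascending and keep the last max_dets entries (the highest scores)
        _, order = scores[good_ids].sort(descending=False)
        good_ids = good_ids[order[-max_dets:]]
    return good_ids

# _example_select_top_detections(torch.tensor([0.9, 0.1, 0.7, 0.6]))
# -> indices of the two best detections above 0.5, i.e. tensor([2, 0])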
def _transform_image_to_pyramid(self, image_id, boxes=None,
                                do_augmentation=True, hflip=False, vflip=False,
                                pyramid_scales=(1,), mined_data=None):
    img = self._get_dataset_image_by_id(image_id)
    img_size = FeatureMapSize(img=img)

    do_augmentation = do_augmentation and self.data_augmentation is not None
    num_pyramid_levels = len(pyramid_scales)

    use_mined_crop = mined_data is not None
    if use_mined_crop:
        crop_position = mined_data["crop_position_xyxy"]

    if boxes is None:
        boxes = BoxList.create_empty(img_size)
    mask_cutoff_boxes = torch.zeros(len(boxes), dtype=torch.bool)
    mask_difficult_boxes = torch.zeros(len(boxes), dtype=torch.bool)

    box_inverse_transform = TransformList()
    # batch level data augmentation
    img, boxes = transforms_boxes.transpose(img, hflip=hflip, vflip=vflip,
                                            boxes=boxes,
                                            transform_list=box_inverse_transform)
    if use_mined_crop:
        # update crop_position_xyxy with the symmetries
        if hflip or vflip:
            _, crop_position = transforms_boxes.transpose(img, hflip=hflip, vflip=vflip,
                                                          boxes=crop_position)

    if do_augmentation:
        if self.data_augmentation.do_random_crop:
            if not use_mined_crop:
                img, boxes, mask_cutoff_boxes, mask_difficult_boxes = \
                    self.data_augmentation.random_crop(img, boxes=boxes,
                                                       transform_list=box_inverse_transform)
            else:
                img, boxes, mask_cutoff_boxes, mask_difficult_boxes = \
                    self.data_augmentation.crop_image(img, crop_position, boxes=boxes,
                                                      transform_list=box_inverse_transform)

            img, boxes = transforms_boxes.resize(img,
                                                 target_size=self.data_augmentation.random_crop_size,
                                                 random_interpolation=self.data_augmentation.random_interpolation,
                                                 boxes=boxes,
                                                 transform_list=box_inverse_transform)

        # color distortion
        img = self.data_augmentation.random_distort(img)

    random_interpolation = self.data_augmentation.random_interpolation if do_augmentation else False
    img_size = FeatureMapSize(img=img)
    pyramid_sizes = [FeatureMapSize(w=int(img_size.w * s), h=int(img_size.h * s)) for s in pyramid_scales]
    img_pyramid = []
    boxes_pyramid = []
    pyramid_box_inverse_transform = []
    for p_size in pyramid_sizes:
        box_inverse_transform_this_scale = copy.deepcopy(box_inverse_transform)
        p_img, p_boxes = transforms_boxes.resize(img, target_size=p_size,
                                                 random_interpolation=random_interpolation,
                                                 boxes=boxes,
                                                 transform_list=box_inverse_transform_this_scale)
        pyramid_box_inverse_transform.append(box_inverse_transform_this_scale)
        img_pyramid.append(p_img)
        boxes_pyramid.append(p_boxes)

    transforms_th = [transforms.ToTensor()]
    if self.img_normalization is not None:
        transforms_th += [transforms.Normalize(self.img_normalization["mean"],
                                               self.img_normalization["std"])]
    for i_p in range(num_pyramid_levels):
        img_pyramid[i_p] = transforms.Compose(transforms_th)(img_pyramid[i_p])

    return img_pyramid, boxes_pyramid, mask_cutoff_boxes, mask_difficult_boxes, pyramid_box_inverse_transform
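
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained illustration of the last two steps of the pyramid
# construction above: compute one target size per pyramid scale and apply
# ToTensor + Normalize to every level. Plain (w, h) tuples stand in for
# FeatureMapSize, the helper name is made up, and the ImageNet normalization
# constants are used here only as example values.
from PIL import Image
from torchvision import transforms

def _example_build_pyramid(img, pyramid_scales=(0.5, 1.0, 2.0)):
    w, h = img.size
    pyramid_sizes = [(int(w * s), int(h * s)) for s in pyramid_scales]
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # resize to every scale and convert each level to a normalized tensor
    return [normalize(img.resize(size, Image.BILINEAR)) for size in pyramid_sizes]

# _example_build_pyramid(Image.new("RGB", (64, 48)))  # -> 3 tensors of shape 3 x h_s x w_s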
def make_iterator_extract_scores_from_images_batched(dataloader, maskrcnn_model, maskrcnn_config, logger,
                                                     image_batch_size=None, is_cuda=False):
    logger.info("Starting iterations over images")

    # get images of all classes
    class_images, class_aspect_ratios, class_ids = dataloader.get_all_class_images()
    num_classes = len(class_images)
    assert len(class_aspect_ratios) == num_classes
    assert len(class_ids) == num_classes
    query_img_sizes = [FeatureMapSize(img=img) for img in class_images]

    # loop over all images
    iterator_batches = dataloader.make_iterator_for_all_images(image_batch_size)
    for batch_ids, pyramids_batch, box_transforms_batch, initial_img_size_batch in iterator_batches:
        t_start_batch = time.time()

        # extract features at all pyramid levels
        batch_images_pyramid = []
        bboxes_xyxy = []
        labels = []
        scores = []
        num_pyramid_levels = len(pyramids_batch)
        for batch_images in pyramids_batch:
            if is_cuda:
                batch_images = batch_images.cuda()
            # print("Image size:", batch_images.size())
            batch_images = [dataloader.unnorm_image(img) for img in batch_images]
            batch_images = torch.stack(batch_images, 0)

            bboxes_xyxy_, labels_, scores_ = run_maskrcnn_on_images(maskrcnn_model, maskrcnn_config, batch_images)

            bboxes_xyxy.append(bboxes_xyxy_)
            labels.append(labels_)
            scores.append(scores_)
            batch_images_pyramid.append(batch_images)

        for i_image_in_batch, image_id in enumerate(batch_ids):
            # get data from all pyramid levels
            bboxes_xyxy_p = []
            labels_p = []
            scores_p = []
            for i_p in range(num_pyramid_levels):
                bboxes_xyxy_p.append(bboxes_xyxy[i_p][i_image_in_batch])
                labels_p.append(labels[i_p][i_image_in_batch])
                scores_p.append(scores[i_p][i_image_in_batch])

            # get the pyramid of one image
            one_image_pyramid = [p[i_image_in_batch] for p in batch_images_pyramid]

            # extract the box transformations
            box_reverse_transforms = box_transforms_batch[i_image_in_batch]

            # get the boxes in the correct format
            bboxes_xyxy_p = [BoxList(bbox, FeatureMapSize(img=img), mode="xyxy")
                             for bbox, img in zip(bboxes_xyxy_p, one_image_pyramid)]
            bboxes_xyxy_p = [t(bb) for t, bb in zip(box_reverse_transforms, bboxes_xyxy_p)]

            # add labels and scores into the box structure
            for bb, l, s in zip(bboxes_xyxy_p, labels_p, scores_p):
                bb.add_field("labels", l)
                bb.add_field("scores", s)

            # get the size of the initial image
            initial_img_size = initial_img_size_batch[i_image_in_batch]

            yield image_id, bboxes_xyxy_p, one_image_pyramid, query_img_sizes, class_ids, initial_img_size
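
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained illustration of the regrouping done in the inner
# loop above: detector outputs are stored per pyramid level (outer index) and
# per image in the batch (inner index), and get regrouped into one pyramid per
# image. The helper name and the string payloads are made up for illustration.
def _example_regroup_levels_to_images(per_level_outputs):
    num_levels = len(per_level_outputs)
    num_images = len(per_level_outputs[0])
    return [[per_level_outputs[i_p][i_image] for i_p in range(num_levels)]
            for i_image in range(num_images)]

# per_level = [["img0@s1", "img1@s1"], ["img0@s2", "img1@s2"]]
# _example_regroup_levels_to_images(per_level)
# -> [["img0@s1", "img0@s2"], ["img1@s1", "img1@s2"]]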
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5, merge_classes_together=False):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding boxes
    obtained from a dataset which has :math:`N` images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox_xyxy.cpu().numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox_xyxy.cpu().numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        if gt_boxlist.has_field("difficult"):
            gt_difficult = gt_boxlist.get_field("difficult").numpy()
        else:
            gt_difficult = np.zeros_like(gt_label)

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0,) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.image_size),
                BoxList(gt_bbox_l, gt_boxlist.image_size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    if merge_classes_together:
        n_pos = {0: sum(n_pos[i] for i in n_pos)}
        # merge lists together, copy to avoid rewriting the old lists
        old_score = copy.deepcopy(score)
        score = {0: sum((old_score[i] for i in old_score), [])}
        old_match = copy.deepcopy(match)
        match = {0: sum((old_match[i] for i in old_match), [])}

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec, n_pos
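
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained numpy illustration of the precision/recall
# computation at the end of the function: detections are sorted by score,
# matches (1 = true positive, 0 = false positive, -1 = ignored/difficult) are
# accumulated with cumsum, and recall is normalized by the number of
# non-difficult ground-truth boxes. The helper name and the numbers are made up.
import numpy as np

def _example_prec_rec(scores, matches, num_gt):
    order = np.asarray(scores).argsort()[::-1]
    match_sorted = np.asarray(matches, dtype=np.int8)[order]
    tp = np.cumsum(match_sorted == 1)
    fp = np.cumsum(match_sorted == 0)
    prec = tp / (fp + tp)          # nan where fp + tp == 0, as in the function above
    rec = tp / num_gt if num_gt > 0 else None
    return prec, rec

# _example_prec_rec(scores=[0.9, 0.8, 0.3], matches=[1, 0, 1], num_gt=2)
# -> prec = [1.0, 0.5, 0.666...], rec = [0.5, 0.5, 1.0]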
def forward(self, feature_maps):
    """
    Args:
        feature_maps (Tensor[float], size b^A x d x h^A x w^A) - contains the feature map of the input image
            b^A - batch size
            d - feature dimensionality
            h^A - height of the feature map
            w^A - width of the feature map

    Returns:
        # here b^C is the class batch size, i.e., the number of class images contained in self.class_batch_size passed when creating this object
        output_localization (Tensor[float], size b^A x b^C x 4 x h^A x w^A) - the localization output w.r.t. the standard box encoding,
            computed by DetectionBoxCoder.build_loc_targets
        output_recognition (Tensor[float], size b^A x b^C x 1 x h^A x w^A) - the recognition output for each of the classes:
            the correlation, linearly converted to the [0, 1] segment, the higher the better match to the class
        output_recognition_transform_detached (Tensor[float], size b^A x b^C x 1 x h^A x w^A) - same as output_recognition,
            but with the computational graph detached from the transformation (for a backward pass that does not update
            the transformation - intended for the negatives)
        corner_coordinates (Tensor[float], size b^A x b^C x 8 x h^A x w^A) - the corners of the default boxes after the
            transformation, detached from the computational graph, for visualization only
    """
    # get dims
    batch_size = feature_maps.size(0)
    feature_dim = feature_maps.size(1)
    image_fm_size = FeatureMapSize(img=feature_maps)
    class_fm_size = FeatureMapSize(img=self.class_feature_maps)
    feature_dim_for_regression = class_fm_size.h * class_fm_size.w

    class_feature_dim = self.class_feature_maps.size(1)
    assert feature_dim == class_feature_dim, \
        "Feature dimensionality of input={0} and class={1} feature maps has to be equal".format(feature_dim, class_feature_dim)

    # L2-normalize the feature map
    feature_maps = normalize_feature_map_L2(feature_maps, 1e-5)

    # get correlations all to all
    corr_maps = torch.einsum("bfhw,afxy->abwhxy", self.class_feature_maps, feature_maps)
    # need to try to optimize this with opt_einsum: https://optimized-einsum.readthedocs.io/en/latest/
    # CAUTION: note the switch of dimensions hw to wh. This is done for compatibility with the FeatureCorrelation class
    # by Ignacio Rocco https://github.com/ignacio-rocco/ncnet/blob/master/lib/model.py (to be able to load their models)

    # reshape to have the correlation map of dimensions similar to the standard tensor for image feature maps
    corr_maps = corr_maps.contiguous().view(batch_size * self.class_batch_size,
                                            feature_dim_for_regression,
                                            image_fm_size.h,
                                            image_fm_size.w)

    # compute the grids to resample corr maps
    resampling_grids_local_coord = self.aligner(corr_maps)

    # build classification outputs
    cor_maps_for_recognition = corr_maps.contiguous().view(batch_size,
                                                           self.class_batch_size,
                                                           feature_dim_for_regression,
                                                           image_fm_size.h,
                                                           image_fm_size.w)
    resampling_grids_local_coord = resampling_grids_local_coord.contiguous().view(batch_size,
                                                                                  self.class_batch_size,
                                                                                  image_fm_size.h,
                                                                                  image_fm_size.w,
                                                                                  self.aligner.out_grid_size.h,
                                                                                  self.aligner.out_grid_size.w,
                                                                                  2)

    # need to recompute resampling_grids to [-1, 1] coordinates w.r.t. the feature maps to sample points with F.grid_sample
    # first get the list of boxes that corresponds to the receptive fields of the parameter regression network:
    # box sizes are the receptive field sizes, stride is the network stride
    default_boxes_xyxy_wrt_fm = self.box_grid_generator_feature_map_level.create_strided_boxes_columnfirst(fm_size=image_fm_size)

    default_boxes_xyxy_wrt_fm = default_boxes_xyxy_wrt_fm.view(1, 1, image_fm_size.h, image_fm_size.w, 4)
    # 1 (to broadcast to batch_size) x 1 (to broadcast to class batch_size) x box_grid_height x box_grid_width x 4
    default_boxes_xyxy_wrt_fm = default_boxes_xyxy_wrt_fm.to(resampling_grids_local_coord.device)
    resampling_grids_fm_coord = convert_box_coordinates_local_to_global(resampling_grids_local_coord,
                                                                        default_boxes_xyxy_wrt_fm)

    # convert to coordinates normalized to [-1, 1] (to be compatible with torch.nn.functional.grid_sample)
    resampling_grids_fm_coord_x = resampling_grids_fm_coord.narrow(-1, 0, 1)
    resampling_grids_fm_coord_y = resampling_grids_fm_coord.narrow(-1, 1, 1)
    resampling_grids_fm_coord_unit = torch.cat([resampling_grids_fm_coord_x / (image_fm_size.w - 1) * 2 - 1,
                                                resampling_grids_fm_coord_y / (image_fm_size.h - 1) * 2 - 1], dim=-1)
    # clamp to fit the image plane
    resampling_grids_fm_coord_unit = resampling_grids_fm_coord_unit.clamp(-1, 1)

    # extract and pool matches
    # # slower code:
    # matches_summed = self.resample_of_correlation_map_simple(cor_maps_for_recognition,
    #                                                          resampling_grids_fm_coord_unit,
    #                                                          self.class_pool_mask)

    # we use a faster, but somewhat more obscure version
    matches_summed = self.resample_of_correlation_map_fast(cor_maps_for_recognition,
                                                           resampling_grids_fm_coord_unit,
                                                           self.class_pool_mask)
    if matches_summed.requires_grad:
        matches_summed_transform_detached = self.resample_of_correlation_map_fast(cor_maps_for_recognition,
                                                                                  resampling_grids_fm_coord_unit.detach(),
                                                                                  self.class_pool_mask)
    else:
        # optimization to make eval faster
        matches_summed_transform_detached = matches_summed

    # build localization targets
    default_boxes_xyxy_wrt_image = self.box_grid_generator_image_level.create_strided_boxes_columnfirst(fm_size=image_fm_size)

    default_boxes_xyxy_wrt_image = default_boxes_xyxy_wrt_image.view(1, 1, image_fm_size.h, image_fm_size.w, 4)
    # 1 (to broadcast to batch_size) x 1 (to broadcast to class batch_size) x box_grid_height x box_grid_width x 4
    default_boxes_xyxy_wrt_image = default_boxes_xyxy_wrt_image.to(resampling_grids_local_coord.device)
    resampling_grids_image_coord = convert_box_coordinates_local_to_global(resampling_grids_local_coord,
                                                                           default_boxes_xyxy_wrt_image)

    num_pooled_points = self.aligner.out_grid_size.w * self.aligner.out_grid_size.h
    resampling_grids_x = resampling_grids_image_coord.narrow(-1, 0, 1).contiguous().view(-1, num_pooled_points)
    resampling_grids_y = resampling_grids_image_coord.narrow(-1, 1, 1).contiguous().view(-1, num_pooled_points)
    class_boxes_xyxy = torch.stack([resampling_grids_x.min(dim=1)[0],
                                    resampling_grids_y.min(dim=1)[0],
                                    resampling_grids_x.max(dim=1)[0],
                                    resampling_grids_y.max(dim=1)[0]], 1)

    # extract rectangle borders to draw complete boxes
    corner_coordinates = resampling_grids_image_coord[:, :, :, :, [0, -1]][:, :, :, :, :, [0, -1]]  # only the corners
    corner_coordinates = corner_coordinates.detach_()
    corner_coordinates = corner_coordinates.view(batch_size,
                                                 self.class_batch_size,
                                                 image_fm_size.h,
                                                 image_fm_size.w,
                                                 8)  # batch_size x label_batch_size x fm_height x fm_width x 8
    corner_coordinates = corner_coordinates.transpose(3, 4).transpose(2, 3)  # batch_size x label_batch_size x 8 x fm_height x fm_width

    class_boxes = BoxList(class_boxes_xyxy.view(-1, 4), image_fm_size, mode="xyxy")
    default_boxes_wrt_image = BoxList(default_boxes_xyxy_wrt_image.view(-1, 4), image_fm_size, mode="xyxy")
    default_boxes_with_image_batches = cat_boxlist([default_boxes_wrt_image] * batch_size * self.class_batch_size)

    output_localization = Os2dBoxCoder.build_loc_targets(class_boxes, default_boxes_with_image_batches)  # num_boxes x 4
    output_localization = output_localization.view(batch_size,
                                                   self.class_batch_size,
                                                   image_fm_size.h,
                                                   image_fm_size.w,
                                                   4)  # batch_size x label_batch_size x fm_height x fm_width x 4
    output_localization = output_localization.transpose(3, 4).transpose(2, 3)  # batch_size x label_batch_size x 4 x fm_height x fm_width

    output_recognition = (matches_summed - 1.0) / 2.0
    output_recognition_transform_detached = (matches_summed_transform_detached - 1.0) / 2.0
    return output_localization, output_recognition, output_recognition_transform_detached, corner_coordinates
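
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained torch illustration of the coordinate convention
# used above when resampling correlation maps: absolute feature-map
# coordinates (x in [0, w-1], y in [0, h-1]) are mapped to the [-1, 1] range
# expected by torch.nn.functional.grid_sample and clamped to the image plane.
# The helper name, the random feature map, and the grid are made up.
import torch
import torch.nn.functional as F

def _example_resample_feature_map(feature_map, grid_xy_abs):
    # feature_map: 1 x c x h x w; grid_xy_abs: 1 x h_out x w_out x 2 (x, y in feature-map pixels)
    h, w = feature_map.shape[-2:]
    x = grid_xy_abs[..., 0:1] / (w - 1) * 2 - 1
    y = grid_xy_abs[..., 1:2] / (h - 1) * 2 - 1
    grid_unit = torch.cat([x, y], dim=-1).clamp(-1, 1)
    return F.grid_sample(feature_map, grid_unit, mode="bilinear", align_corners=True)

# fm = torch.randn(1, 8, 10, 12)
# grid = torch.stack(torch.meshgrid(torch.arange(12.0), torch.arange(10.0), indexing="xy"), dim=-1).unsqueeze(0)
# _example_resample_feature_map(fm, grid).shape  # -> torch.Size([1, 8, 10, 12])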
def get_boxes_from_image_dataframe(image_data, image_size):
    if not image_data.empty:
        # get the labels
        label_ids_global = torch.tensor(list(image_data["classid"]), dtype=torch.long)
        difficult_flag = torch.tensor(list(image_data["difficult"] == 1), dtype=torch.bool)

        # get the boxes
        boxes = image_data[["lx", "ty", "rx", "by"]].to_numpy()
        # renorm boxes using the image size
        boxes[:, 0] *= image_size.w
        boxes[:, 2] *= image_size.w
        boxes[:, 1] *= image_size.h
        boxes[:, 3] *= image_size.h
        boxes = torch.FloatTensor(boxes)

        boxes = BoxList(boxes, image_size=image_size, mode="xyxy")
    else:
        boxes = BoxList.create_empty(image_size)
        label_ids_global = torch.tensor([], dtype=torch.long)
        difficult_flag = torch.tensor([], dtype=torch.bool)

    boxes.add_field("labels", label_ids_global)
    boxes.add_field("difficult", difficult_flag)
    boxes.add_field("labels_original", label_ids_global)
    boxes.add_field("difficult_original", difficult_flag)
    return boxes
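
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained illustration of the annotation format consumed
# above: one row per box with relative "lx", "ty", "rx", "by" coordinates plus
# "classid" and "difficult" flags, converted to absolute pixel coordinates.
# The helper name, the dataframe content, and the 100x50 image size are made
# up; BoxList is not used here to keep the sketch standalone.
import pandas as pd
import torch

def _example_dataframe_to_pixel_boxes(image_data, img_w, img_h):
    boxes = torch.as_tensor(image_data[["lx", "ty", "rx", "by"]].to_numpy(), dtype=torch.float)
    boxes[:, [0, 2]] *= img_w
    boxes[:, [1, 3]] *= img_h
    labels = torch.tensor(list(image_data["classid"]), dtype=torch.long)
    difficult = torch.tensor(list(image_data["difficult"] == 1), dtype=torch.bool)
    return boxes, labels, difficult

# df = pd.DataFrame({"lx": [0.1], "ty": [0.2], "rx": [0.5], "by": [0.8],
#                    "classid": [3], "difficult": [0]})
# _example_dataframe_to_pixel_boxes(df, img_w=100, img_h=50)
# -> boxes tensor([[10., 10., 50., 40.]]), labels tensor([3]), difficult tensor([False])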
def save_cropped_boxes(dataset, tgt_image_path, extension=".jpg", num_random_crops_per_image=0):
    # crop all the boxes
    db = {"cids": [], "cluster": [], "gtbboxid": [], "classid": [], "imageid": [],
          "difficult": [], "type": [], "size": [], "bbox": []}

    for image_id in tqdm(dataset.image_ids):
        img = dataset._get_dataset_image_by_id(image_id)
        boxes = dataset.get_image_annotation_for_imageid(image_id)
        assert boxes.has_field("labels"), "GT boxes need a field 'labels'"

        # remove all fields except "labels" and "difficult"
        for f in boxes.fields():
            if f not in ["labels", "difficult"]:
                boxes.remove_field(f)
        if not boxes.has_field("difficult"):
            boxes.add_field("difficult", torch.zeros(len(boxes), dtype=torch.bool))

        num_gt_boxes = len(boxes)
        im_size = FeatureMapSize(img=img)
        assert im_size == boxes.image_size
        eval_scale = dataset.get_eval_scale()

        # sample random boxes if needed
        if num_random_crops_per_image > 0:
            boxes_random = torch.rand(num_random_crops_per_image, 4)
            x1 = torch.min(boxes_random[:, 0], boxes_random[:, 2]) * im_size.w
            x2 = torch.max(boxes_random[:, 0], boxes_random[:, 2]) * im_size.w
            y1 = torch.min(boxes_random[:, 1], boxes_random[:, 3]) * im_size.h
            y2 = torch.max(boxes_random[:, 1], boxes_random[:, 3]) * im_size.h
            boxes_random = torch.stack([x1, y1, x2, y2], 1).floor()

            # remove boxes that are too small
            min_size = 10.0 / eval_scale * max(im_size.w, im_size.h)
            mask_bad_boxes = (boxes_random[:, 0] + min_size > boxes_random[:, 2]) | \
                             (boxes_random[:, 1] + min_size > boxes_random[:, 3])
            good_boxes = torch.nonzero(~mask_bad_boxes).view(-1)
            boxes_random = boxes_random[good_boxes]

            boxes_random = BoxList(boxes_random, im_size, mode="xyxy")
            boxes_random.add_field("labels", torch.full([len(boxes_random)], -1, dtype=torch.long))
            boxes_random.add_field("difficult", torch.zeros(len(boxes_random), dtype=torch.bool))
            boxes = cat_boxlist([boxes, boxes_random])

        if boxes is not None:
            for i_box in range(len(boxes)):
                # box format: left, top, right, bottom
                box = boxes[i_box].bbox_xyxy.view(-1)
                box = [b.item() for b in box]
                cropped_img = img.crop(box)

                if i_box < num_gt_boxes:
                    lbl = boxes[i_box].get_field("labels").item()
                    dif_flag = boxes[i_box].get_field("difficult").item()
                    box_id = i_box
                    box_type = "GT"
                else:
                    lbl = -1
                    dif_flag = 0
                    box_id = i_box
                    box_type = "RN"

                # create the file name to be used with cirtorch.datasets.datahelpers.cid2filename and their dataloader
                cid = "box{box_id:05d}_lbl{label:05d}_dif{dif:01d}_im{image_id:05d}{box_type}".format(
                    box_id=box_id, image_id=image_id, label=lbl, dif=dif_flag, box_type=box_type)
                file_name = cid2filename(cid, prefix=tgt_image_path)

                # save the image
                image_path, _ = os.path.split(file_name)
                mkdir(image_path)
                if extension:
                    cropped_img.save("{}{}".format(file_name, extension))
                else:
                    # cirtorch uses files with an empty extension for training for some reason, need to support that
                    cropped_img.save("{}".format(file_name), format="jpeg")

                # add to the db structure
                db["cids"].append(cid)
                db["cluster"].append(lbl)  # use labels as clusters not to sample negatives from the same object
                db["classid"].append(lbl)
                db["gtbboxid"].append(box_id)
                db["imageid"].append(image_id)
                db["difficult"].append(dif_flag)
                if i_box < num_gt_boxes:
                    db["type"].append("gtproposal")
                else:
                    db["type"].append("randomcrop")
                db["size"].append(cropped_img.size)
                db["bbox"].append(box)  # format (x1, y1, x2, y2)

    return db
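
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained torch illustration of the random-crop sampling
# used above: draw corner pairs uniformly in [0, 1], order them into valid
# xyxy boxes in pixel coordinates, and drop boxes whose width or height falls
# below a minimum size. The helper name, image size, and min_size are made up.
import torch

def _example_sample_random_crops(num_crops, img_w, img_h, min_size=10.0):
    r = torch.rand(num_crops, 4)
    x1 = torch.min(r[:, 0], r[:, 2]) * img_w
    x2 = torch.max(r[:, 0], r[:, 2]) * img_w
    y1 = torch.min(r[:, 1], r[:, 3]) * img_h
    y2 = torch.max(r[:, 1], r[:, 3]) * img_h
    boxes = torch.stack([x1, y1, x2, y2], dim=1).floor()
    too_small = (boxes[:, 0] + min_size > boxes[:, 2]) | (boxes[:, 1] + min_size > boxes[:, 3])
    return boxes[~too_small]

# _example_sample_random_crops(100, img_w=640, img_h=480).shape  # -> (n_kept, 4)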
def evaluate_detections(self, all_boxes, output_dir, mAP_iou_threshold=0.5):
    predictions = []
    gt_boxes = []
    roidb = self.roidb
    for i_image, roi in enumerate(roidb):
        image_size = FeatureMapSize(w=roi["width"], h=roi["height"])
        if roi["boxes"].size > 0:
            roi_gt_boxes = BoxList(roi["boxes"], image_size, mode="xyxy")
        else:
            roi_gt_boxes = BoxList.create_empty(image_size)
        roi_gt_boxes.add_field("labels", torch.as_tensor(roi["gt_classes"], dtype=torch.int32))
        roi_gt_boxes.add_field("difficult", torch.as_tensor(roi["gt_ishard"], dtype=torch.int32))

        gt_boxes.append(roi_gt_boxes)

        roi_detections = []
        for i_class, class_boxes in enumerate(all_boxes):
            assert len(class_boxes) == len(roidb), \
                "Number of detections for class {0}, image {1} ({2}) is inconsistent with the length of roidb ({3})".format(
                    i_class, i_image, len(class_boxes), len(roidb))
            boxes = class_boxes[i_image]
            if len(boxes) > 0:
                assert boxes.shape[1] == 5, \
                    "Detections should be of shape (:,5), but are {0} for class {1}, image {2}".format(
                        boxes.shape, i_class, i_image)
                bbox = BoxList(boxes[:, :4], image_size, mode="xyxy")
                scores = boxes[:, -1]
                bbox.add_field("scores", torch.as_tensor(scores, dtype=torch.float32))
                bbox.add_field("labels", torch.full(scores.shape, i_class, dtype=torch.int32))
                roi_detections.append(bbox)

        if roi_detections:
            roi_detections = cat_boxlist(roi_detections)
        else:
            roi_detections = BoxList.create_empty(image_size)
            roi_detections.add_field("scores", torch.zeros((0,), dtype=torch.float32))
            roi_detections.add_field("labels", torch.zeros((0,), dtype=torch.int32))
        predictions.append(roi_detections)

        if False:
            self.visualize_detections(i_image, gt=roi_gt_boxes, dets=roi_detections)

    ap_data = do_voc_evaluation(predictions, gt_boxes,
                                iou_thresh=mAP_iou_threshold,
                                use_07_metric=False)
    print("mAP@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["map"]))
    print("mAPw@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["map_weighted"]))
    print("recall@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["recall"]))

    return ap_data["map"]
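
# --- Usage sketch (not part of the original code) ----------------------------
# A minimal, self-contained illustration of the `all_boxes` structure expected
# above: a list over classes, each entry a list over images, each entry an
# array of shape (num_detections, 5) holding x1, y1, x2, y2, score. The helper
# name and the numbers below are made up.
import numpy as np

def _example_make_all_boxes(num_classes, num_images):
    all_boxes = [[np.zeros((0, 5), dtype=np.float32) for _ in range(num_images)]
                 for _ in range(num_classes)]
    # one fake detection of class 1 in image 0: box (10, 20, 50, 60) with score 0.9
    all_boxes[1][0] = np.array([[10.0, 20.0, 50.0, 60.0, 0.9]], dtype=np.float32)
    return all_boxes

# all_boxes = _example_make_all_boxes(num_classes=3, num_images=2)
# all_boxes[1][0].shape  # -> (1, 5)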