def evaluate(dataloader, net, cfg, criterion=None, print_per_class_results=False):
    """
    Evaluate the provided model on one dataset.

    Args:
        dataloader - the dataloader providing the data
        net - the network to evaluate
        cfg - config with all the parameters
        criterion - criterion (usually the same one as used for training); can be None,
            in which case the related metrics are simply not computed
        print_per_class_results - flag controlling whether to print extra data
            (per-class AP) - usually used at the final evaluation

    Returns:
        losses (OrderedDict) - all computed metrics, e.g., losses["mAP@0.50"] - mAP at IoU threshold 0.5
    """
    logger = logging.getLogger("OS2D.evaluate")
    dataset_name = dataloader.get_name()
    dataset_scale = dataloader.get_eval_scale()
    logger.info("Starting to eval on {0}, scale {1}".format(dataset_name, dataset_scale))
    t_start_eval = time.time()
    net.eval()

    iterator = make_iterator_extract_scores_from_images_batched(dataloader, net, logger,
                                                                image_batch_size=cfg.eval.batch_size,
                                                                is_cuda=cfg.is_cuda,
                                                                class_image_augmentation=cfg.eval.class_image_augmentation)

    boxes = []
    gt_boxes = []
    losses = OrderedDict()
    image_ids = []

    # loop over all dataset images
    num_evaluated_images = 0
    for data in iterator:
        image_id, image_loc_scores_pyramid, image_class_scores_pyramid, \
            image_pyramid, query_img_sizes, class_ids, \
            box_reverse_transform, image_fm_sizes_p, transform_corners_pyramid \
            = data
        image_ids.append(image_id)
        num_evaluated_images += 1
        img_size_pyramid = [FeatureMapSize(img=img) for img in image_pyramid]

        num_labels = len(class_ids)
        gt_boxes_one_image = dataloader.get_image_annotation_for_imageid(image_id)
        gt_boxes.append(gt_boxes_one_image)

        # compute losses
        if len(gt_boxes_one_image) > 0:
            # there is some annotation for this image
            gt_labels_one_image = gt_boxes_one_image.get_field("labels")
            dataloader.update_box_labels_to_local(gt_boxes_one_image, class_ids)

            loc_targets_pyramid, class_targets_pyramid = \
                dataloader.box_coder.encode_pyramid(gt_boxes_one_image,
                                                    img_size_pyramid, num_labels,
                                                    default_box_transform_pyramid=box_reverse_transform)

            # return the original labels back
            gt_boxes_one_image.add_field("labels", gt_labels_one_image)

            # visualize GT for debug
            if cfg.visualization.eval.show_gt_boxes:
                visualizer.show_gt_boxes(image_id, gt_boxes_one_image, class_ids, dataloader)

            if cfg.is_cuda:
                loc_targets_pyramid = [loc_targets.cuda() for loc_targets in loc_targets_pyramid]
                class_targets_pyramid = [class_targets.cuda() for class_targets in class_targets_pyramid]
                transform_corners_pyramid = [transform_corners.cuda() for transform_corners in transform_corners_pyramid]

            add_batch_dim = lambda list_of_tensors: [t.unsqueeze(0) for t in list_of_tensors]
            if criterion is not None:
                # if criterion is provided, use it to compute all the metrics it can
                losses_iter = criterion(add_batch_dim(image_loc_scores_pyramid) if image_loc_scores_pyramid[0] is not None else None,
                                        add_batch_dim(loc_targets_pyramid),
                                        add_batch_dim(image_class_scores_pyramid),
                                        add_batch_dim(class_targets_pyramid))

                # convert to floats
                for l in losses_iter:
                    losses_iter[l] = losses_iter[l].mean().item()
                # printing
                print_meters(losses_iter, logger)
                # update logs
                add_to_meters_in_dict(losses_iter, losses)

        # decode image predictions
        boxes_one_image = \
            dataloader.box_coder.decode_pyramid(image_loc_scores_pyramid, image_class_scores_pyramid,
                                                img_size_pyramid, class_ids,
                                                nms_iou_threshold=cfg.eval.nms_iou_threshold,
                                                nms_score_threshold=cfg.eval.nms_score_threshold,
                                                inverse_box_transforms=box_reverse_transform,
                                                transform_corners_pyramid=transform_corners_pyramid)
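        # boxes_one_image is a single BoxList: decode_pyramid merges the
        # detections from all pyramid levels and runs NMS with the thresholds
        # passed above, attaching "labels" and "scores" fields to the result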
        boxes.append(boxes_one_image.cpu())

        if cfg.visualization.eval.show_detections:
            visualizer.show_detection_from_dataloader(boxes_one_image, image_id, dataloader,
                                                      cfg.visualization.eval, class_ids=None)

        if cfg.visualization.eval.show_class_heatmaps:
            visualizer.show_class_heatmaps(image_id, class_ids, image_fm_sizes_p,
                                           class_targets_pyramid, image_class_scores_pyramid,
                                           cfg_local=cfg.visualization.eval,
                                           class_image_augmentation=cfg.eval.class_image_augmentation)

        if cfg.is_cuda:
            torch.cuda.empty_cache()

    # normalize by the number of evaluated images
    for k in losses:
        losses[k] /= num_evaluated_images

    # save detections if requested
    path_to_save_detections = cfg.visualization.eval.path_to_save_detections
    if path_to_save_detections:
        data = {"image_ids" : image_ids,
                "boxes_xyxy" : [bb.bbox_xyxy for bb in boxes],
                "labels" : [bb.get_field("labels") for bb in boxes],
                "scores" : [bb.get_field("scores") for bb in boxes],
                "gt_boxes_xyxy" : [bb.bbox_xyxy for bb in gt_boxes],
                "gt_labels" : [bb.get_field("labels") for bb in gt_boxes],
                "gt_difficults" : [bb.get_field("difficult") for bb in gt_boxes]}

        dataset_name = dataloader.get_name()
        os.makedirs(path_to_save_detections, exist_ok=True)
        save_path = os.path.join(path_to_save_detections, dataset_name + "_detections.pth")
        torch.save(data, save_path)

    # compute mAP
    for mAP_iou_threshold in cfg.eval.mAP_iou_thresholds:
        logger.info("Evaluating at IoU threshold {:0.2f}".format(mAP_iou_threshold))
        ap_data = do_voc_evaluation(boxes, gt_boxes, iou_thresh=mAP_iou_threshold, use_07_metric=False)
        losses["mAP@{:0.2f}".format(mAP_iou_threshold)] = ap_data["map"]
        losses["mAPw@{:0.2f}".format(mAP_iou_threshold)] = ap_data["map_weighted"]
        losses["recall@{:0.2f}".format(mAP_iou_threshold)] = ap_data["recall"]

        if print_per_class_results:
            # per-class AP information
            for i_class, (ap, recall, n_pos) in \
                    enumerate(zip(ap_data["ap_per_class"], ap_data["recall_per_class"], ap_data["n_pos"])):
                if not np.isnan(ap):
                    assert i_class in class_ids, "Could not find class_id in the list of ids"
                    logger.info("Class {0} (local {3}), AP {1:0.4f}, #obj {2}, recall {4:0.4f}".format(
                        i_class, ap, n_pos, class_ids.index(i_class), recall))

    # save timing
    losses["eval_time"] = (time.time() - t_start_eval)
    logger.info("Evaluated on {0}, scale {1}".format(dataset_name, dataset_scale))
    print_meters(losses, logger)
    return losses
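
# Hedged usage sketch (not part of the original code): how the metrics returned
# by the OS2D evaluate() above are typically consumed. `dataloader`, `net`, and
# `cfg` are assumed to be built by project-specific setup code; only the
# evaluate() call and the key format ("mAP@{:0.2f}") follow the code above.
def _example_os2d_evaluate_usage(dataloader, net, cfg, criterion=None):
    losses = evaluate(dataloader, net, cfg,
                      criterion=criterion, print_per_class_results=True)
    # the returned OrderedDict is keyed by the format strings used above,
    # e.g. "mAP@0.50" and "recall@0.50" for cfg.eval.mAP_iou_thresholds == [0.5]
    for iou_th in cfg.eval.mAP_iou_thresholds:
        print("mAP@{0:0.2f} = {1:0.4f}".format(iou_th, losses["mAP@{:0.2f}".format(iou_th)]))
    return losses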
def evaluate(dataloader, detector, cfg_maskrcnn, retrievalnet, opt,
             cfg_eval, cfg_visualization, is_cuda=False, logger_prefix="detector-retrieval"):
    logger = logging.getLogger(f"{logger_prefix}.evaluate")
    dataset_name = dataloader.get_name()
    dataset_scale = dataloader.get_eval_scale()
    logger.info("Starting to eval on {0}, scale {1}".format(dataset_name, dataset_scale))
    t_start_eval = time.time()
    detector.eval()
    retrievalnet.eval()

    ## setup retrievalnet
    # setting up the multi-scale parameters
    ms = [1]
    msp = 1
    if opt.retrieval_multiscale:
        ms = [1, 1. / math.sqrt(2), 1. / 2]
        if retrievalnet.meta["pooling"] == "gem" and retrievalnet.whiten is None:
            msp = retrievalnet.pool.p.data.tolist()[0]

    # setup whitening
    if opt.retrieval_whitening_path is not None:
        logger.info("Whitening is precomputed, loading it from {0}".format(opt.retrieval_whitening_path))
        whitening_data = torch.load(opt.retrieval_whitening_path)

        if (opt.retrieval_multiscale and "ms" in whitening_data) or \
           (not opt.retrieval_multiscale and "ss" in whitening_data):
            if opt.retrieval_multiscale:
                Lw = copy.deepcopy(whitening_data["ms"])
            else:
                Lw = copy.deepcopy(whitening_data["ss"])
        else:
            raise RuntimeError("Whitening should be precomputed with the network")

        # convert whitening data to torch tensors
        Lw["m"], Lw["P"] = torch.from_numpy(Lw["m"]), torch.from_numpy(Lw["P"])
        if is_cuda:
            Lw["m"], Lw["P"] = Lw["m"].cuda(), Lw["P"].cuda()
    else:
        Lw = None

    with torch.no_grad():  # do evaluation in the forward mode only (for speed and memory)
        # extract features from query images
        query_images, _, _ = dataloader.get_all_class_images(do_resize=False)
        if is_cuda:
            query_images = [img.cuda() for img in query_images]
        query_images = [img[0] for img in query_images]  # get rid of the batch dimension
        query_images = [resize_image_tensor(img, opt.retrieval_image_size) for img in query_images]
        query_images = [dataloader.unnorm_image(img) for img in query_images]

        query_images_with_aug = []
        for im in query_images:
            query_images_with_aug.append(im)
            if not cfg_eval.class_image_augmentation:
                num_class_views = 1
            elif cfg_eval.class_image_augmentation == "rotation90":
                im90 = im.rot90(1, [1, 2])
                im180 = im90.rot90(1, [1, 2])
                im270 = im180.rot90(1, [1, 2])
                query_images_with_aug.append(im90)
                query_images_with_aug.append(im180)
                query_images_with_aug.append(im270)
                num_class_views = 4
            elif cfg_eval.class_image_augmentation == "horflip":
                im_flipped = im.flip(2)
                query_images_with_aug.append(im_flipped)
                num_class_views = 2
            else:
                raise RuntimeError(f"Unknown value of class_image_augmentation: {cfg_eval.class_image_augmentation}")
        query_images = query_images_with_aug

        query_vectors = extract_vectors_from_images(retrievalnet, query_images, ms=ms, msp=msp)
        # apply whitening if defined
        if Lw is not None:
            query_vectors = whitenapply(query_vectors, Lw["m"], Lw["P"])
        query_vectors = torch.transpose(query_vectors, 0, 1)

        # prepare looping over all images
        iterator = make_iterator_extract_scores_from_images_batched(dataloader, detector, cfg_maskrcnn, logger,
                                                                    image_batch_size=cfg_eval.batch_size,
                                                                    is_cuda=is_cuda)

        boxes, labels, scores = [], [], []
        gt_boxes = []
        image_ids = []
        losses = OrderedDict()

        # loop over all dataset images
        num_evaluated_images = 0
        for data in iterator:
            image_id, boxes_one_image, image_pyramid, query_img_sizes, class_ids, initial_img_size = data
            image_ids.append(image_id)
            logger.info(f"Image {num_evaluated_images}: id {image_id}")
            num_evaluated_images += 1
            img_size_pyramid = [FeatureMapSize(img=img) for img in image_pyramid]
            gt_boxes_one_image = dataloader.get_image_annotation_for_imageid(image_id)
            gt_boxes.append(gt_boxes_one_image)

            # visualize GT for debug
            if cfg_visualization.show_gt_boxes:
                visualizer.show_gt_boxes(image_id, gt_boxes_one_image, class_ids, dataloader)

            # decode image predictions:
            # merge boxes_one_image, labels_one_image, scores_one_image from the different pyramid layers
            boxes_one_image = cat_boxlist(boxes_one_image)

            # do NMS on the detector scores
            good_indices = nms(boxes_one_image,
                               opt.nms_iou_threshold_detector_score,
                               nms_score_threshold=opt.nms_score_threshold_detector_score)
            boxes_one_image = boxes_one_image[good_indices]

            # extract feature vectors from the predictions
            image_original = dataloader._transform_image(image_id, do_augmentation=True, hflip=False, vflip=False)[0]
            if is_cuda:
                image_original = image_original.cuda()
            image_patches = crop_resize_image_patches(image_original, boxes_one_image,
                                                      opt.retrieval_image_size, logger,
                                                      unnorm_image=dataloader.unnorm_image,
                                                      is_cuda=is_cuda)
            # filter out the cases when cropping a box failed: the box was outside of the image
            good_indices = [i for i, p in enumerate(image_patches) if p is not None]
            if good_indices:  # non-empty
                image_patches = [p for p in image_patches if p is not None]
                boxes_one_image = boxes_one_image[good_indices]

                image_vectors = extract_vectors_from_images(retrievalnet, image_patches, ms=ms, msp=msp)

                # compute class scores from image_vectors and query_vectors (already transposed)
                if Lw is not None:
                    # apply whitening if defined
                    image_vectors = whitenapply(image_vectors, Lw["m"], Lw["P"])
                scores_retrieval = torch.mm(query_vectors, image_vectors)

                num_queries = scores_retrieval.size(0)
                num_detections = scores_retrieval.size(1)
                list_of_active_label = torch.LongTensor(class_ids)
                if cfg_eval.class_image_augmentation:
                    list_of_active_label = torch.stack([list_of_active_label] * num_class_views, 1).view(-1)

                # take all labels for all boxes - will sort them by scores at eval
                scores_one_image = scores_retrieval.view(-1)
                boxes_one_image = cat_boxlist([boxes_one_image] * num_queries)
                labels_one_image = torch.stack([list_of_active_label] * num_detections, 1).contiguous().view(-1)

                # add scores and labels: overwrite if they existed
                boxes_one_image.add_field("labels", labels_one_image)
                boxes_one_image.add_field("scores", scores_one_image)

                # NMS using the retrieval scores
                good_indices = nms(boxes_one_image,
                                   cfg_eval.nms_iou_threshold,
                                   nms_score_threshold=cfg_eval.nms_score_threshold,
                                   do_separate_per_label=not cfg_eval.nms_across_classes)
                boxes_one_image = boxes_one_image[good_indices]
            else:
                boxes_one_image.add_field("labels",
                                          torch.zeros(0, dtype=torch.long, device=boxes_one_image.bbox_xyxy.device))
                boxes_one_image.add_field("scores",
                                          torch.zeros(0, dtype=torch.float, device=boxes_one_image.bbox_xyxy.device))

            boxes.append(boxes_one_image.cpu())

            if cfg_visualization.show_detections:
                # do not pass class_ids - this is already taken care of
                visualizer.show_detections(boxes_one_image, image_id, dataloader,
                                           cfg_visualization, class_ids=None)

        # normalize by the number of evaluated images
        for k in losses:
            losses[k] /= num_evaluated_images

        # save detections if requested
        if cfg_visualization.path_to_save_detections:
            data = {"image_ids" : image_ids,
                    "boxes_xyxy" : [bb.bbox_xyxy for bb in boxes],
                    "labels" : [bb.get_field("labels") for bb in boxes],
                    "scores" : [bb.get_field("scores") for bb in boxes],
                    "gt_boxes_xyxy" : [bb.bbox_xyxy for bb in gt_boxes],
                    "gt_labels" : [bb.get_field("labels") for bb in gt_boxes],
                    "gt_difficults" : [bb.get_field("difficult") for bb in gt_boxes]}

            dataset_name = dataloader.get_name()
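            # note: the dict above mirrors the save format of the OS2D evaluate():
            # parallel per-image lists, with boxes stored in xyxy pixel coordinates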
            os.makedirs(cfg_visualization.path_to_save_detections, exist_ok=True)
            save_path = os.path.join(cfg_visualization.path_to_save_detections,
                                     dataset_name + "_detections.pth")
            torch.save(data, save_path)

        # compute mAP
        for mAP_iou_threshold in cfg_eval.mAP_iou_thresholds:
            logger.info("Evaluating at IoU threshold {:0.2f}".format(mAP_iou_threshold))
            ap_data = do_voc_evaluation(boxes, gt_boxes, iou_thresh=mAP_iou_threshold, use_07_metric=False)
            losses["mAP@{:0.2f}".format(mAP_iou_threshold)] = ap_data["map"]
            losses["mAPw@{:0.2f}".format(mAP_iou_threshold)] = ap_data["map_weighted"]
            losses["recall@{:0.2f}".format(mAP_iou_threshold)] = ap_data["recall"]
            losses["AP_joint_classes@{:0.2f}".format(mAP_iou_threshold)] = ap_data["ap_joint_classes"]

            # per-class AP information
            for i_class, (ap, recall, n_pos) in \
                    enumerate(zip(ap_data["ap_per_class"], ap_data["recall_per_class"], ap_data["n_pos"])):
                if not np.isnan(ap):
                    assert i_class in class_ids, "Could not find class_id in the list of ids"
                    logger.info("Class {0} (local {3}), AP {1:0.4f}, #obj {2}, recall {4:0.4f}".format(
                        i_class, ap, n_pos, class_ids.index(i_class), recall))

    # save timing
    losses["eval_time"] = (time.time() - t_start_eval)
    logger.info("Evaluated on {0}, scale {1}".format(dataset_name, dataset_scale))
    print_meters(losses, logger)
    return losses
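
# Hedged usage sketch (not part of the original code): wiring for the
# detector-retrieval evaluate() above. All arguments are assumed to be built by
# project-specific setup code; only the evaluate() signature and the metric key
# formats follow the code above.
def _example_detector_retrieval_usage(dataloader, detector, cfg_maskrcnn,
                                      retrievalnet, opt, cfg_eval, cfg_visualization):
    losses = evaluate(dataloader, detector, cfg_maskrcnn, retrievalnet, opt,
                      cfg_eval, cfg_visualization,
                      is_cuda=torch.cuda.is_available())
    # same key format as the OS2D evaluate() (e.g. "mAP@0.50"), plus the
    # "AP_joint_classes@{:0.2f}" entries added by this function
    return losses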
def evaluate_detections(self, all_boxes, output_dir, mAP_iou_threshold=0.5):
    predictions = []
    gt_boxes = []
    roidb = self.roidb
    for i_image, roi in enumerate(roidb):
        image_size = FeatureMapSize(w=roi["width"], h=roi["height"])
        if roi["boxes"].size > 0:
            roi_gt_boxes = BoxList(roi["boxes"], image_size, mode="xyxy")
        else:
            roi_gt_boxes = BoxList.create_empty(image_size)
        roi_gt_boxes.add_field("labels", torch.as_tensor(roi["gt_classes"], dtype=torch.int32))
        roi_gt_boxes.add_field("difficult", torch.as_tensor(roi["gt_ishard"], dtype=torch.int32))

        gt_boxes.append(roi_gt_boxes)

        roi_detections = []
        for i_class, class_boxes in enumerate(all_boxes):
            assert len(class_boxes) == len(roidb), \
                "Number of detections for class {0}, image {1} ({2}) is inconsistent with the length of roidb ({3})".format(
                    i_class, i_image, len(class_boxes), len(roidb))
            boxes = class_boxes[i_image]
            if len(boxes) > 0:
                assert boxes.shape[1] == 5, \
                    "Detections should be of shape (:,5), but are {0} for class {1}, image {2}".format(
                        boxes.shape, i_class, i_image)
                bbox = BoxList(boxes[:, :4], image_size, mode="xyxy")
                scores = boxes[:, -1]
                bbox.add_field("scores", torch.as_tensor(scores, dtype=torch.float32))
                bbox.add_field("labels", torch.full(scores.shape, i_class, dtype=torch.int32))
                roi_detections.append(bbox)

        if roi_detections:
            roi_detections = cat_boxlist(roi_detections)
        else:
            roi_detections = BoxList.create_empty(image_size)
            roi_detections.add_field("scores", torch.zeros((0,), dtype=torch.float32))
            roi_detections.add_field("labels", torch.zeros((0,), dtype=torch.int32))
        predictions.append(roi_detections)

        if False:  # debug visualization, disabled by default
            self.visualize_detections(i_image, gt=roi_gt_boxes, dets=roi_detections)

    ap_data = do_voc_evaluation(predictions, gt_boxes,
                                iou_thresh=mAP_iou_threshold,
                                use_07_metric=False)
    print("mAP@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["map"]))
    print("mAPw@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["map_weighted"]))
    print("recall@{:0.2f}: {:0.4f}".format(mAP_iou_threshold, ap_data["recall"]))

    return ap_data["map"]
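
# Hedged sketch (not part of the original code): the input layout expected by
# evaluate_detections() above. `dataset` stands for any object exposing this
# method and a `roidb` attribute; the (N, 5) row format [x1, y1, x2, y2, score]
# is exactly what the shape assert in the method checks.
def _example_all_boxes_layout(dataset, num_classes):
    import numpy as np
    num_images = len(dataset.roidb)
    # all_boxes[i_class][i_image] is an (N, 5) array; here one empty
    # (0, 5) array per (class, image) pair, i.e. no detections at all
    all_boxes = [[np.zeros((0, 5), dtype=np.float32) for _ in range(num_images)]
                 for _ in range(num_classes)]
    return dataset.evaluate_detections(all_boxes, output_dir=None)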