def _evaluate_predictions_on_coco(coco_gt, coco_results):
    metrics = ["AP", "AP50", "AP75", "APm", "APl"]
    logger = logging.getLogger(__name__)
    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in metrics}
        results_gpsm = {metric: -1 for metric in metrics}
        # mirror the 3-tuple returned by the non-empty path
        results_iou = {metric: -1 for metric in metrics}
        return results_gps, results_gpsm, results_iou
    coco_dt = coco_gt.loadRes(coco_results)
    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
    logger.info("Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps))
    results_iou = _evaluate_predictions_on_coco_iou(coco_gt, coco_dt, metrics)
    logger.info("Evaluation results for densepose, IOU metric: \n" + create_small_table(results_iou))
    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics)
    logger.info("Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm))
    return results_gps, results_gpsm, results_iou

def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5):
    logger = logging.getLogger(__name__)
    segm_metrics = _get_segmentation_metrics()
    densepose_metrics = _get_densepose_metrics(min_threshold)
    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in densepose_metrics}
        results_gpsm = {metric: -1 for metric in densepose_metrics}
        results_segm = {metric: -1 for metric in segm_metrics}
        return results_gps, results_gpsm, results_segm
    coco_dt = coco_gt.loadRes(coco_results)
    results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, segm_metrics, min_threshold)
    logger.info("Evaluation results for densepose segm: \n" + create_small_table(results_segm))
    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, densepose_metrics, min_threshold)
    logger.info("Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps))
    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, densepose_metrics, min_threshold)
    logger.info("Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm))
    return results_gps, results_gpsm, results_segm

def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5):
    metrics = ["AP"]
    if min_threshold <= 0.201:
        metrics += ["AP20"]
    if min_threshold <= 0.301:
        metrics += ["AP30"]
    if min_threshold <= 0.401:
        metrics += ["AP40"]
    metrics.extend(["AP50", "AP75", "APm", "APl"])
    logger = logging.getLogger(__name__)
    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in metrics}
        results_gpsm = {metric: -1 for metric in metrics}
        # mirror the 3-tuple returned by the non-empty path
        results_segm = {metric: -1 for metric in metrics}
        return results_gps, results_gpsm, results_segm
    coco_dt = coco_gt.loadRes(coco_results)
    results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, metrics, min_threshold)
    logger.info("Evaluation results for densepose segm: \n" + create_small_table(results_segm))
    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics, min_threshold)
    logger.info("Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps))
    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics, min_threshold)
    logger.info("Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm))
    return results_gps, results_gpsm, results_segm

def evaluate(self):
    """
    Returns:
        dict: has a key "bbox", whose value is a dict of "AP", "AP50", and "AP75".
    """
    all_predictions = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return
    predictions = defaultdict(list)
    for predictions_per_rank in all_predictions:
        for clsid, lines in predictions_per_rank.items():
            predictions[clsid].extend(lines)
    del all_predictions

    self._logger.info(f"Evaluating {self._dataset_name}")
    with tempfile.TemporaryDirectory(prefix="digits_voc_eval_") as dirname:
        res_file_template = os.path.join(dirname, "{}.txt")

        aps = defaultdict(list)  # iou -> ap per class
        aps_base = defaultdict(list)
        aps_novel = defaultdict(list)
        exist_base, exist_novel = False, False
        for cls_id, cls_name in enumerate(self._classes):
            lines = predictions.get(cls_id, [""])
            with open(res_file_template.format(cls_name), "w") as f:
                f.write("\n".join(lines))
            for thresh in range(50, 100, 5):
                rec, prec, ap = voc_eval(
                    res_file_template,
                    self._anno_file_template,
                    self._image_set_path,
                    cls_name,
                    ovthresh=thresh / 100.0,
                )
                aps[thresh].append(ap * 100)

    ret = OrderedDict()
    mAP = {iou: np.mean(x) for iou, x in aps.items()}
    ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]}

    # write per-class AP to logger
    per_class_res = {self._classes[idx]: ap for idx, ap in enumerate(aps[50])}
    self._logger.info("Evaluate per-class mAP50:\n" + create_small_table(per_class_res))
    self._logger.info("Evaluate overall bbox:\n" + create_small_table(ret["bbox"]))
    return ret

def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None):
    """
    Args:
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    return results

def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
    """
    Derive the desired score numbers from summarized COCOeval.

    Args:
        coco_eval (None or COCOEval): None represents no predictions from model.
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
    }[iou_type]

    if coco_eval is None:
        self._logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    # the standard metrics
    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
    self._logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))

    if class_names is None or len(class_names) <= 1:
        return results
    # Compute per-category AP
    # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]

    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))

    # tabulate it
    N_COLS = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        results_2d,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", "AP"] * (N_COLS // 2),
        numalign="left",
    )
    self._logger.info("Per-category {} AP: \n".format(iou_type) + table)

    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results

def _evaluate_predictions_on_oid(oid_gt, oid_results_path, eval_seg=False):
    logger = logging.getLogger(__name__)
    metrics = ["AP50", "AP50_expand"]
    results = {}
    oid_eval = OIDEval(oid_gt, oid_results_path, 'bbox', expand_pred_label=False)
    oid_eval.run()
    oid_eval.print_results()
    results["AP50"] = oid_eval.get_results()["AP50"]
    if eval_seg:
        oid_eval = OIDEval(oid_gt, oid_results_path, 'segm', expand_pred_label=False)
        oid_eval.run()
        oid_eval.print_results()
        results["AP50_segm"] = oid_eval.get_results()["AP50"]
    else:
        oid_eval = OIDEval(oid_gt, oid_results_path, 'bbox', expand_pred_label=True)
        oid_eval.run()
        oid_eval.print_results()
        results["AP50_expand"] = oid_eval.get_results()["AP50"]
    logger.info("Evaluation results for bbox: \n" + create_small_table(results))
    return results

def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
    """
    Derive the desired score numbers from summarized COCOeval.

    Args:
        coco_eval (None or List): None represents no predictions from model.
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score} used by print_csv_format to print
        and saved in trainer storage
    """
    results = {}
    # metrics = ["AP", "mMR", "Recall"]  # add FPPI etc
    metrics = {
        "bbox": ["mAP", "mMR", "max_recall", "fppi0.01", "fppi0.1", "fppi1.0"],
    }[iou_type]

    if coco_eval is None:
        self._logger.warning("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    # the standard metrics
    results = {k: v for k, v in coco_eval.items() if 'total' in k}
    # results = {metric: coco_eval[idx] for idx, metric in enumerate(metrics)}
    small_table = create_small_table(results)
    self._logger.info("Evaluation results for {}: \n".format(iou_type) + small_table)
    # print(coco_eval)
    return results

def evaluate(self):
    if self._distributed:
        synchronize()
        endpoint_errors = all_gather(self._endpoint_errors)
        endpoint_errors = [per_image for per_gpu in endpoint_errors for per_image in per_gpu]
        self._predictions = all_gather(self._predictions)
        if not is_main_process():
            return

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "flow_predictions.json")
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(self._predictions))

    ave_epe = sum(endpoint_errors) / len(endpoint_errors)
    res = {"ave_epe": ave_epe}

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "flow_evaluation.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(res, f)

    results = OrderedDict({"flow": res})
    small_table = create_small_table(res)
    self._logger.info("Evaluation results for flow: \n" + small_table)
    dump_info_one_task = {
        "task": "flow",
        "tables": [small_table],
    }
    _dump_to_markdown([dump_info_one_task])
    return results

def evaluate(self):
    if self._distributed:
        comm.synchronize()
        self._predictions = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain(*self._predictions))
        self.submit_results = comm.gather(self.submit_results, dst=0)
        self.submit_results = list(itertools.chain(*self.submit_results))
        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
        return {}

    self._logger.info("Preparing results for COCO format ...")
    self._coco_results = list(itertools.chain(*[x["instances"] for x in self._predictions]))

    if self._output_dir:
        res_file = os.path.join(self._output_dir, "crowdhuman_evaluate_results.json")
        self._logger.info("Saving results to {}".format(res_file))
        with PathManager.open(res_file, "w") as f:
            f.write(json.dumps(self._coco_results))
            f.flush()

        submit_file = os.path.join(self._output_dir, "submission.txt")
        self._logger.info("Saving results to {}".format(submit_file))
        with PathManager.open(submit_file, "w") as f:
            for result in self.submit_results:
                f.write(json.dumps(result))
                f.write("\n")
            f.flush()

    self._logger.info("Evaluating predictions ...")
    metrics = ["ALL"]
    results = {}
    ret_results = OrderedDict()
    for gt_json in [self._metadata.gt_file]:
        name = gt_json.split("/")[-1].split(".")[0]
        for id_setup in range(len(metrics)):
            cocoGt = COCO(gt_json)
            cocoDt = cocoGt.loadRes(res_file)
            imgIds = sorted(cocoGt.getImgIds())
            cocoEval = CrowdHumanEval(cocoGt, cocoDt, "bbox")
            cocoEval.params.imgIds = imgIds
            cocoEval.evaluate(id_setup)
            cocoEval.accumulate()
            performance_dict = cocoEval.summarize(id_setup)
            for key in performance_dict.keys():
                results[name + " " + key] = performance_dict[key]
    self._logger.info(
        "Evaluation results for Pedestrian Detection on CrowdHuman: \n"
        + create_small_table(results))
    ret_results["PedestrianDetection"] = copy.deepcopy(results)
    return ret_results

def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets=None, class_names=None
):
    """
    Copied from detectron2.evaluation.lvis_evaluation, with support for max_dets.

    Args:
        iou_type (str):
        kpt_oks_sigmas (list[float]):
        max_dets (None or int)
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    #####
    # <modified>
    if max_dets is None:
        max_dets = 300
    lvis_results_obj = LVISResults(lvis_gt, lvis_results, max_dets=max_dets)
    lvis_eval = LVISEval(lvis_gt, lvis_results_obj, iou_type)
    lvis_eval.params.max_dets = max_dets
    # </modified>
    #####
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        f"Evaluation results for {iou_type}, max_dets {max_dets} \n"
        + create_small_table(results)
    )
    return results

def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
):
    """
    Args:
        iou_type (str):
        max_dets_per_image (None or int): limit on maximum detections per image
            in evaluating AP. This limit, by default of the LVIS dataset, is 300.
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    if max_dets_per_image is None:
        max_dets_per_image = 300  # Default for LVIS dataset

    from lvis import LVISEval, LVISResults

    logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    return results

def _derive_results_from_coco_eval(
    coco_eval, eval_mode_name, metrics, class_names, min_threshold, img_ids
):
    if img_ids is not None:
        coco_eval.params.imgIds = img_ids
    coco_eval.params.iouThrs = np.linspace(
        min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
    )
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
    logger = logging.getLogger(__name__)
    logger.info(
        f"Evaluation results for densepose, {eval_mode_name} metric: \n"
        + create_small_table(results)
    )
    if class_names is None or len(class_names) <= 1:
        return results

    # Compute per-category AP, the same way as it is done in D2
    # (see detectron2/evaluation/coco_evaluation.py):
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]

    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append((f"{name}", float(ap * 100)))

    # tabulate it
    n_cols = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    results_2d = itertools.zip_longest(*[results_flatten[i::n_cols] for i in range(n_cols)])
    table = tabulate(
        results_2d,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", "AP"] * (n_cols // 2),
        numalign="left",
    )
    logger.info(f"Per-category {eval_mode_name} AP: \n" + table)
    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results

def _eval_box_proposals(self, predictions):
    """
    Evaluate the box proposals in predictions.
    Fill self._results with the metrics for "box_proposals" task.
    """
    if self._output_dir:
        # Saving generated box proposals to file.
        # Predicted box_proposals are in XYXY_ABS mode.
        bbox_mode = BoxMode.XYXY_ABS.value
        ids, boxes, interactness_logits = [], [], []
        for prediction in predictions:
            ids.append(prediction["image_id"])
            boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
            interactness_logits.append(prediction["proposals"].interactness_logits.numpy())

        proposal_data = {
            "boxes": boxes,
            "interactness_logits": interactness_logits,
            "ids": ids,
            "bbox_mode": bbox_mode,
        }
        with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
            pickle.dump(proposal_data, f)

    if not self._do_evaluation:
        logger.info("Annotations are not available for evaluation.")
        return

    logger.info("Evaluating bbox proposals ...")
    res = {}
    areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
    for limit in [100, 500]:
        for area, suffix in areas.items():
            stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit)
            key = "AR{}@{:d}".format(suffix, limit)
            res[key] = float(stats["ar"].item() * 100)
            for sub_key in ["", "_known", "_novel"]:
                key = "R{}{}@{:d}+IoU=0.5".format(suffix, sub_key, limit)
                res[key] = float(stats["recalls{}".format(sub_key)][0].item() * 100)
                print(" R{}{}@{:d}+IoU=0.5 = {:.3f}".format(suffix, sub_key, limit, res[key]))
    logger.info("Proposal metrics: \n" + create_small_table(res))
    self._results["box_proposals"] = res

def _eval_box_proposals(self):
    """
    Evaluate the box proposals in self._predictions.
    Fill self._results with the metrics for "box_proposals" task.
    """
    if self._output_dir:
        # Saving generated box proposals to file.
        # Predicted box_proposals are in XYXY_ABS mode.
        bbox_mode = BoxMode.XYXY_ABS.value
        ids, boxes, objectness_logits = [], [], []
        for prediction in self._predictions:
            ids.append(prediction["image_id"])
            boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
            objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

        proposal_data = {
            "boxes": boxes,
            "objectness_logits": objectness_logits,
            "ids": ids,
            "bbox_mode": bbox_mode,
        }
        with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
            pickle.dump(proposal_data, f)

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info("Evaluating bbox proposals ...")
    res = {}
    areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
    for limit in [100, 1000]:
        for area, suffix in areas.items():
            stats = _evaluate_box_proposals(self._predictions, self._coco_api, area=area, limit=limit)
            key = "AR{}@{:d}".format(suffix, limit)
            res[key] = float(stats["ar"].item() * 100)
    self._logger.info("Proposal metrics: \n" + create_small_table(res))
    self._results["box_proposals"] = res

def _eval_affinity(self, predictions):
    """
    Evaluate plane correspondence.
    """
    logger.info("Evaluating embedding affinity ...")
    labels = []
    preds = []
    for pred in predictions:
        labels.extend(pred["labels"])
        preds.extend(pred["preds"])

    best_auc_ipaa = 0
    best_threshold = 0
    best_ipaa_dict = None
    for th in predictions[0]["ipaa_by_threshold"].keys():
        IPAA_dict = {}
        for i in range(11):
            IPAA_dict[i * 10] = 0
        for pred in predictions:
            for key in IPAA_dict.keys():
                if pred["ipaa_by_threshold"][th] >= key / 100:
                    IPAA_dict[key] += 1
        auc_ipaa = compute_auc(IPAA_dict)
        if auc_ipaa > best_auc_ipaa:
            best_auc_ipaa = auc_ipaa
            best_threshold = th
            best_ipaa_dict = IPAA_dict

    if not len(labels):
        return
    auc = roc_auc_score(labels, preds) * 100
    ap = average_precision_score(labels, preds) * 100
    if best_ipaa_dict is None:
        results = {
            f"ap@iou={self._filter_iou}": ap,
            f"auc@iou={self._filter_iou}": auc,
        }
    else:
        results = {
            f"ap@iou={self._filter_iou}": ap,
            f"auc@iou={self._filter_iou}": auc,
            "ipaa-80": best_ipaa_dict[80] / len(predictions),
            "ipaa-90": best_ipaa_dict[90] / len(predictions),
            "ipaa-100": best_ipaa_dict[100] / len(predictions),
            "auc-ipaa": best_auc_ipaa,
            "hungarian-threshold": best_threshold,
        }
    logger.info("Affinity metrics: \n" + create_small_table(results))
    self._results.update(results)

def _calculate_accuracy_recall(self, gts, preds):
    cat = self._metadata.get("classification_classes", None)
    num_cls = len(cat)
    assert len(preds) > 0 and len(gts)
    image_to_idx = {}
    gt_cls_count = [0 for i in range(num_cls + 1)]
    for i, c in enumerate(gts):
        image_to_idx[c["image_id"]] = i
        gt_cls_count[c["category2_id"]] += 1

    count, c_model, c_part, c_toward = 0, 0, 0, 0
    cls_count = [0 for i in range(num_cls + 1)]
    for pd in preds:
        gt_idx = image_to_idx[pd["image_id"]]
        gt = gts[gt_idx]
        if gt["category2_id"] == pd["category_id"]:
            count += 1
            cls_count[pd["category_id"]] += 1
        else:
            if self._view_error:
                self._vis_result(gt, pd)
        if gt["category2_id"] == 2:
            c_model += 1
            if pd["toward"] == gt["toward"]:
                c_toward += 1
            if pd["part"] == gt["part"]:
                c_part += 1

    accuracy = float(count) / (float(len(gts)) + 0.0001) * 100
    part_accuracy = float(c_part) / (float(c_model) + 0.0001) * 100
    toward_accuracy = float(c_toward) / (float(c_model) + 0.0001) * 100
    cls_acc = {}
    for i, (pd, gt) in enumerate(zip(cls_count, gt_cls_count)):
        if not gt == 0:
            cls_acc[cat[i - 1] + "_acc"] = float(pd) / (float(gt) + 0.0001) * 100

    results = {
        "accuracy": accuracy,
        "part_accuracy": part_accuracy,
        "toward_accuracy": toward_accuracy,
    }
    results.update(cls_acc)
    self._logger.info("Evaluation results for classification: \n" + create_small_table(results))
    return results

def evaluate(self):
    if self._distributed:
        comm.synchronize()
        self._predictions = comm.gather(self._predictions, dst=0)
        self._predictions = list(itertools.chain(*self._predictions))
        if not comm.is_main_process():
            return {}

    if len(self._predictions) == 0:
        self._logger.warning("[ClassificationEvaluator] Did not receive valid predictions.")
        return {}

    pred_nums = [0] * len(self._metadata.classes)
    gt_nums = [0] * len(self._metadata.classes)
    correct_nums = [0] * len(self._metadata.classes)
    for p in self._predictions:
        if p['gt_class_id'] >= 0:
            gt_nums[p['gt_class_id']] += 1
            if p['gt_class_id'] == p['pred_class_id']:
                correct_nums[p['gt_class_id']] += 1
        if p['pred_class_id'] >= 0:
            pred_nums[p['pred_class_id']] += 1

    result = {}
    eps = 0.00001
    for i, cls in enumerate(self._metadata.classes):
        idx = self._metadata.class_to_idx[cls]
        acc = correct_nums[idx] / (pred_nums[idx] + eps)
        recall = correct_nums[idx] / (gt_nums[idx] + eps)
        result.update({cls + "_acc": acc, cls + "_recall": recall})
    total_acc = sum(correct_nums) / (sum(pred_nums) + eps)
    total_recall = sum(correct_nums) / (sum(gt_nums) + eps)
    result.update({"total_acc": total_acc, "total_recall": total_recall})

    self._logger.info("Evaluation results for classification: \n" + create_small_table(result))
    results = OrderedDict()
    results["classification"] = result
    return results

def _evaluate_predictions_on_coco(coco_gt, coco_results):
    metrics = ["AP", "AP50", "AP75", "APm", "APl"]

    logger = logging.getLogger(__name__)

    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    coco_dt = coco_gt.loadRes(coco_results)
    coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    # the standard metrics
    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
    logger.info("Evaluation results for densepose: \n" + create_small_table(results))
    return results

def _evaluate_predictions_ar(self, predictions):
    res = {}
    aspect_ratios = {
        "all ratios": [0 / 1, 1e5 / 1],
        " 0 - 1/5": [0 / 1, 1 / 5],
        "1/5 - 1/3": [1 / 5, 1 / 3],
        "1/3 - 3/1": [1 / 3, 3 / 1],
        "3/1 - 5/1": [3 / 1, 5 / 1],
        "5/1 - INF": [5 / 1, 1e5 / 1],
    }
    areas = {
        "all areas": [0, float("inf")],
        "small": [0, 32 ** 2],
        "medium": [32 ** 2, 96 ** 2],
        "large": [96 ** 2, float("inf")],
    }
    limits = [100]
    for limit in limits:
        stats = _evaluate_predictions_ar(
            predictions,
            self._coco_api,
            self._metadata,
            aspect_ratios=aspect_ratios,
            areas=areas,
            limit=limit,
        )
        recalls = stats.pop("recalls")
        for i, key in enumerate(areas):
            res["AR-{}@{:d}".format(key, limit)] = recalls[:, -1, 0, i].mean() * 100
            res["mAR-{}@{:d}".format(key, limit)] = recalls[:, :-1, 0, i].mean() * 100
        for i, key in enumerate(aspect_ratios):
            res["AR-{}@{:d}".format(key, limit)] = recalls[:, -1, i, 0].mean() * 100
            res["mAR-{}@{:d}".format(key, limit)] = recalls[:, :-1, i, 0].mean() * 100
        key = "AR@{:d}".format(limit)
        res[key] = float(stats["ar"].item() * 100)
        key = "mAR@{:d}".format(limit)
        res[key] = float(stats["mar"].item() * 100)
    print("Proposal metrics: \n" + create_small_table(res))
    # stats["recalls"] = recalls
    res["ar-stats"] = stats
    self._results["ar"] = res

def main(predictions_file_path, json_file="datasets/coco/annotations/instances_val2017.json", oriented=False):
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    with open(predictions_file_path, mode="rb") as fp:
        predictions = torch.load(fp)
    print(len(predictions))

    res = {}
    if oriented:
        aspect_ratios = {
            "all": (0, 1),
            "0-0.2": (0, 0.2),
            "0.2-0.3*": (0.2, 1 / 3),
            "0.3*-1": (0.3, 1),
        }
    else:
        aspect_ratios = {
            "all": [0 / 1, 1000 / 1],
            "l1": [0 / 1, 1 / 5],
            "l2": [1 / 5, 1 / 3],
            "l3": [1 / 3, 3 / 1],
            "l4": [3 / 1, 5 / 1],
            "l5": [5 / 1, 1000 / 1],
        }
    num_pos_dict = dict()
    limits = [100]
    for limit in limits:
        for name, ratio_range in aspect_ratios.items():
            stats = evaluate_box_proposal(
                predictions, coco_api, aspect_ratio_range=ratio_range, limit=limit, oriented=oriented)
            key = "AR{}@{:d}".format(name, limit)
            res[key] = float(stats["ar"].item() * 100)
            num_pos_dict[name] = stats["num_pos"]
    print("Proposal metrics: \n" + create_small_table(res))

def evaluate(self):
    """
    Compute evaluation metrics based on accumulated data.

    Returns:
        dict: keys are [ErrorRate, Accuracy, Precision, Recall, Specificity]
    """
    predictions = self._predictions
    if len(predictions['gt_cls']) == 0:
        self._logger.warning("[BinaryClassificationEvaluator] Did not receive valid predictions.")
        return {}

    if self.validate:
        pred_tensor = torch.stack(predictions['logits'], dim=0)
        gt_tensor = torch.LongTensor(predictions['gt_cls'])
        loss = nn.CrossEntropyLoss()(pred_tensor, gt_tensor)

    # if self._output_dir:
    #     PathManager.mkdirs(self._output_dir)
    #     file_path = os.path.join(self._output_dir, "class_predictions.pth")
    #     with PathManager.open(file_path, "wb") as f:
    #         torch.save(predictions, f)

    gt_ = np.array(predictions['gt_cls'])
    pred_ = np.array(predictions['pred_cls'])
    total = len(gt_)

    tp_ = np.logical_and(gt_, pred_)                                  # gt_ = 1 AND pred_ = 1
    fp_ = np.logical_and(np.logical_not(gt_), pred_)                  # gt_ = 0 AND pred_ = 1
    fn_ = np.logical_and(gt_, np.logical_not(pred_))                  # gt_ = 1 AND pred_ = 0
    tn_ = np.logical_and(np.logical_not(gt_), np.logical_not(pred_))  # gt_ = 0 AND pred_ = 0

    tp = np.sum(tp_)
    fp = np.sum(fp_)
    fn = np.sum(fn_)
    tn = np.sum(tn_)
    P = tp + fn
    N = tn + fp

    self._results = OrderedDict()
    class_names = self._metadata.thing_classes  # Indicate which class is Positive
    self._results['ErrorRate'] = (fp + fn) / ((N + P) + 1e-5)
    self._results['Accuracy'] = (tp + tn) / ((N + P) + 1e-5)
    self._results['Precision'] = tp / ((tp + fp) + 1e-5)
    self._results['Recall'] = tp / (P + 1e-5)
    self._results['Specificity'] = 1 - fp / (N + 1e-5)

    results = {
        key: float(value * 100 if self._results[key] >= 0 else "nan")
        for key, value in self._results.items()
    }
    results[class_names[1] + '(P)'] = P
    results[class_names[0] + '(N)'] = N
    self._results[class_names[1] + '(P)'] = P
    self._results[class_names[0] + '(N)'] = N
    if self.validate:
        self._results['loss_cls'] = loss

    self._logger.info("Evaluation results for classification: \n" + create_small_table(results))
    if not np.isfinite(sum(self._results.values())):
        self._logger.info("Note that some metrics cannot be computed.")
    return copy.deepcopy(self._results)

def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
    """
    Derive the desired score numbers from summarized COCOeval.

    Args:
        coco_eval (None or COCOEval): None represents no predictions from model.
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
    }[iou_type]

    if coco_eval is None:
        self._logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    # the standard metrics
    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
    self._logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))

    if class_names is None or len(class_names) <= 1:
        return results
    # Compute per-category AP
    # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]

    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))

    histogram = np.load(
        os.path.join(global_cfg.OUTPUT_DIR, f'histogram_{global_cfg.DATASETS.TEST[0]}.npy'))
    ind_sorted = np.argsort(histogram)[::-1]
    a = np.array(results_per_category)[ind_sorted]
    bins = range(len(class_names))
    fig = plt.figure(figsize=(10, 8))
    plt.bar(bins, height=a[:, 1].astype(float), color='#F6CD61')
    plt.xticks(bins, np.array(class_names)[ind_sorted], rotation=90, fontsize=5)
    plt.ylim(bottom=0, top=100)
    storage = get_event_storage()
    storage.put_fig("AP", fig)
    if global_cfg.MODEL.GAMBLER_HEAD.SAVE_VIS_FILES is True:
        fig.savefig(os.path.join(global_cfg.OUTPUT_DIR, "AP_" + str(storage.iter) + ".pdf"))
    plt.close('all')

    # tabulate it
    N_COLS = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        results_2d,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", "AP"] * (N_COLS // 2),
        numalign="left",
    )
    self._logger.info("Per-category {} AP: \n".format(iou_type) + table)

    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results

def _derive_coco_results(
    self,
    coco_eval,
    iou_type,
    iouThr=None,
    class_names=None,
    known_classes=None,
    novel_classes=None,
):
    """
    Derive the desired score numbers from summarized COCOeval.

    Args:
        coco_eval (None or COCOEval): None represents no predictions from model.
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
    }[iou_type]

    if coco_eval is None:
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    # the standard metrics
    results = {
        metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
        for idx, metric in enumerate(metrics)
    }
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    if not np.isfinite(sum(results.values())):
        logger.info("Note that some metrics cannot be computed.")

    if class_names is None or len(class_names) <= 1:
        return results

    if "person" in known_classes:
        known_classes.remove("person")

    # Compute per-category AP
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]

    results_per_category = []
    results_known_category = []  # Exclude "person" category
    results_novel_category = []
    for idx, name in enumerate(class_names):
        # iou threshold index t: 0.5:0.05:0.9
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        if iouThr is not None:
            t = np.where(iouThr == coco_eval.params.iouThrs)[0]
            precision = precisions[t, :, idx, 0, -1]
        else:
            precision = precisions[:, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))
        if name in known_classes:
            results_known_category.append(ap * 100)
        if name in novel_classes:
            results_novel_category.append(ap * 100)

    str_suffix = "{:d}".format(int(iouThr * 100)) if iouThr else ""
    results_known_novel_split = {
        "AP{}-total".format(str_suffix): np.mean(results_known_category + results_novel_category),
        "AP{}-known".format(str_suffix): np.mean(results_known_category),
        "AP{}-novel".format(str_suffix): np.mean(results_novel_category)
        if len(results_novel_category) else "nan",
    }

    # tabulate it
    N_COLS = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
    table = tabulate(
        results_2d,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", "AP"] * (N_COLS // 2),
        numalign="left",
    )
    logger.info("Per-category {} AP: \n".format(iou_type) + table)
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    logger.info(
        "Evaluation results for {} known/novel splits: \n".format(iou_type)
        + create_small_table(results_known_novel_split)
    )

    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results

def evaluate(self):
    """
    Returns:
        dict: has a key "bbox", whose value is a dict of "AP", "AP50", and "AP75".
    """
    all_predictions = comm.gather(self._predictions, dst=0)
    if not comm.is_main_process():
        return
    predictions = defaultdict(list)
    for predictions_per_rank in all_predictions:
        for clsid, lines in predictions_per_rank.items():
            predictions[clsid].extend(lines)
    del all_predictions

    self._logger.info(
        "Evaluating {} using {} metric. "
        "Note that results do not use the official Matlab API.".format(
            self._dataset_name, 2007 if self._is_2007 else 2012))

    with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
        res_file_template = os.path.join(dirname, "{}.txt")

        aps = defaultdict(list)  # iou -> ap per class
        aps_base = defaultdict(list)
        aps_novel = defaultdict(list)
        exist_base, exist_novel = False, False
        for cls_id, cls_name in enumerate(self._class_names):
            lines = predictions.get(cls_id, [""])
            with open(res_file_template.format(cls_name), "w") as f:
                f.write("\n".join(lines))
            for thresh in range(50, 100, 5):
                rec, prec, ap = voc_eval(
                    res_file_template,
                    self._anno_file_template,
                    self._image_set_path,
                    cls_name,
                    ovthresh=thresh / 100.0,
                    use_07_metric=self._is_2007,
                )
                aps[thresh].append(ap * 100)
                if self._base_classes is not None and cls_name in self._base_classes:
                    aps_base[thresh].append(ap * 100)
                    exist_base = True
                if self._novel_classes is not None and cls_name in self._novel_classes:
                    aps_novel[thresh].append(ap * 100)
                    exist_novel = True

    ret = OrderedDict()
    mAP = {iou: np.mean(x) for iou, x in aps.items()}
    ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]}

    # adding evaluation of the base and novel classes
    if exist_base:
        mAP_base = {iou: np.mean(x) for iou, x in aps_base.items()}
        ret["bbox"].update(
            {"bAP": np.mean(list(mAP_base.values())), "bAP50": mAP_base[50], "bAP75": mAP_base[75]}
        )
    if exist_novel:
        mAP_novel = {iou: np.mean(x) for iou, x in aps_novel.items()}
        ret["bbox"].update(
            {"nAP": np.mean(list(mAP_novel.values())), "nAP50": mAP_novel[50], "nAP75": mAP_novel[75]}
        )

    # write per class AP to logger
    per_class_res = {self._class_names[idx]: ap for idx, ap in enumerate(aps[50])}
    self._logger.info("Evaluate per-class mAP50:\n" + create_small_table(per_class_res))
    self._logger.info("Evaluate overall bbox:\n" + create_small_table(ret["bbox"]))
    return ret

def _eval_depth(self, predictions):
    depth_l1_dist = [p["depth_l1_dist"] for p in predictions]
    result = {"depth_l1_dist": np.mean(depth_l1_dist)}
    logger.info("Depth metrics: \n" + create_small_table(result))
    self._results.update(result)

def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
    """
    Derive the desired score numbers from summarized COCOeval.

    Args:
        coco_eval (None or COCOEval): None represents no predictions from model.
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
    }[iou_type]

    if coco_eval is None:
        self._logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    # the standard metrics
    results = {
        metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
        for idx, metric in enumerate(metrics)
    }
    self._logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    if not np.isfinite(sum(results.values())):
        self._logger.info("Note that some metrics cannot be computed.")

    if class_names is None or len(class_names) <= 1:
        return results
    # Compute per-category AP
    # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]

    results_per_category = []
    # TODO(): Rewrite this more modularly
    results_per_category_AP50 = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))

        # Compute for AP50
        # 0th first index is IOU .50
        precision = precisions[0, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category_AP50.append(("{}".format(name), float(ap * 100)))

    table = _tabulate_per_category(results_per_category)
    self._logger.info("Per-category {} AP: \n".format(iou_type) + table)
    tableAP50 = _tabulate_per_category(results_per_category_AP50, "AP50")
    self._logger.info("Per-category {} AP50: \n".format(iou_type) + tableAP50)

    results.update({"AP-" + name: ap for name, ap in results_per_category})
    # Update AP50
    results.update({"AP50-" + name: ap for name, ap in results_per_category_AP50})
    return results

def _eval_box_proposals(self, predictions):
    """
    Evaluate the box proposals in predictions.
    Fill self._results with the metrics for "box_proposals" task.
    """
    if self._output_dir:
        # Saving generated box proposals to file.
        # Predicted box_proposals are in XYXY_ABS mode.
        bbox_mode = BoxMode.XYXY_ABS.value
        ids, boxes, objectness_logits = [], [], []
        for prediction in predictions:
            ids.append(prediction["image_id"])
            boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
            objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

        proposal_data = {
            "boxes": boxes,
            "objectness_logits": objectness_logits,
            "ids": ids,
            "bbox_mode": bbox_mode,
        }
        with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
            pickle.dump(proposal_data, f)

    if not self._do_evaluation:
        self._logger.info("Annotations are not available for evaluation.")
        return

    self._logger.info("Evaluating bbox proposals ...")
    res = {}
    areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
    for limit in [100, 1000]:
        for area, suffix in areas.items():
            stats = _evaluate_box_proposals(
                predictions, self._coco_api, area=area, limit=limit, classes=self.classes_to_eval)
            key = "AR{}@{:d}".format(suffix, limit)
            res[key] = float(stats["ar"].item() * 100)

    def coco_clsid_to_name(clsid, metadata):
        thing_classes = metadata.thing_classes
        coco_id_to_contiguous_id = metadata.thing_dataset_id_to_contiguous_id
        return thing_classes[coco_id_to_contiguous_id[clsid]]

    results_per_category = []
    limit = 1000
    area, suffix = 'all', ""
    for cls_id in self.classes_to_eval:
        class_name = coco_clsid_to_name(cls_id, self._metadata)
        self._logger.info(f"Result for cls_id {class_name}: {cls_id}")
        stats = _evaluate_box_proposals(
            predictions, self._coco_api, area=area, limit=limit, classes=[cls_id])
        key = "AR{}@{:d}-{}".format(suffix, limit, class_name)
        value = float(stats["ar"].item() * 100)
        results_per_category.append((key, value))

    print(results_per_category)
    print(np.mean([ap for _, ap in results_per_category]))
    self._logger.info("Proposal metrics: \n" + create_small_table(res))
    self._results["box_proposals"] = res

def _eval_camera(self, predictions):
    acc_threshold = {
        "tran": 1.0,
        "rot": 30,
    }  # threshold for translation and rotation error to say prediction is correct.
    tran_logits = torch.stack([p["camera"]["logits"]["tran"] for p in predictions]).numpy()
    rot_logits = torch.stack([p["camera"]["logits"]["rot"] for p in predictions]).numpy()
    gt_tran_cls = torch.stack([p["camera"]["gts"]["tran_cls"] for p in predictions]).numpy()
    gt_rot_cls = torch.stack([p["camera"]["gts"]["rot_cls"] for p in predictions]).numpy()
    gt_tran = np.vstack([p["camera"]["gts"]["tran"] for p in predictions])
    gt_rot = np.vstack([p["camera"]["gts"]["rot"] for p in predictions])

    topk_acc = get_camera_top_k_acc(
        logits={"tran": tran_logits, "rot": rot_logits},
        gts={"tran_cls": gt_tran_cls, "rot_cls": gt_rot_cls},
        n_clusters={
            "tran": self.kmeans_trans.n_clusters,
            "rot": self.kmeans_rots.n_clusters,
        },
    )
    topk_acc["tran"] = np.cumsum(topk_acc["tran"]) / np.sum(topk_acc["tran"])
    topk_acc["rot"] = np.cumsum(topk_acc["rot"]) / np.sum(topk_acc["rot"])

    pred_tran = self.class2xyz(np.argmax(tran_logits, axis=1))
    pred_rot = self.class2quat(np.argmax(rot_logits, axis=1))

    top1_error = {
        "tran": np.linalg.norm(gt_tran - pred_tran, axis=1),
        "rot": angle_error_vec(pred_rot, gt_rot),
    }
    top1_accuracy = {
        "tran": (top1_error["tran"] < acc_threshold["tran"]).sum() / len(top1_error["tran"]),
        "rot": (top1_error["rot"] < acc_threshold["rot"]).sum() / len(top1_error["rot"]),
    }
    camera_metrics = {
        f"top1 T err < {acc_threshold['tran']}": top1_accuracy["tran"] * 100,
        f"top1 R err < {acc_threshold['rot']}": top1_accuracy["rot"] * 100,
        "T mean err": np.mean(top1_error["tran"]),
        "R mean err": np.mean(top1_error["rot"]),
        "T median err": np.median(top1_error["tran"]),
        "R median err": np.median(top1_error["rot"]),
    }
    logger.info("Camera metrics: \n" + create_small_table(camera_metrics))

    topk_metrics = {
        "top1 T acc": topk_acc["tran"][0] * 100,
        "top5 T acc": topk_acc["tran"][4] * 100,
        "top10 T acc": topk_acc["tran"][9] * 100,
        "top32 T acc": topk_acc["tran"][31] * 100,
        "top1 R acc": topk_acc["rot"][0] * 100,
        "top5 R acc": topk_acc["rot"][4] * 100,
        "top10 R acc": topk_acc["rot"][9] * 100,
        "top32 R acc": topk_acc["rot"][31] * 100,
    }
    logger.info("Camera topk: \n" + create_small_table(topk_metrics))
    camera_metrics.update(topk_metrics)
    self._results.update(camera_metrics)

    summary = {
        "errors": np.array([top1_error["tran"], top1_error["rot"]]),
        "preds": {
            "tran": pred_tran,
            "rot": pred_rot,
            "tran_cls": np.argmax(tran_logits, axis=1).reshape(-1, 1),
            "rot_cls": np.argmax(rot_logits, axis=1).reshape(-1, 1),
        },
        "gts": {
            "tran": gt_tran,
            "rot": gt_rot,
            "tran_cls": gt_tran_cls,
            "rot_cls": gt_rot_cls,
        },
        "logits_sms": {
            "tran": softmax(tran_logits, axis=1),
            "rot": softmax(rot_logits, axis=1),
        },
        "accuracy": [top1_accuracy["tran"], top1_accuracy["rot"]],
        "keys": [p["0"]["file_name"] + p["1"]["file_name"] for p in predictions],
    }
    return summary

def evaluate_for_planes(
    predictions,
    dataset,
    metadata,
    filter_iou,
    iou_thresh=0.5,
    normal_threshold=30,
    offset_threshold=0.3,
    device=None,
):
    if device is None:
        device = torch.device("cpu")
    # classes
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}

    # initialize tensors to record box & mask AP, number of gt positives
    box_apscores, box_aplabels = {}, {}
    mask_apscores, mask_aplabels = {}, {}
    plane_apscores, plane_aplabels = {}, {}
    plane_offset_errs, plane_normal_errs = [], []
    npos = {}
    for cat_id in cat_ids:
        box_apscores[cat_id] = [torch.tensor([], dtype=torch.float32, device=device)]
        box_aplabels[cat_id] = [torch.tensor([], dtype=torch.uint8, device=device)]
        mask_apscores[cat_id] = [torch.tensor([], dtype=torch.float32, device=device)]
        mask_aplabels[cat_id] = [torch.tensor([], dtype=torch.uint8, device=device)]
        plane_apscores[cat_id] = [torch.tensor([], dtype=torch.float32, device=device)]
        plane_aplabels[cat_id] = [torch.tensor([], dtype=torch.uint8, device=device)]
        npos[cat_id] = 0.0

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        npos[gt_label] += 1.0

    for prediction in predictions:
        original_id = prediction["image_id"]
        image_width = dataset.loadImgs([original_id])[0]["width"]
        image_height = dataset.loadImgs([original_id])[0]["height"]
        if "instances" not in prediction:
            continue

        num_img_preds = len(prediction["instances"])
        if num_img_preds == 0:
            continue

        # predictions
        scores, boxes, labels, masks_rles = [], [], [], []
        for ins in prediction["instances"]:
            scores.append(ins["score"])
            boxes.append(ins["bbox"])
            labels.append(ins["category_id"])
            masks_rles.append(ins["segmentation"])
        boxes = np.array(boxes)  # xywh from coco
        boxes = BoxMode.convert(boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        boxes = Boxes(torch.tensor(np.array(boxes))).to(device)
        planes = prediction["pred_plane"]

        # ground truth
        # anotations corresponding to original_id (aka coco image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        gt_anns = dataset.loadAnns(gt_ann_ids)
        # get original ground truth mask, box, label & mesh
        gt_boxes, gt_labels, gt_mask_rles, gt_planes = [], [], [], []
        for ann in gt_anns:
            gt_boxes.append(ann["bbox"])
            gt_labels.append(ann["category_id"])
            if isinstance(ann["segmentation"], list):
                polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
                rles = mask_util.frPyObjects(polygons, image_height, image_width)
                rle = mask_util.merge(rles)
            elif isinstance(ann["segmentation"], dict):  # RLE
                rle = ann["segmentation"]
            else:
                raise TypeError(f"Unknown segmentation type {type(ann['segmentation'])}!")
            gt_mask_rles.append(rle)
            gt_planes.append(ann["plane"])

        gt_boxes = np.array(gt_boxes)  # xywh from coco
        gt_boxes = BoxMode.convert(gt_boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        faux_gt_targets = Boxes(torch.tensor(gt_boxes, dtype=torch.float32, device=device))

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        # valid_pred_ids = (boxiou > filter_iou).sum(axis=1) > 0

        # mask iou
        miou = mask_util.iou(masks_rles, gt_mask_rles, [0] * len(gt_mask_rles))

        plane_metrics = compare_planes(planes, gt_planes)

        # sort predictions in descending order
        scores = torch.tensor(np.array(scores), dtype=torch.float32)
        scores_sorted, idx_sorted = torch.sort(scores, descending=True)

        # record assigned gt.
        box_covered = []
        mask_covered = []
        plane_covered = []
        for pred_id in range(num_img_preds):
            # remember we only evaluate the preds that have overlap more than
            # iou_filter with the ground truth prediction
            # if valid_pred_ids[idx_sorted[pred_id]] == 0:
            #     continue
            # Assign pred to gt
            gt_id = torch.argmax(boxiou[idx_sorted[pred_id]])
            gt_label = gt_labels[gt_id]
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[idx_sorted[pred_id]]]
            pred_miou = miou[idx_sorted[pred_id], gt_id]
            pred_biou = boxiou[idx_sorted[pred_id], gt_id]
            pred_score = scores[idx_sorted[pred_id]].view(1).to(device)

            normal = plane_metrics["norm"][idx_sorted[pred_id], gt_id].item()
            offset = plane_metrics["offset"][idx_sorted[pred_id], gt_id].item()
            plane_offset_errs.append(offset)
            plane_normal_errs.append(normal)

            # mask
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if (
                (pred_label == gt_label)
                and (pred_miou > iou_thresh)
                and (gt_id not in mask_covered)
            ):
                tpfp[0] = 1
                mask_covered.append(gt_id)
            mask_apscores[pred_label].append(pred_score)
            mask_aplabels[pred_label].append(tpfp)

            # box
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if (
                (pred_label == gt_label)
                and (pred_biou > iou_thresh)
                and (gt_id not in box_covered)
            ):
                tpfp[0] = 1
                box_covered.append(gt_id)
            box_apscores[pred_label].append(pred_score)
            box_aplabels[pred_label].append(tpfp)

            # plane
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if (
                (pred_label == gt_label)
                and (normal < normal_threshold)
                and (offset < offset_threshold)
                and (gt_id not in plane_covered)
            ):
                tpfp[0] = 1
                plane_covered.append(gt_id)
            plane_apscores[pred_label].append(pred_score)
            plane_aplabels[pred_label].append(tpfp)

    # check things for eval
    # assert npos.sum() == len(dataset.dataset["annotations"])

    # convert to tensors
    detection_metrics = {}
    boxap, maskap, planeap = 0.0, 0.0, 0.0
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        cat_box_ap = VOCap.compute_ap(
            torch.cat(box_apscores[cat_id]),
            torch.cat(box_aplabels[cat_id]),
            npos[cat_id],
        ).item()
        boxap += cat_box_ap
        detection_metrics["box_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_box_ap

        cat_mask_ap = VOCap.compute_ap(
            torch.cat(mask_apscores[cat_id]),
            torch.cat(mask_aplabels[cat_id]),
            npos[cat_id],
        ).item()
        maskap += cat_mask_ap
        detection_metrics["mask_ap@%.1f - %s" % (iou_thresh, cat_name)] = cat_mask_ap

        cat_plane_ap = VOCap.compute_ap(
            torch.cat(plane_apscores[cat_id]),
            torch.cat(plane_aplabels[cat_id]),
            npos[cat_id],
        ).item()
        planeap += cat_plane_ap
        detection_metrics[
            "plane_ap@iou%.1fnormal%.1foffset%.1f - %s"
            % (iou_thresh, normal_threshold, offset_threshold, cat_name)
        ] = cat_plane_ap

    detection_metrics["box_ap@%.1f" % iou_thresh] = boxap / valid
    detection_metrics["mask_ap@%.1f" % iou_thresh] = maskap / valid
    detection_metrics[
        "plane_ap@iou%.1fnormal%.1foffset%.1f"
        % (iou_thresh, normal_threshold, offset_threshold)
    ] = planeap / valid
    logger.info("Detection metrics: \n" + create_small_table(detection_metrics))

    plane_metrics = {}
    plane_normal_errs = np.array(plane_normal_errs)
    plane_offset_errs = np.array(plane_offset_errs)
    plane_metrics["%normal<10"] = sum(plane_normal_errs < 10) / len(plane_normal_errs) * 100
    plane_metrics["%normal<30"] = sum(plane_normal_errs < 30) / len(plane_normal_errs) * 100
    plane_metrics["%offset<0.5"] = sum(plane_offset_errs < 0.5) / len(plane_offset_errs) * 100
    plane_metrics["%offset<0.3"] = sum(plane_offset_errs < 0.3) / len(plane_offset_errs) * 100
    plane_metrics["mean_normal"] = plane_normal_errs.mean()
    plane_metrics["median_normal"] = np.median(plane_normal_errs)
    plane_metrics["mean_offset"] = plane_offset_errs.mean()
    plane_metrics["median_offset"] = np.median(plane_offset_errs)
    logger.info("Plane metrics: \n" + create_small_table(plane_metrics))
    plane_metrics.update(detection_metrics)
    return plane_metrics