Code example #1
def _evaluate_predictions_on_coco(coco_gt, coco_results):
    metrics = ["AP", "AP50", "AP75", "APm", "APl"]

    logger = logging.getLogger(__name__)

    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in metrics}
        results_gpsm = {metric: -1 for metric in metrics}
        results_iou = {metric: -1 for metric in metrics}  # match the arity of the non-empty return
        return results_gps, results_gpsm, results_iou

    coco_dt = coco_gt.loadRes(coco_results)
    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics)
    logger.info("Evaluation results for densepose, GPS metric: \n" +
                create_small_table(results_gps))
    results_iou = _evaluate_predictions_on_coco_iou(coco_gt, coco_dt, metrics)
    logger.info("Evaluation results for densepose, IOU metric: \n" +
                create_small_table(results_iou))
    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt,
                                                      metrics)
    logger.info("Evaluation results for densepose, GPSm metric: \n" +
                create_small_table(results_gpsm))

    return results_gps, results_gpsm, results_iou
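
Every snippet in this listing reports metrics through create_small_table. To read or run the excerpts in isolation, a minimal stand-in for such a helper might look like the sketch below; it assumes the tabulate package and is not claimed to be identical to the detectron2 utility of the same name.

from tabulate import tabulate


def create_small_table(small_dict):
    # Render a flat {metric: score} dict as a one-row, pipe-formatted table.
    # Minimal stand-in, for reading the excerpts in isolation.
    keys, values = tuple(zip(*small_dict.items()))
    return tabulate(
        [values],
        headers=keys,
        tablefmt="pipe",
        floatfmt=".3f",
        numalign="center",
    )


# e.g. print(create_small_table({"AP": 37.5, "AP50": 58.2, "AP75": 40.1}))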
Code example #2
def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5):
    logger = logging.getLogger(__name__)

    segm_metrics = _get_segmentation_metrics()
    densepose_metrics = _get_densepose_metrics(min_threshold)
    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in densepose_metrics}
        results_gpsm = {metric: -1 for metric in densepose_metrics}
        results_segm = {metric: -1 for metric in segm_metrics}
        return results_gps, results_gpsm, results_segm

    coco_dt = coco_gt.loadRes(coco_results)
    results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt,
                                                      segm_metrics,
                                                      min_threshold)
    logger.info("Evaluation results for densepose segm: \n" +
                create_small_table(results_segm))
    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt,
                                                    densepose_metrics,
                                                    min_threshold)
    logger.info("Evaluation results for densepose, GPS metric: \n" +
                create_small_table(results_gps))
    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt,
                                                      densepose_metrics,
                                                      min_threshold)
    logger.info("Evaluation results for densepose, GPSm metric: \n" +
                create_small_table(results_gpsm))
    return results_gps, results_gpsm, results_segm
Code example #3
File: evaluator.py, Project: gaurav67890/Detectron2
def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5):
    metrics = ["AP"]
    if min_threshold <= 0.201:
        metrics += ["AP20"]
    if min_threshold <= 0.301:
        metrics += ["AP30"]
    if min_threshold <= 0.401:
        metrics += ["AP40"]
    metrics.extend(["AP50", "AP75", "APm", "APl"])
    logger = logging.getLogger(__name__)

    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        results_gps = {metric: -1 for metric in metrics}
        results_gpsm = {metric: -1 for metric in metrics}
        results_segm = {metric: -1 for metric in metrics}  # match the arity of the non-empty return
        return results_gps, results_gpsm, results_segm

    coco_dt = coco_gt.loadRes(coco_results)
    results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt,
                                                      metrics, min_threshold)
    logger.info("Evaluation results for densepose segm: \n" +
                create_small_table(results_segm))
    results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics,
                                                    min_threshold)
    logger.info("Evaluation results for densepose, GPS metric: \n" +
                create_small_table(results_gps))
    results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt,
                                                      metrics, min_threshold)
    logger.info("Evaluation results for densepose, GPSm metric: \n" +
                create_small_table(results_gpsm))
    return results_gps, results_gpsm, results_segm
Code example #4
    def evaluate(self):
        """
        Returns:
            dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75".
        """
        all_predictions = comm.gather(self._predictions, dst=0)
        if not comm.is_main_process():
            return
        predictions = defaultdict(list)
        for predictions_per_rank in all_predictions:
            for clsid, lines in predictions_per_rank.items():
                predictions[clsid].extend(lines)
        del all_predictions

        self._logger.info(f"Evaluating {self._dataset_name}")

        with tempfile.TemporaryDirectory(prefix="digits_voc_eval_") as dirname:
            res_file_template = os.path.join(dirname, "{}.txt")

            aps = defaultdict(list)  # iou -> ap per class
            aps_base = defaultdict(list)
            aps_novel = defaultdict(list)
            exist_base, exist_novel = False, False
            for cls_id, cls_name in enumerate(self._classes):
                lines = predictions.get(cls_id, [""])

                with open(res_file_template.format(cls_name), "w") as f:
                    f.write("\n".join(lines))

                for thresh in range(50, 100, 5):
                    rec, prec, ap = voc_eval(
                        res_file_template,
                        self._anno_file_template,
                        self._image_set_path,
                        cls_name,
                        ovthresh=thresh / 100.0,
                    )
                    aps[thresh].append(ap * 100)

        ret = OrderedDict()
        mAP = {iou: np.mean(x) for iou, x in aps.items()}
        ret["bbox"] = {
            "AP": np.mean(list(mAP.values())),
            "AP50": mAP[50],
            "AP75": mAP[75]
        }

        # write per class AP to logger
        per_class_res = {
            self._classes[idx]: ap
            for idx, ap in enumerate(aps[50])
        }

        self._logger.info("Evaluate per-class mAP50:\n" +
                          create_small_table(per_class_res))
        self._logger.info("Evaluate overall bbox:\n" +
                          create_small_table(ret["bbox"]))
        return ret
Code example #5
def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, class_names=None):
    """
    Args:
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        "Evaluation results for {}: \n".format(iou_type) + \
            create_small_table(results)
    )
    return results
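
A sketch of how a function like the one above might be driven, assuming it is in scope and the lvis package is installed; the annotation path and the single fake detection are placeholders, not taken from any of the listed projects.

from lvis import LVIS

# Placeholder ground truth and detections (hypothetical paths/values).
lvis_gt = LVIS("datasets/lvis/lvis_v1_val.json")
fake_results = [
    {"image_id": 397133, "category_id": 1,
     "bbox": [10.0, 20.0, 50.0, 80.0], "score": 0.9},
]
print(_evaluate_predictions_on_lvis(lvis_gt, fake_results, "bbox"))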
Code example #6
    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
        """
        Derive the desired score numbers from summarized COCOeval.
        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.
        Returns:
            a dict of {metric name: score}
        """

        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            self._logger.warn("No predictions from the model! Set scores to -1")
            return {metric: -1 for metric in metrics}

        # the standard metrics
        results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
        self._logger.info(
            "Evaluation results for {}: \n".format(iou_type) + create_small_table(results)
        )

        if class_names is None or len(class_names) <= 1:
            return results
        # Compute per-category AP
        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        assert len(class_names) == precisions.shape[2]

        results_per_category = []
        for idx, name in enumerate(class_names):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category.append(("{}".format(name), float(ap * 100)))

        # tabulate it
        N_COLS = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
        table = tabulate(
            results_2d,
            tablefmt="pipe",
            floatfmt=".3f",
            headers=["category", "AP"] * (N_COLS // 2),
            numalign="left",
        )
        self._logger.info("Per-category {} AP: \n".format(iou_type) + table)

        results.update({"AP-" + name: ap for name, ap in results_per_category})
        return results
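
The per-category loop above depends on the layout of COCOeval's precision tensor. A toy illustration with random numbers, only to make the indexing concrete:

import numpy as np

# precision dims: (iou, recall, cls, area range, max dets); -1 marks "no data".
T, R, K, A, M = 10, 101, 3, 4, 3
precisions = np.random.rand(T, R, K, A, M)
precisions[:, :, 2, :, :] = -1  # pretend class 2 never produced valid precision

for idx in range(K):
    p = precisions[:, :, idx, 0, -1]  # area range 0 = all areas, last max-dets setting
    p = p[p > -1]
    ap = np.mean(p) if p.size else float("nan")
    print(f"class {idx}: AP = {ap * 100:.3f}")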
Code example #7
File: oideval.py, Project: xychen9459/UniDet
def _evaluate_predictions_on_oid(oid_gt, oid_results_path, eval_seg=False):
    logger = logging.getLogger(__name__)
    metrics = ["AP50", "AP50_expand"]

    results = {}
    oid_eval = OIDEval(oid_gt,
                       oid_results_path,
                       'bbox',
                       expand_pred_label=False)
    oid_eval.run()
    oid_eval.print_results()
    results["AP50"] = oid_eval.get_results()["AP50"]

    if eval_seg:
        oid_eval = OIDEval(oid_gt,
                           oid_results_path,
                           'segm',
                           expand_pred_label=False)
        oid_eval.run()
        oid_eval.print_results()
        results["AP50_segm"] = oid_eval.get_results()["AP50"]
    else:
        oid_eval = OIDEval(oid_gt,
                           oid_results_path,
                           'bbox',
                           expand_pred_label=True)
        oid_eval.run()
        oid_eval.print_results()
        results["AP50_expand"] = oid_eval.get_results()["AP50"]

    logger.info("Evaluation results for bbox: \n" + \
        create_small_table(results))
    return results
Code example #8
    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or List): None represents no predictions from model.
            iou_type (str):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.
        Returns:
            a dict of {metric name: score} used by print_csv_format to print and saved in trainer storage
        """
        results = {}
        # metrics = ["AP", "mMR", "Recall"] # add FPPI etc
        metrics = {
            "bbox": ["mAP", "mMR", "max_recall", "fppi0.01", "fppi0.1", "fppi1.0"],
        }[iou_type]

        if coco_eval is None:
            self._logger.warning("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # the standard metrics
        results = {
            k: v for k, v in coco_eval.items() if 'total' in k
        }
        # results = {metric: coco_eval[idx]
        #            for idx, metric in enumerate(metrics)}
        small_table = create_small_table(results)
        self._logger.info(
            "Evaluation results for {}: \n".format(iou_type) + small_table
        )
        # print(coco_eval)

        return results
Code example #9
    def evaluate(self):
        if self._distributed:
            synchronize()
            endpoint_errors = all_gather(self._endpoint_errors)
            endpoint_errors = [per_image for per_gpu in endpoint_errors for per_image in per_gpu]
            self._predictions = all_gather(self._predictions)
            if not is_main_process():
                return

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "flow_predictions.json")
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._predictions))

        ave_epe = sum(endpoint_errors) / len(endpoint_errors)
        res = {"ave_epe": ave_epe}

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "flow_evaluation.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(res, f)

        results = OrderedDict({"flow": res})
        small_table = create_small_table(res)
        self._logger.info("Evaluation results for flow: \n" + small_table)
        dump_info_one_task = {
            "task": "flow",
            "tables": [small_table],
        }
        _dump_to_markdown([dump_info_one_task])
        return results
Code example #10
    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            self._predictions = comm.gather(self._predictions, dst=0)
            self._predictions = list(itertools.chain(*self._predictions))

            self.submit_results = comm.gather(self.submit_results, dst=0)
            self.submit_results = list(itertools.chain(*self.submit_results))

            if not comm.is_main_process():
                return {}

        if len(self._predictions) == 0:
            self._logger.warning(
                "[COCOEvaluator] Did not receive valid predictions.")
            return {}

        self._logger.info("Preparing results for COCO format ...")
        self._coco_results = list(
            itertools.chain(*[x["instances"] for x in self._predictions]))
        if self._output_dir:
            res_file = os.path.join(self._output_dir,
                                    "crowdhuman_evaluate_results.json")
            self._logger.info("Saving results to {}".format(res_file))
            with PathManager.open(res_file, "w") as f:
                f.write(json.dumps(self._coco_results))
                f.flush()

            self._logger.info("Saving results to {}".format(res_file))
            submit_file = os.path.join(self._output_dir, "submission.txt")
            with PathManager.open(submit_file, "w") as f:
                for result in self.submit_results:
                    f.write(json.dumps(result))
                    f.write("\n")
                f.flush()

        self._logger.info("Evaluating predictions ...")

        metrics = ["ALL"]
        results = {}
        ret_results = OrderedDict()
        for gt_json in [self._metadata.gt_file]:
            name = gt_json.split("/")[-1].split(".")[0]
            for id_setup in range(len(metrics)):
                cocoGt = COCO(gt_json)
                cocoDt = cocoGt.loadRes(res_file)
                imgIds = sorted(cocoGt.getImgIds())
                cocoEval = CrowdHumanEval(cocoGt, cocoDt, "bbox")
                cocoEval.params.imgIds = imgIds
                cocoEval.evaluate(id_setup)
                cocoEval.accumulate()
                performance_dict = cocoEval.summarize(id_setup)
                for key in performance_dict.keys():
                    results[name + " " + key] = performance_dict[key]
        self._logger.info(
            "Evaluation results for Pedestrian Detection on CrowdHuman: \n" +
            create_small_table(results))
        ret_results["PedestrianDetection"] = copy.deepcopy(results)
        return ret_results
Code example #11
def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets=None, class_names=None
):
    """
    Copied from detectron2.evaluation.lvis_evaluation, with support for max_dets.

    Args:
        iou_type (str):
        kpt_oks_sigmas (list[float]):
        max_dets (None or int)
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    #####
    # <modified>
    if max_dets is None:
        max_dets = 300

    lvis_results_obj = LVISResults(lvis_gt, lvis_results, max_dets=max_dets)
    lvis_eval = LVISEval(lvis_gt, lvis_results_obj, iou_type)
    lvis_eval.params.max_dets = max_dets
    # </modified>
    #####
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        f"Evaluation results for {iou_type}, max_dets {max_dets} \n"
        + create_small_table(results)
    )
    return results
Code example #12
def _evaluate_predictions_on_lvis(lvis_gt,
                                  lvis_results,
                                  iou_type,
                                  max_dets_per_image=None,
                                  class_names=None):
    """
    Args:
        iou_type (str):
        max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
            This limit, by default of the LVIS dataset, is 300.
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    if max_dets_per_image is None:
        max_dets_per_image = 300  # Default for LVIS dataset

    from lvis import LVISEval, LVISResults

    logger.info(
        f"Evaluating with max detections per image = {max_dets_per_image}")
    lvis_results = LVISResults(lvis_gt,
                               lvis_results,
                               max_dets=max_dets_per_image)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) +
                create_small_table(results))
    return results
Code example #13
File: evaluator.py, Project: xiaohu2015/detectron2
def _derive_results_from_coco_eval(coco_eval, eval_mode_name, metrics,
                                   class_names, min_threshold, img_ids):
    if img_ids is not None:
        coco_eval.params.imgIds = img_ids
    coco_eval.params.iouThrs = np.linspace(
        min_threshold,
        0.95,
        int(np.round((0.95 - min_threshold) / 0.05)) + 1,
        endpoint=True)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    results = {
        metric: float(coco_eval.stats[idx] * 100)
        for idx, metric in enumerate(metrics)
    }
    logger = logging.getLogger(__name__)
    logger.info(
        f"Evaluation results for densepose, {eval_mode_name} metric: \n" +
        create_small_table(results))
    if class_names is None or len(class_names) <= 1:
        return results

    # Compute per-category AP, the same way as it is done in D2
    # (see detectron2/evaluation/coco_evaluation.py):
    precisions = coco_eval.eval["precision"]
    # precision has dims (iou, recall, cls, area range, max dets)
    assert len(class_names) == precisions.shape[2]

    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0, -1]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append((f"{name}", float(ap * 100)))

    # tabulate it
    n_cols = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    results_2d = itertools.zip_longest(
        *[results_flatten[i::n_cols] for i in range(n_cols)])
    table = tabulate(
        results_2d,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", "AP"] * (n_cols // 2),
        numalign="left",
    )
    logger.info(f"Per-category {eval_mode_name} AP: \n" + table)

    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results
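
As a quick sanity check of the iouThrs construction above: with min_threshold = 0.5 the linspace call reproduces the ten standard COCO thresholds 0.50:0.05:0.95.

import numpy as np

min_threshold = 0.5
iou_thrs = np.linspace(
    min_threshold, 0.95,
    int(np.round((0.95 - min_threshold) / 0.05)) + 1,
    endpoint=True)
print(iou_thrs)  # [0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85 0.9  0.95]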
Code example #14
    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, interactness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                interactness_logits.append(
                    prediction["proposals"].interactness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "interactness_logits": interactness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            logger.info("Annotations are not available for evaluation.")
            return

        logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 500]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions,
                                                self._coco_api,
                                                area=area,
                                                limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
                for sub_key in ["", "_known", "_novel"]:
                    key = "R{}{}@{:d}+IoU=0.5".format(suffix, sub_key, limit)
                    res[key] = float(
                        stats["recalls{}".format(sub_key)][0].item() * 100)
                    print(" R{}{}@{:d}[email protected] = {:.3f}".format(
                        suffix, sub_key, limit, res[key]))

        logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
Code example #15
    def _eval_box_proposals(self):
        """
        Evaluate the box proposals in self._predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """

        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            print(bbox_mode)

            ids, boxes, objectness_logits = [], [], []
            for prediction in self._predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(
                    prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            #pdb.set_trace()
            with PathManager.open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(self._predictions,
                                                self._coco_api,
                                                area=area,
                                                limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
Code example #16
    def _eval_affinity(self, predictions):
        """
        Evaluate plane correspondence.
        """
        logger.info("Evaluating embedding affinity ...")
        labels = []
        preds = []
        for pred in predictions:
            labels.extend(pred["labels"])
            preds.extend(pred["preds"])

        best_auc_ipaa = 0
        best_threshold = 0
        best_ipaa_dict = None
        for th in predictions[0]["ipaa_by_threshold"].keys():
            IPAA_dict = {}
            for i in range(11):
                IPAA_dict[i * 10] = 0
            for pred in predictions:
                for key in IPAA_dict.keys():
                    if pred["ipaa_by_threshold"][th] >= key / 100:
                        IPAA_dict[key] += 1
            auc_ipaa = compute_auc(IPAA_dict)
            if auc_ipaa > best_auc_ipaa:
                best_auc_ipaa = auc_ipaa
                best_threshold = th
                best_ipaa_dict = IPAA_dict

        if not len(labels):
            return
        auc = roc_auc_score(labels, preds) * 100
        ap = average_precision_score(labels, preds) * 100
        if best_ipaa_dict is None:
            results = {
                f"ap@iou={self._filter_iou}": ap,
                f"auc@iou={self._filter_iou}": auc,
            }
        else:
            results = {
                f"ap@iou={self._filter_iou}": ap,
                f"auc@iou={self._filter_iou}": auc,
                f"ipaa-80": best_ipaa_dict[80] / len(predictions),
                f"ipaa-90": best_ipaa_dict[90] / len(predictions),
                f"ipaa-100": best_ipaa_dict[100] / len(predictions),
                f"auc-ipaa": best_auc_ipaa,
                f"hungarian-threshold": best_threshold,
            }
        logger.info("Affinity metrics: \n" + create_small_table(results))
        self._results.update(results)
Code example #17
    def _calculate_accuracy_recall(self, gts, preds):
        cat = self._metadata.get("classification_classes", None)
        num_cls = len(cat)
        assert len(preds) > 0 and len(gts)
        image_to_idx = {}
        gt_cls_count = [0 for i in range(num_cls + 1)]
        for i, c in enumerate(gts):
            image_to_idx[c["image_id"]] = i
            gt_cls_count[c["category2_id"]] += 1

        count, c_model, c_part, c_toward = 0, 0, 0, 0
        cls_count = [0 for i in range(num_cls + 1)]
        for pd in preds:
            gt_idx = image_to_idx[pd["image_id"]]
            gt = gts[gt_idx]
            if gt["category2_id"] == pd["category_id"]:
                count += 1
                cls_count[pd["category_id"]] += 1
            else:
                if self._view_error:
                    self._vis_result(gt, pd)

            if gt["category2_id"] == 2:
                c_model += 1
                if pd["toward"] == gt["toward"]:
                    c_toward += 1
                if pd["part"] == gt["part"]:
                    c_part += 1

        accuracy = float(count) / (float(len(gts)) + 0.0001) * 100
        part_accuracy = float(c_part) / (float(c_model) + 0.0001) * 100
        toward_accuracy = float(c_toward) / (float(c_model) + 0.0001) * 100
        cls_acc = {}
        for i, (pd, gt) in enumerate(zip(cls_count, gt_cls_count)):
            if not gt == 0:
                cls_acc[cat[i - 1] +
                        "_acc"] = float(pd) / (float(gt) + 0.0001) * 100

        results = {
            "accuracy": accuracy,
            "part_accuracy": part_accuracy,
            "toward_accuracy": toward_accuracy
        }
        results.update(cls_acc)

        self._logger.info("Evaluation results for classification: \n" +
                          create_small_table(results))

        return results
Code example #18
    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            self._predictions = comm.gather(self._predictions, dst=0)
            self._predictions = list(itertools.chain(*self._predictions))

            if not comm.is_main_process():
                return {}

        if len(self._predictions) == 0:
            self._logger.warning("[ClassificationEvaluator] Did not receive valid predictions.")
            return {}

        pred_nums = [0] * len(self._metadata.classes)
        gt_nums = [0] * len(self._metadata.classes)
        correct_nums = [0] * len(self._metadata.classes)
        for p in self._predictions:
            if p['gt_class_id'] >= 0:
                gt_nums[p['gt_class_id']] += 1
                if p['gt_class_id'] == p['pred_class_id']:
                    correct_nums[p['gt_class_id']] += 1
            if p['pred_class_id'] >= 0:
                pred_nums[p['pred_class_id']] += 1
        result = {}
        eps = 0.00001
        for i, cls in enumerate(self._metadata.classes):
            idx = self._metadata.class_to_idx[cls]
            acc = correct_nums[idx] / (pred_nums[idx] + eps)
            recall = correct_nums[idx] / (gt_nums[idx] + eps)
            result.update({
                cls + "_acc": acc,
                cls + "_recall": recall
            })
        total_acc = sum(correct_nums) / (sum(pred_nums) + eps)
        total_recall = sum(correct_nums) / (sum(gt_nums) + eps)
        result.update({
            "total_acc": total_acc,
            "total_recall": total_recall
        })
        self._logger.info(
            "Evaluation results for classification: \n" + create_small_table(result)
        )
        results = OrderedDict()
        results["classification"] = result
        return results
Code example #19
File: evaluator.py, Project: veraposeidon/detectron2
def _evaluate_predictions_on_coco(coco_gt, coco_results):
    metrics = ["AP", "AP50", "AP75", "APm", "APl"]

    logger = logging.getLogger(__name__)

    if len(coco_results) == 0:  # cocoapi does not handle empty results very well
        logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    coco_dt = coco_gt.loadRes(coco_results)
    coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    # the standard metrics
    results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
    logger.info("Evaluation results for densepose: \n" + create_small_table(results))
    return results
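
A sketch of feeding such a function, assuming it and DensePoseCocoEval are in scope and pycocotools is installed; the annotation and results paths are placeholders.

import json

from pycocotools.coco import COCO

coco_gt = COCO("datasets/coco/annotations/densepose_minival2014.json")  # placeholder path
with open("coco_densepose_results.json") as f:  # placeholder results file
    coco_results = json.load(f)  # list of per-instance result dicts
print(_evaluate_predictions_on_coco(coco_gt, coco_results))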
Code example #20
    def _evaluate_predictions_ar(self, predictions):
        res = {}
        aspect_ratios = {
            "all ratios": [0 / 1, 1e5 / 1],
            " 0  - 1/5": [0 / 1, 1 / 5],
            "1/5 - 1/3": [1 / 5, 1 / 3],
            "1/3 - 3/1": [1 / 3, 3 / 1],
            "3/1 - 5/1": [3 / 1, 5 / 1],
            "5/1 - INF": [5 / 1, 1e5 / 1],
        }
        areas = {
            "all areas": [0, float("inf")],
            "small": [0, 32 ** 2],
            "medium": [32 ** 2, 96 ** 2],
            "large": [96 ** 2, float("inf")]
        }
        limits = [100]
        for limit in limits:
            stats = _evaluate_predictions_ar(
                predictions,
                self._coco_api,
                self._metadata,
                aspect_ratios=aspect_ratios,
                areas=areas,
                limit=limit)
            recalls = stats.pop("recalls")
            for i, key in enumerate(areas):
                res["AR-{}@{:d}".format(key, limit)] = recalls[:, -1, 0, i].mean() * 100
                res["mAR-{}@{:d}".format(key, limit)] = recalls[:, :-1, 0, i].mean() * 100

            for i, key in enumerate(aspect_ratios):
                res["AR-{}@{:d}".format(key, limit)] = recalls[:, -1, i, 0].mean() * 100
                res["mAR-{}@{:d}".format(key, limit)] = recalls[:, :-1, i, 0].mean() * 100

            key = "AR@{:d}".format(limit)
            res[key] = float(stats["ar"].item() * 100)
            key = "mAR@{:d}".format(limit)
            res[key] = float(stats["mar"].item() * 100)

        print("Proposal metrics: \n" + create_small_table(res))
        # stats["recalls"] = recalls
        res["ar-stats"] = stats
        self._results["ar"] = res
Code example #21
def main(predictions_file_path,
         json_file="datasets/coco/annotations/instances_val2017.json",
         oriented=False):
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)

    with open(predictions_file_path, mode="rb") as fp:
        predictions = torch.load(fp)

    print(len(predictions))
    res = {}
    if oriented:
        aspect_ratios = {
            "all": (0, 1),
            "0-0.2": (0, 0.2),
            "0.2-0.3*": (0.2, 1 / 3),
            "0.3*-1": (0.3, 1),
        }

    else:
        aspect_ratios = {
            "all": [0 / 1, 1000 / 1],
            "l1": [0 / 1, 1 / 5],
            "l2": [1 / 5, 1 / 3],
            "l3": [1 / 3, 3 / 1],
            "l4": [3 / 1, 5 / 1],
            "l5": [5 / 1, 1000 / 1],
        }
    num_pos_dict = dict()
    limits = [100]
    for limit in limits:
        for name, ratio_range in aspect_ratios.items():
            stats = evaluate_box_proposal(predictions,
                                          coco_api,
                                          aspect_ratio_range=ratio_range,
                                          limit=limit,
                                          oriented=oriented)
            key = "AR{}@{:d}".format(name, limit)
            res[key] = float(stats["ar"].item() * 100)
            num_pos_dict[name] = stats["num_pos"]

    print("Proposal metrics: \n" + create_small_table(res))
Code example #22
    def evaluate(self):
        """
        Compute evaluation metrics based on accumulated data.

        Returns:
            dict: keys are [ErrorRate, Accuracy, Precision, Recall, Specificity]
        """

        predictions = self._predictions

        if len(predictions['gt_cls']) == 0:
            self._logger.warning("[BinaryClassificationEvaluator] Did not receive valid predictions.")
            return {}

        if self.validate:
            pred_tensor = torch.stack(predictions['logits'], dim=0)
            gt_tensor = torch.LongTensor(predictions['gt_cls'])
            loss = nn.CrossEntropyLoss()(pred_tensor, gt_tensor)

        # if self._output_dir:
        #     PathManager.mkdirs(self._output_dir)
        #     file_path = os.path.join(self._output_dir, "class_predictions.pth")
        #     with PathManager.open(file_path, "wb") as f:
        #         torch.save(predictions, f)

        gt_ = np.array(predictions['gt_cls'])
        pred_ = np.array(predictions['pred_cls'])
        
        total = len(gt_)

        tp_ = np.logical_and(                gt_,                 pred_) # gt_ = 1 AND pred_ = 1
        fp_ = np.logical_and(np.logical_not(gt_),                 pred_) # gt_ = 0 AND pred_ = 1
        fn_ = np.logical_and(gt_                , np.logical_not(pred_)) # gt_ = 1 AND pred_ = 0
        tn_ = np.logical_and(np.logical_not(gt_), np.logical_not(pred_)) # gt_ = 0 AND pred_ = 0
        
        tp = np.sum(tp_)
        fp = np.sum(fp_)
        fn = np.sum(fn_)
        tn = np.sum(tn_)

        P = tp + fn
        N = tn + fp

        self._results = OrderedDict()
        class_names = self._metadata.thing_classes # Indicate which class is Positive
        
        self._results['ErrorRate'] = (fp + fn) / ((N + P) + 1e-5)
        self._results['Accuracy'] = (tp + tn) / ((N + P) + 1e-5)
        self._results['Precision'] = tp / ((tp + fp) + 1e-5)
        self._results['Recall'] = tp / (P + 1e-5)
        self._results['Specificity'] = 1 - fp / (N + 1e-5)

        results = {
            key: float(value * 100 if self._results[key] >= 0 else "nan")
            for key, value in self._results.items()
        }
        results[class_names[1]+'(P)'] = P
        results[class_names[0]+'(N)'] = N

        self._results[class_names[1]+'(P)'] = P
        self._results[class_names[0]+'(N)'] = N

        if self.validate:
            self._results['loss_cls'] = loss

        self._logger.info(
            "Evaluation results for classification: \n" + create_small_table(results)
        )
        if not np.isfinite(sum(self._results.values())):
            self._logger.info("Note that some metrics cannot be computed.")

        return copy.deepcopy(self._results)
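
A tiny self-check of the confusion-matrix arithmetic used above, with toy labels in place of real predictions:

import numpy as np

gt_ = np.array([1, 1, 0, 0, 1, 0])
pred_ = np.array([1, 0, 0, 1, 1, 0])

tp = np.sum(np.logical_and(gt_, pred_))                                  # 2
fp = np.sum(np.logical_and(np.logical_not(gt_), pred_))                  # 1
fn = np.sum(np.logical_and(gt_, np.logical_not(pred_)))                  # 1
tn = np.sum(np.logical_and(np.logical_not(gt_), np.logical_not(pred_)))  # 2

print("precision", tp / (tp + fp + 1e-5))  # ~0.667
print("recall   ", tp / (tp + fn + 1e-5))  # ~0.667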
Code example #23
    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.

        Returns:
            a dict of {metric name: score}
        """

        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            self._logger.warn(
                "No predictions from the model! Set scores to -1")
            return {metric: -1 for metric in metrics}

        # the standard metrics
        results = {
            metric: float(coco_eval.stats[idx] * 100)
            for idx, metric in enumerate(metrics)
        }
        self._logger.info("Evaluation results for {}: \n".format(iou_type) +
                          create_small_table(results))

        if class_names is None or len(class_names) <= 1:
            return results
        # Compute per-category AP
        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        assert len(class_names) == precisions.shape[2]

        results_per_category = []
        for idx, name in enumerate(class_names):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category.append(("{}".format(name), float(ap * 100)))

        histogram = np.load(
            os.path.join(global_cfg.OUTPUT_DIR,
                         f'histogram_{global_cfg.DATASETS.TEST[0]}.npy'))
        ind_sorted = np.argsort(histogram)[::-1]
        a = np.array(results_per_category)[ind_sorted]
        bins = range(len(class_names))

        fig = plt.figure(figsize=(10, 8))
        plt.bar(bins, height=a[:, 1].astype(float), color='#F6CD61')
        plt.xticks(bins,
                   np.array(class_names)[ind_sorted],
                   rotation=90,
                   fontsize=5)
        plt.ylim(bottom=0, top=100)
        storage = get_event_storage()
        storage.put_fig("AP", fig)
        if global_cfg.MODEL.GAMBLER_HEAD.SAVE_VIS_FILES is True:
            fig.savefig(
                os.path.join(global_cfg.OUTPUT_DIR,
                             "AP_" + str(storage.iter) + ".pdf"))
        plt.close('all')

        # tabulate it
        N_COLS = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        results_2d = itertools.zip_longest(
            *[results_flatten[i::N_COLS] for i in range(N_COLS)])
        table = tabulate(
            results_2d,
            tablefmt="pipe",
            floatfmt=".3f",
            headers=["category", "AP"] * (N_COLS // 2),
            numalign="left",
        )
        self._logger.info("Per-category {} AP: \n".format(iou_type) + table)

        results.update({"AP-" + name: ap for name, ap in results_per_category})
        return results
Code example #24
    def _derive_coco_results(
        self,
        coco_eval,
        iou_type,
        iouThr=None,
        class_names=None,
        known_classes=None,
        novel_classes=None,
    ):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.

        Returns:
            a dict of {metric name: score}
        """

        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            logger.warn("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # the standard metrics
        results = {
            metric: float(coco_eval.stats[idx] *
                          100 if coco_eval.stats[idx] >= 0 else "nan")
            for idx, metric in enumerate(metrics)
        }
        logger.info("Evaluation results for {}: \n".format(iou_type) +
                    create_small_table(results))
        if not np.isfinite(sum(results.values())):
            logger.info("Note that some metrics cannot be computed.")

        if class_names is None or len(class_names) <= 1:
            return results
        if "person" in known_classes:
            known_classes.remove("person")
        # Compute per-category AP
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        assert len(class_names) == precisions.shape[2]

        results_per_category = []
        results_known_category = []  # Exclude "person" category
        results_novel_category = []
        for idx, name in enumerate(class_names):
            # iou threshold index t: 0.5:0.05:0.9
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            if iouThr is not None:
                t = np.where(iouThr == coco_eval.params.iouThrs)[0]
                precision = precisions[t, :, idx, 0, -1]
            else:
                precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category.append(("{}".format(name), float(ap * 100)))
            if name in known_classes:
                results_known_category.append(ap * 100)
            if name in novel_classes:
                results_novel_category.append(ap * 100)

        str_suffix = "{:d}".format(int(iouThr * 100)) if iouThr else ""
        results_known_novel_split = {
            "AP{}-total".format(str_suffix): np.mean(results_known_category+results_novel_category),
            "AP{}-known".format(str_suffix): np.mean(results_known_category),
            "AP{}-novel".format(str_suffix): np.mean(results_novel_category) \
                if len(results_novel_category) else "nan"
        }

        # tabulate it
        N_COLS = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        results_2d = itertools.zip_longest(
            *[results_flatten[i::N_COLS] for i in range(N_COLS)])
        table = tabulate(
            results_2d,
            tablefmt="pipe",
            floatfmt=".3f",
            headers=["category", "AP"] * (N_COLS // 2),
            numalign="left",
        )
        logger.info("Per-category {} AP: \n".format(iou_type) + table)
        logger.info("Evaluation results for {}: \n".format(iou_type) +
                    create_small_table(results))
        logger.info(
            "Evaluation results for {} known/novel splits: \n".format(iou_type) + \
            create_small_table(results_known_novel_split)
        )
        results.update({"AP-" + name: ap for name, ap in results_per_category})
        return results
Code example #25
    def evaluate(self):
        """
        Returns:
            dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75".
        """
        all_predictions = comm.gather(self._predictions, dst=0)
        if not comm.is_main_process():
            return
        predictions = defaultdict(list)
        for predictions_per_rank in all_predictions:
            for clsid, lines in predictions_per_rank.items():
                predictions[clsid].extend(lines)
        del all_predictions

        self._logger.info(
            "Evaluating {} using {} metric. "
            "Note that results do not use the official Matlab API.".format(
                self._dataset_name, 2007 if self._is_2007 else 2012))

        with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
            res_file_template = os.path.join(dirname, "{}.txt")

            aps = defaultdict(list)  # iou -> ap per class
            aps_base = defaultdict(list)
            aps_novel = defaultdict(list)
            exist_base, exist_novel = False, False
            for cls_id, cls_name in enumerate(self._class_names):
                lines = predictions.get(cls_id, [""])

                with open(res_file_template.format(cls_name), "w") as f:
                    f.write("\n".join(lines))

                for thresh in range(50, 100, 5):
                    rec, prec, ap = voc_eval(
                        res_file_template,
                        self._anno_file_template,
                        self._image_set_path,
                        cls_name,
                        ovthresh=thresh / 100.0,
                        use_07_metric=self._is_2007,
                    )
                    aps[thresh].append(ap * 100)

                    if self._base_classes is not None and cls_name in self._base_classes:
                        aps_base[thresh].append(ap * 100)
                        exist_base = True

                    if self._novel_classes is not None and cls_name in self._novel_classes:
                        aps_novel[thresh].append(ap * 100)
                        exist_novel = True

        ret = OrderedDict()
        mAP = {iou: np.mean(x) for iou, x in aps.items()}
        ret["bbox"] = {
            "AP": np.mean(list(mAP.values())),
            "AP50": mAP[50],
            "AP75": mAP[75]
        }

        # adding evaluation of the base and novel classes
        if exist_base:
            mAP_base = {iou: np.mean(x) for iou, x in aps_base.items()}
            ret["bbox"].update({
                "bAP": np.mean(list(mAP_base.values())),
                "bAP50": mAP_base[50],
                "bAP75": mAP_base[75]
            })

        if exist_novel:
            mAP_novel = {iou: np.mean(x) for iou, x in aps_novel.items()}
            ret["bbox"].update({
                "nAP": np.mean(list(mAP_novel.values())),
                "nAP50": mAP_novel[50],
                "nAP75": mAP_novel[75]
            })

        # write per class AP to logger
        per_class_res = {
            self._class_names[idx]: ap
            for idx, ap in enumerate(aps[50])
        }

        self._logger.info("Evaluate per-class mAP50:\n" +
                          create_small_table(per_class_res))
        self._logger.info("Evaluate overall bbox:\n" +
                          create_small_table(ret["bbox"]))
        return ret
Code example #26
    def _eval_depth(self, predictions):
        depth_l1_dist = [p["depth_l1_dist"] for p in predictions]
        result = {"depth_l1_dist": np.mean(depth_l1_dist)}
        logger.info("Depth metrics: \n" + create_small_table(result))
        self._results.update(result)
Code example #27
    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.

        Returns:
            a dict of {metric name: score}
        """

        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            self._logger.warn("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # the standard metrics
        results = {
            metric: float(coco_eval.stats[idx] *
                          100 if coco_eval.stats[idx] >= 0 else "nan")
            for idx, metric in enumerate(metrics)
        }
        self._logger.info("Evaluation results for {}: \n".format(iou_type) +
                          create_small_table(results))
        if not np.isfinite(sum(results.values())):
            self._logger.info("Note that some metrics cannot be computed.")

        if class_names is None or len(class_names) <= 1:
            return results
        # Compute per-category AP
        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        assert len(class_names) == precisions.shape[2]

        results_per_category = []
        # TODO(): Rewrite this more modularly
        results_per_category_AP50 = []
        for idx, name in enumerate(class_names):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category.append(("{}".format(name), float(ap * 100)))

            # Compute for AP50
            # 0th first index is IOU .50
            precision = precisions[0, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category_AP50.append(
                ("{}".format(name), float(ap * 100)))

        table = _tabulate_per_category(results_per_category)
        self._logger.info("Per-category {} AP: \n".format(iou_type) + table)

        tableAP50 = _tabulate_per_category(results_per_category_AP50, "AP50")
        self._logger.info("Per-category {} AP50: \n".format(iou_type) +
                          tableAP50)

        results.update({"AP-" + name: ap for name, ap in results_per_category})
        # Update AP50
        results.update(
            {"AP50-" + name: ap
             for name, ap in results_per_category_AP50})
        return results
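
Code example #27 calls a _tabulate_per_category helper that is not part of the excerpt. A hypothetical reconstruction, modeled on the inline tabulate calls in the other examples (the function name and default metric label are assumptions):

import itertools

from tabulate import tabulate


def _tabulate_per_category(results_per_category, metric_name="AP"):
    # Lay out (category, score) pairs across up to three column pairs,
    # mirroring the inline tabulate calls elsewhere in this listing.
    n_cols = min(6, len(results_per_category) * 2)
    flat = list(itertools.chain(*results_per_category))
    rows = itertools.zip_longest(*[flat[i::n_cols] for i in range(n_cols)])
    return tabulate(
        rows,
        tablefmt="pipe",
        floatfmt=".3f",
        headers=["category", metric_name] * (n_cols // 2),
        numalign="left",
    )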
Code example #28
    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(
                    prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions,
                                                self._coco_api,
                                                area=area,
                                                limit=limit,
                                                classes=self.classes_to_eval)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)

        def coco_clsid_to_name(clsid, metadata):
            thing_classes = metadata.thing_classes
            coco_id_to_contiguous_id = metadata.thing_dataset_id_to_contiguous_id
            return thing_classes[coco_id_to_contiguous_id[clsid]]

        results_per_category = []
        limit = 1000
        area, suffix = 'all', ""
        for cls_id in self.classes_to_eval:
            class_name = coco_clsid_to_name(cls_id, self._metadata)
            self._logger.info(f"Result for cls_id {class_name}: {cls_id}")
            stats = _evaluate_box_proposals(predictions,
                                            self._coco_api,
                                            area=area,
                                            limit=limit,
                                            classes=[cls_id])
            key = "AR{}@{:d}-{}".format(suffix, limit, class_name)
            value = float(stats["ar"].item() * 100)
            results_per_category.append((key, value))

        self._logger.info("Per-category AR{}@{:d}: {}".format(
            suffix, limit, results_per_category))
        self._logger.info("Mean per-category AR{}@{:d}: {:.2f}".format(
            suffix, limit, float(np.mean([ar for _, ar in results_per_category]))))

        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
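
For context, _evaluate_box_proposals is assumed here to follow the usual Detectron2 recipe: collect, per ground-truth instance, the best IoU achieved by any of the top-limit proposals, then average recall over IoU thresholds 0.5:0.05:0.95. A minimal sketch of that aggregation (the helper name average_recall and the sample overlaps are illustrative, not part of the evaluator):

import numpy as np

def average_recall(gt_overlaps, thresholds=None):
    # gt_overlaps: best IoU with any proposal, one value per GT instance.
    if thresholds is None:
        thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
    gt_overlaps = np.sort(gt_overlaps)
    recalls = np.array([(gt_overlaps >= t).mean() for t in thresholds])
    return recalls.mean()  # corresponds to the "ar" value behind res["AR...@limit"]

# illustrative usage
print(average_recall(np.array([0.9, 0.6, 0.4, 0.75])) * 100)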
Code example #29
    def _eval_camera(self, predictions):
        acc_threshold = {
            "tran": 1.0,
            "rot": 30,
        }  # threshold for translation and rotation error to say prediction is correct.
        tran_logits = torch.stack(
            [p["camera"]["logits"]["tran"] for p in predictions]).numpy()
        rot_logits = torch.stack(
            [p["camera"]["logits"]["rot"] for p in predictions]).numpy()
        gt_tran_cls = torch.stack(
            [p["camera"]["gts"]["tran_cls"] for p in predictions]).numpy()
        gt_rot_cls = torch.stack(
            [p["camera"]["gts"]["rot_cls"] for p in predictions]).numpy()
        gt_tran = np.vstack([p["camera"]["gts"]["tran"] for p in predictions])
        gt_rot = np.vstack([p["camera"]["gts"]["rot"] for p in predictions])
        topk_acc = get_camera_top_k_acc(
            logits={
                "tran": tran_logits,
                "rot": rot_logits
            },
            gts={
                "tran_cls": gt_tran_cls,
                "rot_cls": gt_rot_cls
            },
            n_clusters={
                "tran": self.kmeans_trans.n_clusters,
                "rot": self.kmeans_rots.n_clusters,
            },
        )
        topk_acc["tran"] = np.cumsum(topk_acc["tran"]) / np.sum(
            topk_acc["tran"])
        topk_acc["rot"] = np.cumsum(topk_acc["rot"]) / np.sum(topk_acc["rot"])
        pred_tran = self.class2xyz(np.argmax(tran_logits, axis=1))
        pred_rot = self.class2quat(np.argmax(rot_logits, axis=1))

        top1_error = {
            "tran": np.linalg.norm(gt_tran - pred_tran, axis=1),
            "rot": angle_error_vec(pred_rot, gt_rot),
        }
        top1_accuracy = {
            "tran": (top1_error["tran"] < acc_threshold["tran"]).sum() /
            len(top1_error["tran"]),
            "rot": (top1_error["rot"] < acc_threshold["rot"]).sum() /
            len(top1_error["rot"]),
        }
        camera_metrics = {
            f"top1 T err < {acc_threshold['tran']}":
            top1_accuracy["tran"] * 100,
            f"top1 R err < {acc_threshold['rot']}": top1_accuracy["rot"] * 100,
            f"T mean err": np.mean(top1_error["tran"]),
            f"R mean err": np.mean(top1_error["rot"]),
            f"T median err": np.median(top1_error["tran"]),
            f"R median err": np.median(top1_error["rot"]),
        }
        logger.info("Camera metrics: \n" + create_small_table(camera_metrics))
        topk_metrics = {
            f"top1 T acc": topk_acc["tran"][0] * 100,
            f"top5 T acc": topk_acc["tran"][4] * 100,
            f"top10 T acc": topk_acc["tran"][9] * 100,
            f"top32 T acc": topk_acc["tran"][31] * 100,
            f"top1 R acc": topk_acc["rot"][0] * 100,
            f"top5 R acc": topk_acc["rot"][4] * 100,
            f"top10 R acc": topk_acc["rot"][9] * 100,
            f"top32 R acc": topk_acc["rot"][31] * 100,
        }
        logger.info("Camera topk: \n" + create_small_table(topk_metrics))
        camera_metrics.update(topk_metrics)
        self._results.update(camera_metrics)
        summary = {
            "errors":
            np.array([top1_error["tran"], top1_error["rot"]]),
            "preds": {
                "tran": pred_tran,
                "rot": pred_rot,
                "tran_cls": np.argmax(tran_logits, axis=1).reshape(-1, 1),
                "rot_cls": np.argmax(rot_logits, axis=1).reshape(-1, 1),
            },
            "gts": {
                "tran": gt_tran,
                "rot": gt_rot,
                "tran_cls": gt_tran_cls,
                "rot_cls": gt_rot_cls,
            },
            "logits_sms": {
                "tran": softmax(tran_logits, axis=1),
                "rot": softmax(rot_logits, axis=1),
            },
            "accuracy": [top1_accuracy["tran"], top1_accuracy["rot"]],
            "keys":
            [p["0"]["file_name"] + p["1"]["file_name"] for p in predictions],
        }
        return summary
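
The top-k normalization above (np.cumsum(...) / np.sum(...)) assumes get_camera_top_k_acc returns, for each k, the number of samples whose ground-truth bin was ranked k-th by the logits. A small self-contained sketch of that rank histogram and the resulting top-k accuracies (the helper name and the random inputs are illustrative):

import numpy as np

def top_k_rank_histogram(logits, gt_cls, n_clusters):
    # For each sample, find the rank of the ground-truth bin in the sorted
    # logits, then histogram the ranks: counts[k] = #samples ranked (k+1)-th.
    order = np.argsort(-logits, axis=1)                       # descending
    ranks = np.argmax(order == gt_cls.reshape(-1, 1), axis=1)
    return np.bincount(ranks, minlength=n_clusters)

logits = np.random.randn(8, 32)
gt = np.random.randint(0, 32, size=(8,))
counts = top_k_rank_histogram(logits, gt, 32)
topk = np.cumsum(counts) / counts.sum()   # same normalization as in the evaluator
print(topk[0], topk[4], topk[9])          # top-1, top-5, top-10 accuracy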
Code example #30
def evaluate_for_planes(
    predictions,
    dataset,
    metadata,
    filter_iou,
    iou_thresh=0.5,
    normal_threshold=30,
    offset_threshold=0.3,
    device=None,
):
    if device is None:
        device = torch.device("cpu")
    # classes
    cat_ids = sorted(dataset.getCatIds())
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    # initialize tensors to record box & mask AP, number of gt positives
    box_apscores, box_aplabels = {}, {}
    mask_apscores, mask_aplabels = {}, {}
    plane_apscores, plane_aplabels = {}, {}
    plane_offset_errs, plane_normal_errs = [], []
    npos = {}
    for cat_id in cat_ids:
        box_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        box_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        mask_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        mask_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        plane_apscores[cat_id] = [
            torch.tensor([], dtype=torch.float32, device=device)
        ]
        plane_aplabels[cat_id] = [
            torch.tensor([], dtype=torch.uint8, device=device)
        ]
        npos[cat_id] = 0.0

    # number of gt positive instances per class
    for gt_ann in dataset.dataset["annotations"]:
        gt_label = gt_ann["category_id"]
        npos[gt_label] += 1.0

    for prediction in predictions:
        original_id = prediction["image_id"]
        image_width = dataset.loadImgs([original_id])[0]["width"]
        image_height = dataset.loadImgs([original_id])[0]["height"]
        if "instances" not in prediction:
            continue

        num_img_preds = len(prediction["instances"])
        if num_img_preds == 0:
            continue

        # predictions
        scores, boxes, labels, masks_rles = [], [], [], []
        for ins in prediction["instances"]:
            scores.append(ins["score"])
            boxes.append(ins["bbox"])
            labels.append(ins["category_id"])
            masks_rles.append(ins["segmentation"])
        boxes = np.array(boxes)  # XYWH_ABS, as produced by the COCO format
        boxes = BoxMode.convert(boxes, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        boxes = Boxes(torch.tensor(boxes, dtype=torch.float32)).to(device)
        planes = prediction["pred_plane"]

        # ground truth
        # annotations corresponding to original_id (i.e. the COCO image_id)
        gt_ann_ids = dataset.getAnnIds(imgIds=[original_id])
        gt_anns = dataset.loadAnns(gt_ann_ids)
        # get original ground truth mask, box, label & mesh
        gt_boxes, gt_labels, gt_mask_rles, gt_planes = [], [], [], []
        for ann in gt_anns:
            gt_boxes.append(ann["bbox"])
            gt_labels.append(ann["category_id"])
            if isinstance(ann["segmentation"], list):
                polygons = [
                    np.array(p, dtype=np.float64) for p in ann["segmentation"]
                ]
                rles = mask_util.frPyObjects(polygons, image_height,
                                             image_width)
                rle = mask_util.merge(rles)
            elif isinstance(ann["segmentation"], dict):  # RLE
                rle = ann["segmentation"]
            else:
                raise TypeError(
                    f"Unknown segmentation type {type(ann['segmentation'])}!")
            gt_mask_rles.append(rle)
            gt_planes.append(ann["plane"])

        gt_boxes = np.array(gt_boxes)  # xywh from coco
        gt_boxes = BoxMode.convert(gt_boxes, BoxMode.XYWH_ABS,
                                   BoxMode.XYXY_ABS)
        faux_gt_targets = Boxes(
            torch.tensor(gt_boxes, dtype=torch.float32, device=device))

        # box iou
        boxiou = pairwise_iou(boxes, faux_gt_targets)

        # filter predictions with iou > filter_iou
        # valid_pred_ids = (boxiou > filter_iou).sum(axis=1) > 0

        # mask iou
        miou = mask_util.iou(masks_rles, gt_mask_rles, [0] * len(gt_mask_rles))

        plane_metrics = compare_planes(planes, gt_planes)

        # sort predictions in descending order
        scores = torch.tensor(np.array(scores), dtype=torch.float32)
        scores_sorted, idx_sorted = torch.sort(scores, descending=True)
        # record assigned gt.
        box_covered = []
        mask_covered = []
        plane_covered = []

        for pred_id in range(num_img_preds):
            # note: to evaluate only predictions whose overlap with some ground
            # truth exceeds filter_iou, re-enable the check below
            # if valid_pred_ids[idx_sorted[pred_id]] == 0:
            #     continue
            # Assign pred to gt
            gt_id = torch.argmax(boxiou[idx_sorted[pred_id]])
            gt_label = gt_labels[gt_id]
            # map to dataset category id
            pred_label = reverse_id_mapping[labels[idx_sorted[pred_id]]]
            pred_miou = miou[idx_sorted[pred_id], gt_id]
            pred_biou = boxiou[idx_sorted[pred_id], gt_id]
            pred_score = scores[idx_sorted[pred_id]].view(1).to(device)

            normal = plane_metrics["norm"][idx_sorted[pred_id], gt_id].item()
            offset = plane_metrics["offset"][idx_sorted[pred_id], gt_id].item()
            plane_offset_errs.append(offset)
            plane_normal_errs.append(normal)

            # mask
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_miou > iou_thresh)
                    and (gt_id not in mask_covered)):
                tpfp[0] = 1
                mask_covered.append(gt_id)
            mask_apscores[pred_label].append(pred_score)
            mask_aplabels[pred_label].append(tpfp)

            # box
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (pred_biou > iou_thresh)
                    and (gt_id not in box_covered)):
                tpfp[0] = 1
                box_covered.append(gt_id)
            box_apscores[pred_label].append(pred_score)
            box_aplabels[pred_label].append(tpfp)

            # plane
            tpfp = torch.tensor([0], dtype=torch.uint8, device=device)
            if ((pred_label == gt_label) and (normal < normal_threshold)
                    and (offset < offset_threshold)
                    and (gt_id not in plane_covered)):
                tpfp[0] = 1
                plane_covered.append(gt_id)
            plane_apscores[pred_label].append(pred_score)
            plane_aplabels[pred_label].append(tpfp)

    # check things for eval
    # assert npos.sum() == len(dataset.dataset["annotations"])
    # convert to tensors
    detection_metrics = {}
    boxap, maskap, planeap = 0.0, 0.0, 0.0
    valid = 0.0
    for cat_id in cat_ids:
        cat_name = dataset.loadCats([cat_id])[0]["name"]
        if npos[cat_id] == 0:
            continue
        valid += 1

        cat_box_ap = VOCap.compute_ap(
            torch.cat(box_apscores[cat_id]),
            torch.cat(box_aplabels[cat_id]),
            npos[cat_id],
        ).item()
        boxap += cat_box_ap
        detection_metrics["box_ap@%.1f - %s" %
                          (iou_thresh, cat_name)] = cat_box_ap

        cat_mask_ap = VOCap.compute_ap(
            torch.cat(mask_apscores[cat_id]),
            torch.cat(mask_aplabels[cat_id]),
            npos[cat_id],
        ).item()
        maskap += cat_mask_ap
        detection_metrics["mask_ap@%.1f - %s" %
                          (iou_thresh, cat_name)] = cat_mask_ap

        cat_plane_ap = VOCap.compute_ap(
            torch.cat(plane_apscores[cat_id]),
            torch.cat(plane_aplabels[cat_id]),
            npos[cat_id],
        ).item()
        planeap += cat_plane_ap
        detection_metrics["plane_ap@iou%.1fnormal%.1foffset%.1f - %s" %
                          (iou_thresh, normal_threshold, offset_threshold,
                           cat_name)] = cat_plane_ap
    detection_metrics["box_ap@%.1f" % iou_thresh] = boxap / valid
    detection_metrics["mask_ap@%.1f" % iou_thresh] = maskap / valid
    detection_metrics["plane_ap@iou%.1fnormal%.1foffset%.1f" %
                      (iou_thresh, normal_threshold, offset_threshold)] = (
                          planeap / valid)
    logger.info("Detection metrics: \n" +
                create_small_table(detection_metrics))
    plane_metrics = {}
    plane_normal_errs = np.array(plane_normal_errs)
    plane_offset_errs = np.array(plane_offset_errs)
    plane_metrics["%normal<10"] = (sum(plane_normal_errs < 10) /
                                   len(plane_normal_errs) * 100)
    plane_metrics["%normal<30"] = (sum(plane_normal_errs < 30) /
                                   len(plane_normal_errs) * 100)
    plane_metrics["%offset<0.5"] = (sum(plane_offset_errs < 0.5) /
                                    len(plane_offset_errs) * 100)
    plane_metrics["%offset<0.3"] = (sum(plane_offset_errs < 0.3) /
                                    len(plane_offset_errs) * 100)
    plane_metrics["mean_normal"] = plane_normal_errs.mean()
    plane_metrics["median_normal"] = np.median(plane_normal_errs)
    plane_metrics["mean_offset"] = plane_offset_errs.mean()
    plane_metrics["median_offset"] = np.median(plane_offset_errs)
    logger.info("Plane metrics: \n" + create_small_table(plane_metrics))
    plane_metrics.update(detection_metrics)
    return plane_metrics
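
The per-category APs above are produced by VOCap.compute_ap, which is assumed to perform a standard VOC-style precision/recall integration over score-sorted detections with a monotone precision envelope. A minimal NumPy sketch of that computation (the function name voc_ap and the toy inputs are illustrative):

import numpy as np

def voc_ap(scores, labels, npos):
    # scores: detection confidences; labels: 1 for TP, 0 for FP; npos: #GT instances.
    order = np.argsort(-scores)
    tp = np.cumsum(labels[order] == 1)
    fp = np.cumsum(labels[order] == 0)
    recall = tp / max(npos, 1e-8)
    precision = tp / np.maximum(tp + fp, 1e-8)
    # make the precision curve monotonically non-increasing, then integrate
    for i in range(len(precision) - 2, -1, -1):
        precision[i] = max(precision[i], precision[i + 1])
    ap, prev_r = 0.0, 0.0
    for p, r in zip(precision, recall):
        ap += p * (r - prev_r)
        prev_r = r
    return ap

# illustrative usage: two GT instances, three detections (TP, FP, TP)
print(voc_ap(np.array([0.9, 0.8, 0.6]), np.array([1, 0, 1]), npos=2))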