Code Example #1
    def _derive_coco_results(self, coco_eval, iou_type):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):

        Returns:
            a dict of {metric name: score}
        """

        metrics = ["AP", "mMR", "Recall"]

        if coco_eval is None:
            self._logger.warning(
                "No predictions from the model! Set scores to -1")
            return {metric: -1 for metric in metrics}

        # the standard metrics
        results = {
            metric: coco_eval[idx]
            for idx, metric in enumerate(metrics)
        }
        small_table = create_small_table(results)
        self._logger.info("Evaluation results for {}: \n".format(iou_type) +
                          small_table)

        # if class_names is None or len(class_names) <= 1:
        return results
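Every snippet in this collection formats its metrics dict with create_small_table, which is not shown here. As a rough sketch, a helper with the behavior these log calls rely on could be a thin wrapper around tabulate (an assumed dependency); the actual cvpods implementation may differ in formatting details:

from tabulate import tabulate


def create_small_table(small_dict):
    """Render a dict of {metric name: score} as a one-row text table.

    Minimal sketch only, not the verbatim cvpods helper.
    """
    keys, values = tuple(zip(*small_dict.items()))
    return tabulate(
        [values],          # a single data row holding the metric values
        headers=keys,      # one column per metric name
        tablefmt="pipe",
        floatfmt=".3f",
        stralign="center",
        numalign="center",
    )

Called on a metrics dict, such a helper returns a pipe-style table with one header row and one value row, which is what the logger calls in these examples append to their messages.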
Code Example #2
    def _eval_classification_accuracy(self):
        """
        Evaluate self._predictions on the classification task.
        Fill self._results with the metrics of the tasks.
        """
        batch_size = len(self._targets)

        pred = torch.cat(self._predictions, dim=1)
        target = torch.cat(self._targets)

        correct = pred.eq(target.view(1, -1).expand_as(pred))

        results = {}
        for k in self._topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            results[f"Top_{k} Acc"] = correct_k.mul_(100.0 / batch_size).item()
        self._results["Accuracy"] = results

        small_table = create_small_table(results)
        self._logger.info("Evaluation results for classification: \n" + small_table)

        if self._dump:
            dump_info_one_task = {
                "task": "classification",
                "tables": [small_table],
            }
            self._dump_infos.append(dump_info_one_task)
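For the tensor shapes above to line up, each entry of self._predictions is assumed to be a [maxk, 1] column of top-k class indices for one sample and each entry of self._targets a one-element label tensor, so that len(self._targets) counts samples. Under that assumption (hypothetical data, not the evaluator's real accumulation code), the accuracy computation can be exercised standalone:

import torch

topk = (1, 5)
num_classes, num_samples = 10, 4
torch.manual_seed(0)

# Hypothetical per-sample accumulation: one [maxk, 1] column of top-k class
# indices per sample, plus one 1-element label tensor per sample.
predictions, targets = [], []
for _ in range(num_samples):
    logits = torch.randn(num_classes)
    label = torch.randint(num_classes, (1,))
    predictions.append(logits.topk(max(topk)).indices.unsqueeze(1))
    targets.append(label)

batch_size = len(targets)                     # total number of samples
pred = torch.cat(predictions, dim=1)          # [maxk, num_samples]
target = torch.cat(targets)                   # [num_samples]
correct = pred.eq(target.view(1, -1).expand_as(pred))

for k in topk:
    correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
    print(f"Top_{k} Acc", correct_k.mul_(100.0 / batch_size).item())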
Code Example #3
    def _derive_lvis_results(self, lvis_eval, iou_type, summary):
        """
        Derive the desired score numbers from summarized LVISEval.

        Args:
            lvis_eval (None or LVISEval): None represents no predictions from model.
            iou_type (str): specific evaluation task, optional values are: "bbox", "segm".

        Returns:
            a dict of {metric name: score}
        """
        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        }[iou_type]

        if lvis_eval is None:
            logger.warning("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # Pull the standard metrics from the LVIS results
        results = lvis_eval.get_results()
        results = {metric: float(results[metric] * 100) for metric in metrics}
        small_table = create_small_table(results)
        logger.info("Evaluation results for {}: \n".format(iou_type) + small_table)

        if self._dump:
            dump_info_one_task = {
                "task": iou_type,
                "summary": summary.getvalue(),
                "tables": [small_table],
            }
            self._dump_infos.append(dump_info_one_task)
        return results
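For context, lvis_eval above is assumed to be an already-summarized LVISEval object from the standard lvis-api package. A rough sketch of how such an object might be produced before being handed to _derive_lvis_results (file paths are placeholders):

from lvis import LVIS, LVISEval, LVISResults  # pip install lvis

# Placeholder paths; substitute real annotation and prediction files.
lvis_gt = LVIS("lvis_v1_val.json")
lvis_dt = LVISResults(lvis_gt, "lvis_instances_results.json")

lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type="bbox")
lvis_eval.run()             # evaluate + accumulate + summarize
lvis_eval.print_results()

# get_results() returns a dict keyed by "AP", "AP50", ..., "APr", "APc", "APf",
# which is what the metric list above indexes into.
results = lvis_eval.get_results()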
Code Example #4
    def evaluate(self):
        """
        Returns:
            dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75".
        """
        all_predictions = comm.gather(self._predictions, dst=0)
        if not comm.is_main_process():
            return
        predictions = defaultdict(list)
        for predictions_per_rank in all_predictions:
            for clsid, lines in predictions_per_rank.items():
                predictions[clsid].extend(lines)
        del all_predictions

        self._logger.info(
            "Evaluating {} using {} metric. "
            "Note that results do not use the official Matlab API.".format(
                self._dataset_name, 2007 if self._is_2007 else 2012))

        with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
            res_file_template = os.path.join(dirname, "{}.txt")

            aps = defaultdict(list)  # iou -> ap per class
            for cls_id, cls_name in enumerate(self._class_names):
                lines = predictions.get(cls_id, [""])

                with open(res_file_template.format(cls_name), "w") as f:
                    f.write("\n".join(lines))

                for thresh in range(50, 100, 5):
                    rec, prec, ap = voc_eval(
                        res_file_template,
                        self._anno_file_template,
                        self._image_set_path,
                        cls_name,
                        ovthresh=thresh / 100.0,
                        use_07_metric=self._is_2007,
                    )
                    aps[thresh].append(ap * 100)

        ret = OrderedDict()
        mAP = {iou: np.mean(x) for iou, x in aps.items()}
        ret["bbox"] = {
            "AP": np.mean(list(mAP.values())),
            "AP50": mAP[50],
            "AP75": mAP[75]
        }

        small_table = create_small_table(ret["bbox"])
        self._logger.info("Evaluation results for bbox: \n" + small_table)

        if self._dump:
            dump_info_one_task = {
                "task": "bbox",
                "tables": [small_table],
            }
            _dump_to_markdown([dump_info_one_task])
        return ret
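The per-class lines written to the temporary results files above are assumed to follow the plain Pascal VOC detection-file format that voc_eval parses: one detection per line, with image id, confidence, and the box as XMIN YMIN XMAX YMAX. A hypothetical accumulation step producing such lines (names and structure are illustrative, not necessarily the evaluator's real process method):

def process(self, inputs, outputs):
    # Hypothetical per-image accumulation feeding evaluate() above; each
    # detection becomes one whitespace-separated line in the class-specific
    # results file read back by voc_eval().
    for inp, out in zip(inputs, outputs):
        image_id = inp["image_id"]
        instances = out["instances"].to("cpu")
        boxes = instances.pred_boxes.tensor.numpy()
        scores = instances.scores.numpy()
        classes = instances.pred_classes.numpy()
        for box, score, cls in zip(boxes, scores, classes):
            xmin, ymin, xmax, ymax = box
            # The original VOC format uses 1-based pixel coordinates, so a
            # real implementation may need to offset xmin/ymin by +1 here.
            self._predictions[int(cls)].append(
                f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"
            )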
Code Example #5
    def dump(self, results):
        self._logger.info("Dump metric to {}".format(self._output_file))
        small_table = create_small_table(results)
        self._logger.info("Evaulation results for mse:\n" + small_table)

        with open(self._output_file, "w") as f:
            f.write("MSE Evaluator:\n" + small_table)
            f.write("\n\n")
            for k, v in results.items():
                f.write(str(k) + "\t\t" + str(v) + "\n")
Code Example #6
File: coco_evaluation.py  Project: FateScript/cvpods
    def _eval_box_proposals(self):
        """
        Evaluate the box proposals in self._predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in self._predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(
                    prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with megfile.smart_open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            logger.info("Annotations are not available for evaluation.")
            return

        logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(self._predictions,
                                                self._coco_api,
                                                area=area,
                                                limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
Code Example #7
    def _eval_classification_accuracy(self):
        """
        Evaluate self._predictions on the classification task.
        Fill self._results with the metrics of the tasks.
        """
        batch_size = len(self._targets)

        pred = torch.cat(self._predictions, dim=1)
        target = torch.cat(self._targets)

        correct = pred.eq(target.reshape(1, -1).expand_as(pred))

        results = {}
        macro_f1_score = f1_score(target.detach().cpu().numpy(),
                                  pred[0].detach().cpu().numpy(),
                                  average='macro')
        results["Macro_F1"] = macro_f1_score
        # Update with accuracy of the sub-group
        sub_group_accuracy = self._eval_longtail_subgroup_accuracy(
            pred, target)
        keys = ['Many', 'Medium', 'Few']

        for iidx, key in enumerate(self._topk):
            correct_k = correct[:key].reshape(-1).float().sum(0, keepdim=True)
            results[f"Top_{key} Acc"] = correct_k.mul_(100.0 /
                                                       batch_size).item()
            for idx, subgroup in enumerate(keys):
                results[f'Top_{key} {subgroup} Acc'] = sub_group_accuracy[idx][
                    iidx]

        self._results["Accuracy"] = results

        small_table = create_small_table(results)
        logger.info("Evaluation results for classification: \n" + small_table)

        if self._dump:
            dump_info_one_task = {
                "task": "classification",
                "tables": [small_table],
                "dataset": self.dataset_name,
            }
            self._dump_infos.append(dump_info_one_task)
Code Example #8
    def _derive_coco_results(self, coco_eval, iou_type):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):

        Returns:
            a dict of {metric name: score}
        """

        metrics = [
            "Reasonable", "Reasonable_small", "Reasonable_occ=heavy", "All"
        ]

        if coco_eval is None:
            logger.warning("No predictions from the model! Set scores to -1")
            return {metric: -1 for metric in metrics}

        # the standard metrics
        results = {
            metric: coco_eval[idx]
            for idx, metric in enumerate(metrics)
        }
        small_table = create_small_table(results)
        logger.info("Evaluation results for {}: \n".format(iou_type) +
                    small_table)

        if self._dump:
            dump_info_one_task = {
                "task": iou_type,
                "tables": [small_table],
            }
            self._dump_infos.append(dump_info_one_task)
        # if class_names is None or len(class_names) <= 1:
        return results
Code Example #9
File: coco_evaluation.py  Project: FateScript/cvpods
    def _derive_coco_results(self,
                             coco_eval,
                             iou_type,
                             summary,
                             class_names=None):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str): specific evaluation task,
                optional values are: "bbox", "segm", "keypoints".
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.

        Returns:
            a dict of {metric name: score}
        """

        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            logger.warning("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # the standard metrics
        results = {
            metric: float(coco_eval.stats[idx] *
                          100 if coco_eval.stats[idx] >= 0 else "nan")
            for idx, metric in enumerate(metrics)
        }
        small_table = create_small_table(results)
        logger.info("Evaluation results for {}: \n".format(iou_type) +
                    small_table)
        if not np.isfinite(sum(results.values())):
            logger.info("Note that some metrics cannot be computed.")

        if class_names is None:  # or len(class_names) <= 1:
            return results
        # Compute per-category AP
        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        assert len(class_names) == precisions.shape[2]

        results_per_category = {}
        for idx, name in enumerate(class_names):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category[name] = float(ap * 100)
            # results_per_category.append(("{}".format(name), float(ap * 100)))

        # tabulate it
        table = create_table_with_header(results_per_category,
                                         headers=["category", "AP"])
        logger.info("Per-category {} AP: \n".format(iou_type) + table)

        results.update(
            {"AP-" + name: ap
             for name, ap in results_per_category.items()})
        if self._dump:
            dump_info_one_task = {
                "task": iou_type,
                "summary": summary.getvalue(),
                "tables": [small_table, table],
            }
            self._dump_infos.append(dump_info_one_task)
        return results
Code Example #10
    def evaluate(self):
        """
        Returns:
            dict: has a key "segm", whose value is a dict of "AP" and "AP50".
        """
        comm.synchronize()
        if comm.get_rank() > 0:
            return
        os.environ["CITYSCAPES_DATASET"] = os.path.abspath(
            os.path.join(self._metadata.gt_dir, "..", "..")
        )
        # Load the Cityscapes eval script *after* setting the required env var,
        # since the script reads CITYSCAPES_DATASET into global variables at load time.
        import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval

        self._logger.info("Evaluating results under {} ...".format(self._temp_dir))

        # set some global states in cityscapes evaluation API, before evaluating
        cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir)
        cityscapes_eval.args.predictionWalk = None
        cityscapes_eval.args.JSONOutput = False
        cityscapes_eval.args.colorized = False
        cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json")

        # These lines are adopted from
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa
        groundTruthImgList = glob.glob(cityscapes_eval.args.groundTruthSearch)
        assert len(
            groundTruthImgList
        ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format(
            cityscapes_eval.args.groundTruthSearch
        )
        predictionImgList = []
        for gt in groundTruthImgList:
            predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args))
        results = cityscapes_eval.evaluateImgLists(
            predictionImgList, groundTruthImgList, cityscapes_eval.args
        )["averages"]

        ret = OrderedDict()
        ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100}
        self._working_dir.cleanup()

        small_table = create_small_table(ret["segm"])
        self._logger.info("Evaluation results for segm: \n" + small_table)

        results_per_category = []
        for cat, ap in results["classes"].items():
            ap = [ap_i * 100 for ap_i in ap.values()]
            results_per_category.append([cat, *ap])

        table = tabulate(
            results_per_category,
            headers=["category", "AP", "AP50"],
            tablefmt="pipe",
            floatfmt=".3f",
            numalign="left"
        )
        self._logger.info("Per-category segm AP: \n" + table)

        if self._dump:
            dump_info_one_task = {
                "task": "segm",
                "tables": [small_table, table],
            }
            _dump_to_markdown([dump_info_one_task])
        return ret
Code Example #11
    def evaluate(self):
        """
        Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):

        * Mean intersection-over-union averaged across classes (mIoU)
        * Frequency Weighted IoU (fwIoU)
        * Mean pixel accuracy averaged across classes (mACC)
        * Pixel Accuracy (pACC)
        """
        if self._distributed:
            comm.synchronize()
            conf_matrix_list = comm.all_gather(self._conf_matrix)
            self._predictions = comm.all_gather(self._predictions)
            self._predictions = list(itertools.chain(*self._predictions))
            if not comm.is_main_process():
                return

            self._conf_matrix = np.zeros_like(self._conf_matrix)
            for conf_matrix in conf_matrix_list:
                self._conf_matrix += conf_matrix

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "sem_seg_predictions.json")
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._predictions))

        acc = np.zeros(self._num_classes, dtype=float)
        iou = np.zeros(self._num_classes, dtype=float)
        tp = self._conf_matrix.diagonal()[:-1].astype(float)
        pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
        class_weights = pos_gt / np.sum(pos_gt)
        pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
        acc_valid = pos_gt > 0
        acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
        iou_valid = (pos_gt + pos_pred) > 0
        union = pos_gt + pos_pred - tp
        iou[iou_valid] = tp[iou_valid] / union[iou_valid]
        macc = np.sum(acc) / np.sum(acc_valid)
        miou = np.sum(iou) / np.sum(iou_valid)
        fiou = np.sum(iou * class_weights)
        pacc = np.sum(tp) / np.sum(pos_gt)

        res = {}
        res["mIoU"] = 100 * miou
        res["fwIoU"] = 100 * fiou
        res["mACC"] = 100 * macc
        res["pACC"] = 100 * pacc

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "sem_seg_evaluation.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(res, f)
        results = OrderedDict({"sem_seg": res})

        small_table = create_small_table(res)
        self._logger.info("Evaluation results for sem_seg: \n" + small_table)

        if self._dump:
            dump_info_one_task = {
                "task": "sem_seg",
                "tables": [small_table],
            }
            _dump_to_markdown([dump_info_one_task])

        return results
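The [:-1, :-1] slicing above assumes self._conf_matrix is an (N+1) x (N+1) array whose last row and column collect ignored/void pixels, with rows indexing predictions and columns ground truth (hence pos_gt over axis 0 and pos_pred over axis 1). A sketch of how such a matrix could be accumulated per image with np.bincount, stated as an assumption about the process step rather than the evaluator's actual code:

import numpy as np

def update_confusion_matrix(conf_matrix, pred, gt, num_classes, ignore_label=255):
    """Accumulate an (N+1) x (N+1) confusion matrix for one image.

    pred and gt are integer label maps of identical shape; pixels whose
    ground truth equals ignore_label go to the extra last column, which is
    exactly what the evaluation above slices away with [:-1, :-1].
    """
    pred = pred.astype(np.int64).reshape(-1)
    gt = gt.astype(np.int64).reshape(-1)
    gt = np.where(gt == ignore_label, num_classes, gt)  # void -> last bin
    # Encode each (pred, gt) pair as a single index and histogram the pairs;
    # rows index predictions, columns index ground-truth classes.
    conf_matrix += np.bincount(
        (num_classes + 1) * pred + gt,
        minlength=conf_matrix.size,
    ).reshape(conf_matrix.shape)
    return conf_matrix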