def do_lvis_evaluation(
    dataset,
    gt_path,
    predictions,
    box_only,
    output_folder,
    iou_types,
    iteration,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    if box_only:
        logger.info("Evaluating bbox proposals")
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        res = COCOResults("box_proposal")
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = evaluate_box_proposals(
                    predictions, dataset, area=area, limit=limit
                )
                key = "AR{}@{:d}".format(suffix, limit)
                res.results["box_proposal"][key] = stats["ar"].item()
        logger.info(res)
        if output_folder:
            torch.save(res, os.path.join(output_folder, "box_proposals.pth"))
        return

    logger.info("Preparing results for LVIS format")
    lvis_results = prepare_for_lvis_evaluation(predictions, dataset, iou_types)
    if len(lvis_results) == 0:
        return {}

    dt_path = os.path.join(output_folder, "lvis_dt.json")
    import json
    with open(dt_path, "w") as f:
        json.dump(lvis_results, f)

    logger.info("Evaluating predictions")
    lvis_eval_info = {}
    for iou_type in iou_types:
        lvis_eval = LVISEval(
            gt_path, dt_path, iou_type
        )
        lvis_eval.run()
        print(iou_type)
        lvis_eval.print_results()
        keys = lvis_eval.get_results().keys()
        for k in keys:
            lvis_eval_info[iou_type + k] = lvis_eval.get_results()[k]

        save_path = os.path.join(output_folder, str(iteration))
        mkdir(save_path)
        lvis_eval_percat = LVISEvalPerCat(
            gt_path, dt_path, iou_type, save_path)
        lvis_eval_percat.run()
        lvis_eval_percat.print_results()
    return lvis_eval_info
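For orientation, a hedged usage sketch of the helper above. `dataset` and `predictions` are assumed to come from the surrounding maskrcnn_benchmark inference pipeline, and the paths and iteration number are placeholders only.

# Hypothetical call; dataset/predictions come from the inference pipeline,
# and the gt/output paths are illustrative, not real files.
eval_info = do_lvis_evaluation(
    dataset=dataset,
    gt_path="datasets/lvis/lvis_val.json",
    predictions=predictions,
    box_only=False,
    output_folder="output/lvis_eval",
    iou_types=("bbox", "segm"),
    iteration=90000,
)
# Keys combine iou_type and metric name, e.g. "bboxAP", "bboxAP50", "segmAPr".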
Example #2
def _evaluate_predictions_on_lvis(lvis_gt,
                                  lvis_results,
                                  iou_type,
                                  class_names=None):
    metrics = {
        "bbox":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:
        logger.warning("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) +
                create_small_table(results))
    return results
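A minimal, hedged sketch of the inputs this helper expects: `lvis_gt` can be a `lvis.LVIS` object built from the ground-truth annotation file, and `lvis_results` is a list of COCO/LVIS-style result dicts. All values below are illustrative.

from lvis import LVIS

lvis_gt = LVIS("datasets/lvis/lvis_val.json")   # hypothetical annotation path
lvis_results = [
    {
        "image_id": 397133,                  # an image id from the gt file
        "category_id": 3,                    # an LVIS category id
        "bbox": [258.1, 41.3, 66.9, 75.4],   # [x, y, width, height]
        "score": 0.91,
    },
    # for iou_type == "segm", each entry would also carry an RLE "segmentation"
]
scores = _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type="bbox")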
def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, class_names=None):
    """
    Args:
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warning("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        "Evaluation results for {}: \n".format(iou_type)
        + create_small_table(results)
    )
    return results
Example #4
def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets=None, class_names=None
):
    """
    Copied from detectron2.evaluation.lvis_evaluation, with support for max_dets.

    Args:
        iou_type (str):
        max_dets (None or int): limit on the maximum number of detections per
            image; defaults to the LVIS standard of 300 when None.
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warning("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    #####
    # <modified>
    if max_dets is None:
        max_dets = 300

    lvis_results_obj = LVISResults(lvis_gt, lvis_results, max_dets=max_dets)
    lvis_eval = LVISEval(lvis_gt, lvis_results_obj, iou_type)
    lvis_eval.params.max_dets = max_dets
    # </modified>
    #####
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        f"Evaluation results for {iou_type}, max_dets {max_dets} \n"
        + create_small_table(results)
    )
    return results
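A short, hedged example of the knob this variant adds: raising the per-image detection cap above the LVIS default of 300. `lvis_gt` and `lvis_results` are assumed to be built as in the earlier sketch.

# Hypothetical call with a larger per-image detection cap.
results_1k = _evaluate_predictions_on_lvis(lvis_gt, lvis_results, "bbox", max_dets=1000)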
Example #5
def _evaluate_predictions_on_lvis(lvis_gt,
                                  lvis_results,
                                  iou_type,
                                  max_dets_per_image=None,
                                  class_names=None):
    """
    Args:
        iou_type (str):
        max_dets_per_image (None or int): limit on the maximum number of
            detections per image when evaluating AP. The LVIS default is 300.
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warning("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    if max_dets_per_image is None:
        max_dets_per_image = 300  # Default for LVIS dataset

    from lvis import LVISEval, LVISResults

    logger.info(
        f"Evaluating with max detections per image = {max_dets_per_image}")
    lvis_results = LVISResults(lvis_gt,
                               lvis_results,
                               max_dets=max_dets_per_image)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) +
                create_small_table(results))
    return results
Example #6
def do_lvis_evaluation(
    dataset,
    gt_path,
    predictions,
    output_folder,
    iou_types,
    iteration,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    logger.info("Preparing results for LVIS format")
    lvis_results = prepare_for_lvis_evaluation(predictions, dataset, iou_types)
    if len(lvis_results) == 0:
        return {}

    dt_path = os.path.join(output_folder, "lvis_dt.json")
    import json
    with open(dt_path, "w") as f:
        json.dump(lvis_results, f)

    logger.info("Evaluating predictions")
    lvis_eval_info = {}
    for iou_type in iou_types:
        lvis_eval = LVISEval(gt_path, dt_path, iou_type)
        lvis_eval.run()
        print(iou_type)
        lvis_eval.print_results()
        keys = lvis_eval.get_results().keys()
        for k in keys:
            lvis_eval_info[iou_type + k] = lvis_eval.get_results()[k]

        save_path = os.path.join(output_folder, str(iteration))
        mkdir(save_path)
        lvis_eval_percat = LVISEvalPerCat(gt_path, dt_path, iou_type,
                                          save_path)
        lvis_eval_percat.run()
        lvis_eval_percat.print_results()
    return lvis_eval_info
def eval_partial_results(epoch, dset_name, validation_path):
    results = []
    mAP = -1
    directory = 'bbox_results/temp_res'
    for filename in os.listdir(directory):
        if filename.endswith(".json"):
            temp_name = os.path.join(directory, filename)
            with open(temp_name, 'rb') as f:
                results = list(itertools.chain(results, pickle.load(f)))

    cwd = os.getenv('owd')
    validation_path = os.path.join(cwd, validation_path)

    if not os.path.exists(f'bbox_results/{dset_name}/'):
        os.makedirs(f'bbox_results/{dset_name}/')

    resFile = f'./bbox_results/{dset_name}/results_{epoch}.json'
    with open(resFile, 'w') as f:
        json.dump(results, f, indent=4)

    if dset_name in ('coco', 'drones'):
        cocoGt = COCO(validation_path)
        try:
            cocoDt = cocoGt.loadRes(resFile)
        except IndexError:
            print('empty list return zero map')
            return 0
        cocoDt.loadAnns()

        #  running evaluation
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        mAP = cocoEval.stats[0]

    elif (dset_name == 'lvis'):

        lvis_eval = LVISEval(validation_path, resFile, 'bbox')
        lvis_eval.run()
        metrics = lvis_eval.get_results()
        lvis_eval.print_results()
        mAP = metrics['AP']

    return mAP
def eval_results(results, dset_name, validation_path):

    cwd = os.getenv('owd')
    validation_path = os.path.join(cwd, validation_path)

    if not os.path.exists(f'bbox_results/{dset_name}/'):
        os.makedirs(f'bbox_results/{dset_name}/')

    rid = random.randint(0, 1000000)
    resFile = f'./bbox_results/{dset_name}/results_{rid}.json'
    with open(resFile, 'w') as f:
        json.dump(results, f, indent=4)

    if dset_name in ('coco', 'drones'):
        cocoGt = COCO(validation_path)
        try:
            cocoDt = cocoGt.loadRes(resFile)
        except IndexError:
            print('empty list return zero map')
            return 0
        cocoDt.loadAnns()

        #  running evaluation
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        mAP = cocoEval.stats[0]

    elif (dset_name == 'lvis'):
        try:
            lvis_eval = LVISEval(validation_path, resFile, 'bbox')
        except IndexError:
            print('empty list return zero map')
            return 0
        lvis_eval.run()
        metrics = lvis_eval.get_results()
        lvis_eval.print_results()
        mAP = metrics['AP']

    os.remove(resFile)

    return mAP
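Both helpers above read only `cocoEval.stats[0]`; for reference, a sketch of the standard pycocotools `COCOeval.stats` ordering after `summarize()` for bbox/segm evaluation.

# Standard COCOeval.stats layout (bbox/segm):
#   stats[0]  AP @ IoU=0.50:0.95, area=all, maxDets=100   (the mAP returned above)
#   stats[1]  AP @ IoU=0.50           stats[2]  AP @ IoU=0.75
#   stats[3]  AP small                stats[4]  AP medium        stats[5]  AP large
#   stats[6]  AR @ maxDets=1          stats[7]  AR @ maxDets=10
#   stats[8]  AR @ maxDets=100        stats[9..11]  AR small/medium/large
# e.g. inside the 'coco' branch one could also pull:
#   ap50, ap75 = cocoEval.stats[1], cocoEval.stats[2]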
Example #9
    def evaluate(self,
                 results,
                 metric=['track'],
                 logger=None,
                 resfile_path=None):
        if isinstance(metric, list):
            metrics = metric
        elif isinstance(metric, str):
            metrics = [metric]
        else:
            raise TypeError('metric must be a list or a str.')
        allowed_metrics = ['bbox', 'track']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported.')

        result_files, tmp_dir = self.format_results(results, resfile_path)

        eval_results = dict()

        if 'track' in metrics:
            from tao.toolkit.tao import TaoEval
            print_log('Evaluating TAO results...', logger)
            tao_eval = TaoEval(self.ann_file, result_files['track'])
            tao_eval.params.img_ids = self.img_ids
            tao_eval.params.cat_ids = self.cat_ids
            tao_eval.params.iou_thrs = np.array([0.5, 0.75])
            tao_eval.run()

            tao_eval.print_results()
            tao_results = tao_eval.get_results()
            for k, v in tao_results.items():
                if isinstance(k, str) and k.startswith('AP'):
                    key = 'track_{}'.format(k)
                    val = float('{:.3f}'.format(float(v)))
                    eval_results[key] = val

        if 'bbox' in metrics:
            print_log('Evaluating detection results...', logger)
            lvis_gt = LVIS(self.ann_file)
            lvis_dt = LVISResults(lvis_gt, result_files['bbox'])
            lvis_eval = LVISEval(lvis_gt, lvis_dt, 'bbox')
            lvis_eval.params.imgIds = self.img_ids
            lvis_eval.params.catIds = self.cat_ids
            lvis_eval.evaluate()
            lvis_eval.accumulate()
            lvis_eval.summarize()
            lvis_eval.print_results()
            lvis_results = lvis_eval.get_results()
            for k, v in lvis_results.items():
                if k.startswith('AP'):
                    key = '{}_{}'.format('bbox', k)
                    val = float('{:.3f}'.format(float(v)))
                    eval_results[key] = val
            ap_summary = ' '.join([
                '{}:{:.3f}'.format(k, float(v))
                for k, v in lvis_results.items() if k.startswith('AP')
            ])
            eval_results['bbox_mAP_copypaste'] = ap_summary

        if tmp_dir is not None:
            tmp_dir.cleanup()

        return eval_results
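A hedged usage sketch for this `evaluate` method; `dataset` stands in for an instance of the TAO/LVIS-style dataset class the method is defined on, and `results` for outputs compatible with its `format_results`. Both names are assumptions about the surrounding codebase.

# Hypothetical call evaluating both detection and tracking metrics.
eval_results = dataset.evaluate(results, metric=['bbox', 'track'])
# Typical keys: 'track_AP', 'track_AP50', 'bbox_AP', 'bbox_AP50', ...,
# plus 'bbox_mAP_copypaste' holding a space-separated summary string.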
Example #10
    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 classwise=False,
                 proposal_nums=(100, 300, 1000),
                 iou_thrs=np.arange(0.5, 0.96, 0.05)):
        """Evaluation in LVIS protocol.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated. Options are
                'bbox', 'segm', 'proposal', 'proposal_fast'.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            jsonfile_prefix (str | None): The prefix of output json files, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Default: None.
            classwise (bool): Whether to evaluate the AP for each class.
            proposal_nums (Sequence[int]): Proposal numbers used for evaluating
                recalls, such as recall@100, recall@1000.
                Default: (100, 300, 1000).
            iou_thrs (Sequence[float]): IoU thresholds used for evaluating
                recalls. If set to a list, the average recall over all IoUs will
                also be computed. Default: np.arange(0.5, 0.96, 0.05).

        Returns:
            dict[str, float]: LVIS style metrics.
        """

        try:
            import lvis
            assert lvis.__version__ >= '10.5.3'
            from lvis import LVISResults, LVISEval
        except AssertionError:
            raise AssertionError('Incompatible version of lvis is installed. '
                                 'Run pip uninstall lvis first. Then run pip '
                                 'install mmlvis to install open-mmlab forked '
                                 'lvis. ')
        except ImportError:
            raise ImportError('Package lvis is not installed. Please run pip '
                              'install mmlvis to install open-mmlab forked '
                              'lvis.')
        assert isinstance(results, list), 'results must be a list'
        assert len(results) == len(self), (
            'The length of results is not equal to the dataset len: {} != {}'.
            format(len(results), len(self)))

        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError('metric {} is not supported'.format(metric))

        if jsonfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            jsonfile_prefix = osp.join(tmp_dir.name, 'results')
        else:
            tmp_dir = None
        result_files = self.results2json(results, jsonfile_prefix)

        eval_results = {}
        # get original api
        lvis_gt = self.coco
        for metric in metrics:
            msg = 'Evaluating {}...'.format(metric)
            if logger is None:
                msg = '\n' + msg
            print_log(msg, logger=logger)

            if metric == 'proposal_fast':
                ar = self.fast_eval_recall(results,
                                           proposal_nums,
                                           iou_thrs,
                                           logger='silent')
                log_msg = []
                for i, num in enumerate(proposal_nums):
                    eval_results['AR@{}'.format(num)] = ar[i]
                    log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
                log_msg = ''.join(log_msg)
                print_log(log_msg, logger=logger)
                continue

            if metric not in result_files:
                raise KeyError('{} is not in results'.format(metric))
            try:
                lvis_dt = LVISResults(lvis_gt, result_files[metric])
            except IndexError:
                print_log('The testing results of the whole dataset are empty.',
                          logger=logger,
                          level=logging.ERROR)
                break

            iou_type = 'bbox' if metric == 'proposal' else metric
            lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type)
            lvis_eval.params.imgIds = self.img_ids
            if metric == 'proposal':
                lvis_eval.params.useCats = 0
                lvis_eval.params.maxDets = list(proposal_nums)
                lvis_eval.evaluate()
                lvis_eval.accumulate()
                lvis_eval.summarize()
                for k, v in lvis_eval.get_results().items():
                    if k.startswith('AR'):
                        val = float('{:.3f}'.format(float(v)))
                        eval_results[k] = val
            else:
                lvis_eval.evaluate()
                lvis_eval.accumulate()
                lvis_eval.summarize()
                lvis_results = lvis_eval.get_results()
                if classwise:
                    # Compute per-category AP,
                    # from https://github.com/facebookresearch/detectron2/
                    precisions = lvis_eval.eval['precision']
                    # precision: (iou, recall, cls, area range, max dets)
                    assert len(self.cat_ids) == precisions.shape[2]

                    results_per_category = []
                    for idx, catId in enumerate(self.cat_ids):
                        # area range index 0: all area ranges
                        # max dets index -1: typically 100 per image
                        nm = self.coco.load_cats(catId)[0]
                        precision = precisions[:, :, idx, 0, -1]
                        precision = precision[precision > -1]
                        if precision.size:
                            ap = np.mean(precision)
                        else:
                            ap = float('nan')
                        results_per_category.append(
                            (f'{nm["name"]}', f'{float(ap):0.3f}'))

                    num_columns = min(6, len(results_per_category) * 2)
                    results_flatten = list(
                        itertools.chain(*results_per_category))
                    headers = ['category', 'AP'] * (num_columns // 2)
                    results_2d = itertools.zip_longest(*[
                        results_flatten[i::num_columns]
                        for i in range(num_columns)
                    ])
                    table_data = [headers]
                    table_data += [result for result in results_2d]
                    table = AsciiTable(table_data)
                    print_log('\n' + table.table, logger=logger)

                for k, v in lvis_results.items():
                    if k.startswith('AP'):
                        key = '{}_{}'.format(metric, k)
                        val = float('{:.3f}'.format(float(v)))
                        eval_results[key] = val
                ap_summary = ' '.join([
                    '{}:{:.3f}'.format(k, float(v))
                    for k, v in lvis_results.items() if k.startswith('AP')
                ])
                eval_results['{}_mAP_copypaste'.format(metric)] = ap_summary
            lvis_eval.print_results()
        if tmp_dir is not None:
            tmp_dir.cleanup()
        return eval_results
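A hedged usage sketch for the mmdet-style `evaluate` above; `lvis_dataset` is assumed to be the LVIS dataset instance this method belongs to, and `outputs` the per-image test results from the detector.

# Hypothetical call with the per-category AP table enabled.
metrics = lvis_dataset.evaluate(
    outputs,
    metric=['bbox', 'segm'],
    classwise=True,                                 # prints an AsciiTable of per-category AP
    jsonfile_prefix='work_dirs/lvis_eval/results',  # optional; a temp dir is used if None
)
# e.g. metrics['bbox_AP'], metrics['segm_APr'], metrics['bbox_mAP_copypaste']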
Example #11
    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 classwise=False,
                 proposal_nums=300,
                 iou_thrs=np.arange(0.5, 0.96, 0.05)):
        """Evaluation in COCO protocol.

        Args:
            results (list): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            jsonfile_prefix (str | None): The prefix of json files. It includes
                the file path and the prefix of filename, e.g., "a/b/prefix".
                If not specified, a temp file will be created. Default: None.
            classwise (bool): Whether to evaluate the AP for each class.
            proposal_nums (int | Sequence[int]): Number of proposals, also used
                as the maximum detections per image for LVIS evaluation.
                Default: 300.
            iou_thrs (Sequence[float]): IoU thresholds used for evaluating
                recalls. If set to a list, the average recall over all IoUs will
                also be computed. Default: np.arange(0.5, 0.96, 0.05).

        Returns:
            dict[str, float]
        """

        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError('metric {} is not supported'.format(metric))

        result_files = self.format_results(results, jsonfile_prefix)

        eval_results = {}
        cocoGt = self.coco
        for metric in metrics:
            msg = 'Evaluating {}...'.format(metric)
            if logger is None:
                msg = '\n' + msg
            print_log(msg, logger=logger)

            if metric == 'proposal_fast':
                ar = self.fast_eval_recall(results,
                                           proposal_nums,
                                           iou_thrs,
                                           logger='silent')
                log_msg = []
                for i, num in enumerate(proposal_nums):
                    eval_results['AR@{}'.format(num)] = ar[i]
                    log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
                log_msg = ''.join(log_msg)
                print_log(log_msg, logger=logger)
                continue

            if metric not in result_files:
                raise KeyError('{} is not in results'.format(metric))
            try:
                cocoDt = cocoGt.loadRes(result_files[metric])
            except IndexError:
                print_log('The testing results of the whole dataset are empty.',
                          logger=logger,
                          level=logging.ERROR)
                break

            iou_type = 'bbox' if metric == 'proposal' else metric
            # run lvis evaluation
            eval_results['lvis'] = {}
            lvis_eval = LVISEval(self.ann_file_path, result_files[metric],
                                 iou_type)
            lvis_eval.params.max_dets = proposal_nums
            lvis_eval.run()
            lvis_eval.print_results()
            print('=====> The above metric is {}.'.format(iou_type))
            keys = lvis_eval.get_results().keys()
            for k in keys:
                eval_results['lvis'][iou_type + k] = lvis_eval.get_results()[k]

        return eval_results
    results = evaluate(model, test_loader, args.device)
    res_path = os.path.join(
        out_dir, (args.resume).split("/")[-1].split(".")[0] + ".json")
    with open(res_path, 'w') as f:
        json.dump(results, f, indent=4)

    if args.dataset == 'coco':
        cocoGt = COCO(annotations)
        try:
            cocoDt = cocoGt.loadRes(res_path)
        except IndexError:
            print('empty list return zero map')
        cocoDt.loadAnns()

        #  running evaluation
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        mAP = cocoEval.stats[0]

    elif (args.dataset == 'lvis'):
        try:
            lvis_eval = LVISEval(annotations, res_path, 'bbox')
        except IndexError:
            print('empty list return zero map')
        lvis_eval.run()
        metrics = lvis_eval.get_results()
        lvis_eval.print_results()
        mAP = metrics['AP']
def _evaluate_predictions_on_lvis_per_class(lvis_gt,
                                            lvis_results,
                                            iou_type,
                                            class_names=None):
    """
        Args:
            iou_type (str):
            kpt_oks_sigmas (list[float]):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.

        Returns:
            a dict of {metric name: score}
        """
    metrics = {
        "bbox":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm":
        ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warning("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)

    lvis_eval.evaluate()
    lvis_eval.accumulate()
    precisions = lvis_eval.eval["precision"]

    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        precision = precisions[:, :, idx, 0]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))

    lvis_eval.summarize()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) +
                create_small_table(results))
    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results
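Since `class_names` drives the per-category loop, a hedged sketch of deriving it from the ground truth before calling the helper; the annotation path is illustrative and `lvis_results` is assumed to be a result list built as in the earlier sketches.

from lvis import LVIS

lvis_gt = LVIS("datasets/lvis/lvis_val.json")   # hypothetical path
class_names = [c["name"] for c in lvis_gt.load_cats(lvis_gt.get_cat_ids())]
per_class = _evaluate_predictions_on_lvis_per_class(
    lvis_gt, lvis_results, iou_type="bbox", class_names=class_names)
# The returned dict holds the standard metrics plus "AP-<category>" entries.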