def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             classwise=False,
             proposal_nums=(100, 300, 1000),
             iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """Evaluation in LVIS protocol.

    Args:
        results (list[list | tuple]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal', 'proposal_fast'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str | None): The prefix of output json files,
            including the file path and the prefix of the filename, e.g.
            "a/b/prefix". If not specified, a temp file will be created.
            Default: None.
        classwise (bool): Whether to evaluate the AP for each class.
            Default: False.
        proposal_nums (Sequence[int]): Proposal numbers used for
            evaluating recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float]): IoU thresholds used for evaluating
            recalls/mAPs. If set to a list, the average over all IoUs is
            also computed. Default: np.arange(0.5, 0.96, 0.05).

    Returns:
        dict[str, float]: LVIS style metrics.
    """
    try:
        from lvis import LVISEval, LVISResults
    except ImportError:
        raise ImportError('Please follow config/lvis/README.md to '
                          'install open-mmlab forked lvis first.')
    assert isinstance(results, list), 'results must be a list'
    assert len(results) == len(self), (
        'The length of results is not equal to the dataset len: {} != {}'.
        format(len(results), len(self)))

    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError('metric {} is not supported'.format(metric))

    if jsonfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        jsonfile_prefix = osp.join(tmp_dir.name, 'results')
    else:
        tmp_dir = None
    result_files = self.results2json(results, jsonfile_prefix)

    eval_results = {}
    # get original api
    lvis_gt = self.coco
    for metric in metrics:
        msg = 'Evaluating {}...'.format(metric)
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results['AR@{}'.format(num)] = ar[i]
                log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        if metric not in result_files:
            raise KeyError('{} is not in results'.format(metric))
        try:
            lvis_dt = LVISResults(lvis_gt, result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric
        lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type)
        lvis_eval.params.imgIds = self.img_ids
        if metric == 'proposal':
            lvis_eval.params.useCats = 0
            lvis_eval.params.maxDets = list(proposal_nums)
            lvis_eval.evaluate()
            lvis_eval.accumulate()
            lvis_eval.summarize()
            for k, v in lvis_eval.get_results().items():
                if k.startswith('AR'):
                    val = float('{:.3f}'.format(float(v)))
                    eval_results[k] = val
        else:
            lvis_eval.evaluate()
            lvis_eval.accumulate()
            lvis_eval.summarize()
            lvis_results = lvis_eval.get_results()
            if classwise:  # Compute per-category AP
                # adapted from https://github.com/facebookresearch/detectron2/
                precisions = lvis_eval.eval['precision']
                # precision has shape (iou, recall, cls, area range)
                assert len(self.cat_ids) == precisions.shape[2]

                results_per_category = []
                for idx, catId in enumerate(self.cat_ids):
                    # area range index 0: all area ranges
                    nm = self.coco.load_cats([catId])[0]
                    precision = precisions[:, :, idx, 0]
                    precision = precision[precision > -1]
                    if precision.size:
                        ap = np.mean(precision)
                    else:
                        ap = float('nan')
                    results_per_category.append(
                        (f'{nm["name"]}', f'{float(ap):0.3f}'))

                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(
                    itertools.chain(*results_per_category))
                headers = ['category', 'AP'] * (num_columns // 2)
                results_2d = itertools.zip_longest(*[
                    results_flatten[i::num_columns]
                    for i in range(num_columns)
                ])
                table_data = [headers]
                table_data += [result for result in results_2d]
                table = AsciiTable(table_data)
                print_log('\n' + table.table, logger=logger)

            for k, v in lvis_results.items():
                if k.startswith('AP'):
                    key = '{}_{}'.format(metric, k)
                    val = float('{:.3f}'.format(float(v)))
                    eval_results[key] = val
            ap_summary = ' '.join([
                '{}:{:.3f}'.format(k, float(v))
                for k, v in lvis_results.items() if k.startswith('AP')
            ])
            eval_results['{}_mAP_copypaste'.format(metric)] = ap_summary
        lvis_eval.print_results()
    if tmp_dir is not None:
        tmp_dir.cleanup()
    return eval_results
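# Minimal standalone sketch (not part of the original class) that mirrors the
# evaluation flow above using the lvis API directly. The annotation and result
# file paths are placeholders.
from lvis import LVIS, LVISEval, LVISResults

lvis_gt = LVIS('lvis_v1_val.json')                   # placeholder path
lvis_dt = LVISResults(lvis_gt, 'results.bbox.json')  # placeholder path
lvis_eval = LVISEval(lvis_gt, lvis_dt, 'bbox')
lvis_eval.run()            # evaluate() + accumulate() + summarize()
lvis_eval.print_results()
# Dict with AP, AP50, AP75, APs, APm, APl, APr, APc, APf, ...
print(lvis_eval.get_results())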
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split("\n")[0] if __doc__ else "",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("annotations_json", type=Path)
    parser.add_argument("results_json", type=Path)
    parser.add_argument("output_dir", type=Path)
    parser.add_argument("--type", default="segm", choices=["segm", "bbox"])
    parser.add_argument("--dets-per-cat", default=-1, type=int)
    parser.add_argument("--max-dets", default=-1, type=int)
    parser.add_argument("--ious", nargs="*", type=float)
    # NOTE: We default to only using areas=all, since we don't report S/M/L
    # APs here.
    parser.add_argument(
        "--areas",
        nargs="*",
        type=str,
        choices=["all", "small", "medium", "large"],
        default=["all"],
    )
    parser.add_argument(
        "--pools",
        nargs="*",
        choices=["all", "r", "c", "f"],
        default=["all", "r", "c", "f"],
    )
    args = parser.parse_args()

    args.output_dir.mkdir(exist_ok=True, parents=True)
    logger = setup_logger(output=str(args.output_dir.resolve()), name=__file__)
    log_path = args.output_dir / "log.txt"

    results = str(args.results_json)
    if args.dets_per_cat > 0:
        with open(args.results_json, "r") as f:
            results = json.load(f)
        by_cat = defaultdict(list)
        for ann in results:
            by_cat[ann["category_id"]].append(ann)
        results = []
        topk = args.dets_per_cat
        for cat_anns in by_cat.values():
            results.extend(
                sorted(cat_anns, key=lambda x: x["score"], reverse=True)[:topk])

    gt = LVIS(args.annotations_json)
    results = LVISResults(gt, results, max_dets=args.max_dets)
    lvis_eval = LVISPooledEval(
        gt, results, iou_type=args.type, pools=args.pools)
    params = lvis_eval.params
    params.max_dets = args.max_dets
    if args.ious:
        params.iou_thrs = args.ious
    if args.areas:
        indices = [
            i for i, area in enumerate(params.area_rng_lbl)
            if area in args.areas
        ]
        params.area_rng_lbl = [params.area_rng_lbl[i] for i in indices]
        params.area_rng = [params.area_rng[i] for i in indices]
    lvis_eval.run()
    lvis_eval.print_results()

    metrics = {
        k: v
        for k, v in lvis_eval.results.items() if k.startswith("AP")
    }
    logger.info("copypaste: %s,%s", ",".join(map(str, metrics.keys())), "path")
    logger.info(
        "copypaste: %s,%s",
        ",".join(f"{v*100:.2f}" for v in metrics.values()),
        log_path,
    )
def reload_annotations(self):
    self.lvis = LVIS(
        '/scratch/users/zzweng/datasets/lvis/lvis_v0.5_val.json')
    self.dt_path = 'output/inference/lvis_instances_results.json'
    self.lvis_dt = LVISResults(self.lvis, self.dt_path)
def evaluate(self,
             results,
             metric=['track'],
             logger=None,
             resfile_path=None):
    """Evaluate tracking ('track', via TAO) and detection ('bbox', via
    LVIS) metrics and return them as a dict of {metric name: score}."""
    if isinstance(metric, list):
        metrics = metric
    elif isinstance(metric, str):
        metrics = [metric]
    else:
        raise TypeError('metric must be a list or a str.')
    allowed_metrics = ['bbox', 'track']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported.')

    result_files, tmp_dir = self.format_results(results, resfile_path)

    eval_results = dict()

    if 'track' in metrics:
        from tao.toolkit.tao import TaoEval
        print_log('Evaluating TAO results...', logger)
        tao_eval = TaoEval(self.ann_file, result_files['track'])
        tao_eval.params.img_ids = self.img_ids
        tao_eval.params.cat_ids = self.cat_ids
        tao_eval.params.iou_thrs = np.array([0.5, 0.75])
        tao_eval.run()

        tao_eval.print_results()
        tao_results = tao_eval.get_results()
        for k, v in tao_results.items():
            if isinstance(k, str) and k.startswith('AP'):
                key = 'track_{}'.format(k)
                val = float('{:.3f}'.format(float(v)))
                eval_results[key] = val

    if 'bbox' in metrics:
        try:
            import lvis
            assert lvis.__version__ >= '10.5.3'
            from lvis import LVIS, LVISResults, LVISEval
        except AssertionError:
            raise AssertionError(
                'Incompatible version of lvis is installed. '
                'Run pip uninstall lvis first. Then run pip '
                'install mmlvis to install open-mmlab forked '
                'lvis.')
        except ImportError:
            raise ImportError(
                'Package lvis is not installed. Please run pip '
                'install mmlvis to install open-mmlab forked '
                'lvis.')
        print_log('Evaluating detection results...', logger)
        lvis_gt = LVIS(self.ann_file)
        lvis_dt = LVISResults(lvis_gt, result_files['bbox'])
        lvis_eval = LVISEval(lvis_gt, lvis_dt, 'bbox')
        lvis_eval.params.imgIds = self.img_ids
        lvis_eval.params.catIds = self.cat_ids
        lvis_eval.evaluate()
        lvis_eval.accumulate()
        lvis_eval.summarize()
        lvis_eval.print_results()
        lvis_results = lvis_eval.get_results()
        for k, v in lvis_results.items():
            if k.startswith('AP'):
                key = '{}_{}'.format('bbox', k)
                val = float('{:.3f}'.format(float(v)))
                eval_results[key] = val
        ap_summary = ' '.join([
            '{}:{:.3f}'.format(k, float(v))
            for k, v in lvis_results.items() if k.startswith('AP')
        ])
        eval_results['bbox_mAP_copypaste'] = ap_summary

    if tmp_dir is not None:
        tmp_dir.cleanup()

    return eval_results
def _evaluate_predictions_on_lvis_per_class(
        lvis_gt, lvis_results, iou_type, class_names=None):
    """
    Args:
        lvis_gt (LVIS): ground-truth LVIS API object.
        lvis_results (list[dict]): detection results in LVIS format.
        iou_type (str): 'bbox' or 'segm'.
        class_names (None or list[str]): if provided, used to report
            per-category AP. The order must match the category ordering
            used by LVISEval.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warning("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask
        # area. This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.evaluate()
    lvis_eval.accumulate()

    # Compute per-category AP.
    # precision has shape (iou, recall, cls, area range)
    precisions = lvis_eval.eval["precision"]
    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        precision = precisions[:, :, idx, 0]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))

    lvis_eval.summarize()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) +
                create_small_table(results))
    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results
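# Hedged usage sketch for the helper above (not from the original code): class
# names are read from the ground truth in sorted-category-id order, which is
# assumed to match LVISEval's category axis; file paths are placeholders.
import json

from lvis import LVIS

lvis_gt = LVIS('lvis_v1_val.json')                 # placeholder path
class_names = [
    c['name'] for c in lvis_gt.load_cats(sorted(lvis_gt.get_cat_ids()))
]
with open('lvis_instances_results.json') as f:     # placeholder path
    predictions = json.load(f)
per_class_results = _evaluate_predictions_on_lvis_per_class(
    lvis_gt, predictions, 'segm', class_names=class_names)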
def main():
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split("\n")[0] if __doc__ else "",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("annotations_json", type=Path)
    parser.add_argument("results_json", type=Path)
    parser.add_argument("output_dir", type=Path)
    parser.add_argument("--type", default="segm", choices=["segm", "bbox"])
    parser.add_argument("--dets-per-cat", default=10000, type=int)
    parser.add_argument("--ious", nargs="*", type=float)
    args = parser.parse_args()

    args.output_dir.mkdir(exist_ok=True, parents=True)
    logger = setup_logger(output=str(args.output_dir.resolve()), name=__file__)
    log_path = args.output_dir / "log.txt"

    assert args.dets_per_cat > 0
    with open(args.results_json, "r") as f:
        results = json.load(f)
    by_cat = defaultdict(list)
    for ann in results:
        by_cat[ann["category_id"]].append(ann)
    results = []
    topk = args.dets_per_cat
    missing_dets_cats = set()
    for cat, cat_anns in by_cat.items():
        if len(cat_anns) < topk:
            missing_dets_cats.add(cat)
        results.extend(
            sorted(cat_anns, key=lambda x: x["score"], reverse=True)[:topk])
    if missing_dets_cats:
        logger.warning(
            f"\n===\n"
            f"{len(missing_dets_cats)} classes had less than {topk} detections!\n"
            f"Outputting {topk} detections for each class will improve AP "
            f"further.\n"
            f"If using detectron2, please use the lvdevil/infer_topk.py script "
            f"to output a results file with {topk} detections for each class.\n"
            f"===")

    gt = LVIS(args.annotations_json)
    results = LVISResults(gt, results, max_dets=-1)
    lvis_eval = LVISEval(gt, results, iou_type=args.type)
    params = lvis_eval.params
    params.max_dets = -1  # No limit on detections per image.
    if args.ious:
        params.iou_thrs = args.ious
    lvis_eval.run()
    lvis_eval.print_results()

    metrics = {
        k: v
        for k, v in lvis_eval.results.items() if k.startswith("AP")
    }
    logger.info("copypaste: %s,%s", ",".join(map(str, metrics.keys())), "path")
    logger.info(
        "copypaste: %s,%s",
        ",".join(f"{v*100:.2f}" for v in metrics.values()),
        log_path,
    )
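# Hypothetical invocation of the script above (the script filename and the
# input/output paths are placeholders); it keeps at most --dets-per-cat
# detections per class before scoring:
#
#   python eval_lvis.py lvis_v1_val.json results.segm.json eval_out \
#       --type segm --dets-per-cat 10000 --ious 0.5 0.75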
def eval_cocofied_lvis_result(self, gt_file, result_file, metric='segm'):

    def get_lvis_format_result(lvis_params, lvis_results):
        template = (" {:<18} {} @[ IoU={:<9} | area={:>6s} | "
                    "maxDets={:>3d} catIds={:>3s}] = {:0.3f}")
        result_list = []
        for key, value in lvis_results.items():
            max_dets = lvis_params.max_dets
            if "AP" in key:
                title = "Average Precision"
                _type = "(AP)"
            else:
                title = "Average Recall"
                _type = "(AR)"

            if len(key) > 2 and key[2].isdigit():
                iou_thr = (float(key[2:]) / 100)
                iou = "{:0.2f}".format(iou_thr)
            else:
                iou = "{:0.2f}:{:0.2f}".format(
                    lvis_params.iou_thrs[0], lvis_params.iou_thrs[-1])

            if len(key) > 2 and key[2] in ["r", "c", "f"]:
                cat_group_name = key[2]
            else:
                cat_group_name = "all"

            if len(key) > 2 and key[2] in ["s", "m", "l"]:
                area_rng = key[2]
            else:
                area_rng = "all"

            result_list.append(
                template.format(title, _type, iou, area_rng, max_dets,
                                cat_group_name, value))
        return result_list

    print('load gt json')
    lvis_gt = LVIS(gt_file)
    cat_ids = lvis_gt.get_cat_ids()

    print('load pred json')
    lvis_dt = LVISResults(lvis_gt, result_file)

    print('evaluating')
    lvis_eval = LVISEval(lvis_gt, lvis_dt, metric)
    lvis_eval.params.imgIds = lvis_gt.get_img_ids()
    lvis_eval.evaluate()
    lvis_eval.accumulate()
    lvis_eval.summarize()

    # Compute per-category AP
    precisions = lvis_eval.eval['precision']
    assert len(cat_ids) == precisions.shape[2]
    results_per_category = []
    for idx, catId in enumerate(cat_ids):
        nm = lvis_gt.load_cats([catId])[0]
        precision = precisions[:, :, idx, 0]
        precision = precision[precision > -1]
        if precision.size:
            ap = np.mean(precision)
        else:
            ap = float('nan')
        results_per_category.append(
            (f'{nm["name"]}', f'{float(ap):0.3f}'))

    num_columns = min(6, len(results_per_category) * 2)
    results_flatten = list(itertools.chain(*results_per_category))
    headers = ['category', 'AP'] * (num_columns // 2)
    results_2d = itertools.zip_longest(*[
        results_flatten[i::num_columns] for i in range(num_columns)
    ])
    table_data = [headers]
    table_data += [result for result in results_2d]
    table = AsciiTable(table_data)
    print_log('\n' + table.table)

    format_summary_result_list = get_lvis_format_result(
        lvis_eval.params, lvis_eval.results)
    format_summary_result = "\n".join(format_summary_result_list)

    with open(f"cocofied_per-category-ap-{metric}.txt", 'w') as f:
        f.write(table.table + "\n" + format_summary_result)

    lvis_eval.print_results()
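# Standalone sketch of the per-category table idiom used above: flatten
# (category, AP) pairs into up to six columns and render them with
# terminaltables.AsciiTable. The sample categories and AP values are made up
# for illustration only.
import itertools

from terminaltables import AsciiTable

results_per_category = [('aerosol_can', '0.312'), ('air_conditioner', '0.105'),
                        ('airplane', '0.487'), ('alarm_clock', '0.266'),
                        ('alcohol', '0.143'), ('alligator', '0.201')]
num_columns = min(6, len(results_per_category) * 2)
results_flatten = list(itertools.chain(*results_per_category))
headers = ['category', 'AP'] * (num_columns // 2)
results_2d = itertools.zip_longest(
    *[results_flatten[i::num_columns] for i in range(num_columns)])
table_data = [headers] + [list(row) for row in results_2d]
print(AsciiTable(table_data).table)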