def do_lvis_evaluation(
    dataset,
    gt_path,
    predictions,
    box_only,
    output_folder,
    iou_types,
    iteration,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    if box_only:
        # Proposal-only mode: report average recall (AR) of box proposals
        # instead of running the full LVIS evaluation.
        logger.info("Evaluating bbox proposals")
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        res = COCOResults("box_proposal")
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = evaluate_box_proposals(
                    predictions, dataset, area=area, limit=limit
                )
                key = "AR{}@{:d}".format(suffix, limit)
                res.results["box_proposal"][key] = stats["ar"].item()
        logger.info(res)
        if output_folder:
            torch.save(res, os.path.join(output_folder, "box_proposals.pth"))
        return

    logger.info("Preparing results for LVIS format")
    lvis_results = prepare_for_lvis_evaluation(predictions, dataset, iou_types)
    if len(lvis_results) == 0:
        return {}

    # Dump detections to disk so LVISEval can load them as a result file.
    dt_path = os.path.join(output_folder, "lvis_dt.json")
    import json
    with open(dt_path, "w") as f:
        json.dump(lvis_results, f)

    logger.info("Evaluating predictions")
    lvis_eval_info = {}
    for iou_type in iou_types:
        lvis_eval = LVISEval(gt_path, dt_path, iou_type)
        lvis_eval.run()
        print(iou_type)
        lvis_eval.print_results()

        keys = lvis_eval.get_results().keys()
        for k in keys:
            lvis_eval_info[iou_type + k] = lvis_eval.get_results()[k]

        # Per-category evaluation, saved under <output_folder>/<iteration>.
        save_path = os.path.join(output_folder, str(iteration))
        mkdir(save_path)
        lvis_eval_percat = LVISEvalPerCat(gt_path, dt_path, iou_type, save_path)
        lvis_eval_percat.run()
        lvis_eval_percat.print_results()
    return lvis_eval_info
def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None):
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    return results
def _evaluate_predictions_on_lvis(
        lvis_gt, lvis_results, iou_type, class_names=None):
    """
    Args:
        iou_type (str):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        "Evaluation results for {}: \n".format(iou_type) +
        create_small_table(results)
    )
    return results
def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets=None, class_names=None
):
    """
    Copied from detectron2.evaluation.lvis_evaluation, with support for max_dets.

    Args:
        iou_type (str):
        kpt_oks_sigmas (list[float]):
        max_dets (None or int)
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    #####
    # <modified>
    if max_dets is None:
        max_dets = 300
    lvis_results_obj = LVISResults(lvis_gt, lvis_results, max_dets=max_dets)
    lvis_eval = LVISEval(lvis_gt, lvis_results_obj, iou_type)
    lvis_eval.params.max_dets = max_dets
    # </modified>
    #####

    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info(
        f"Evaluation results for {iou_type}, max_dets {max_dets} \n"
        + create_small_table(results)
    )
    return results
def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type,
                                  max_dets_per_image=None, class_names=None):
    """
    Args:
        iou_type (str):
        max_dets_per_image (None or int): limit on maximum detections per image
            in evaluating AP. This limit, by default of the LVIS dataset, is 300.
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    if max_dets_per_image is None:
        max_dets_per_image = 300  # Default for LVIS dataset

    from lvis import LVISEval, LVISResults

    logger.info(
        f"Evaluating with max detections per image = {max_dets_per_image}")
    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    return results
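# A minimal usage sketch for the helper above (hedged: the annotation path and
# the detection entry are placeholders; image_id/category_id must come from the
# ground-truth file, and detections follow the standard LVIS/COCO result format
# of image_id, category_id, bbox [x, y, w, h] and score).
from lvis import LVIS

lvis_gt = LVIS("lvis_v1_val.json")  # placeholder path to the LVIS annotations
detections = [
    {"image_id": 1, "category_id": 1,
     "bbox": [10.0, 20.0, 50.0, 60.0], "score": 0.9},
]
metrics = _evaluate_predictions_on_lvis(
    lvis_gt, detections, "bbox", max_dets_per_image=300)
print(metrics["AP"], metrics["APr"], metrics["APc"], metrics["APf"])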
def do_lvis_evaluation(
    dataset,
    gt_path,
    predictions,
    output_folder,
    iou_types,
    iteration,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    logger.info("Preparing results for LVIS format")
    lvis_results = prepare_for_lvis_evaluation(predictions, dataset, iou_types)
    if len(lvis_results) == 0:
        return {}

    dt_path = os.path.join(output_folder, "lvis_dt.json")
    import json
    with open(dt_path, "w") as f:
        json.dump(lvis_results, f)

    logger.info("Evaluating predictions")
    lvis_eval_info = {}
    for iou_type in iou_types:
        lvis_eval = LVISEval(gt_path, dt_path, iou_type)
        lvis_eval.run()
        print(iou_type)
        lvis_eval.print_results()

        keys = lvis_eval.get_results().keys()
        for k in keys:
            lvis_eval_info[iou_type + k] = lvis_eval.get_results()[k]

        save_path = os.path.join(output_folder, str(iteration))
        mkdir(save_path)
        lvis_eval_percat = LVISEvalPerCat(gt_path, dt_path, iou_type, save_path)
        lvis_eval_percat.run()
        lvis_eval_percat.print_results()
    return lvis_eval_info
def eval_partial_results(epoch, dset_name, validation_path):
    results = []
    mAP = -1
    directory = 'bbox_results/temp_res'
    for filename in os.listdir(directory):
        if filename.endswith(".json"):
            temp_name = os.path.join(directory, filename)
            with open(temp_name, 'rb') as f:
                results = list(itertools.chain(results, pickle.load(f)))

    cwd = os.getenv('owd')
    validation_path = os.path.join(cwd, validation_path)
    if not os.path.exists(f'bbox_results/{dset_name}/'):
        os.makedirs(f'bbox_results/{dset_name}/')
    json.dump(results, open(f'./bbox_results/{dset_name}/results_{epoch}.json', 'w'), indent=4)
    resFile = f'./bbox_results/{dset_name}/results_{epoch}.json'

    if (dset_name == 'coco') | (dset_name == 'drones'):
        cocoGt = COCO(validation_path)
        try:
            cocoDt = cocoGt.loadRes(resFile)
        except IndexError:
            print('empty list return zero map')
            return 0
        cocoDt.loadAnns()
        # running evaluation
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mAP = cocoEval.stats[0]
    elif (dset_name == 'lvis'):
        lvis_eval = LVISEval(validation_path, resFile, 'bbox')
        lvis_eval.run()
        metrics = lvis_eval.get_results()
        lvis_eval.print_results()
        mAP = metrics['AP']
    return (mAP)
def eval_results(results, dset_name, validation_path):
    cwd = os.getenv('owd')
    validation_path = os.path.join(cwd, validation_path)
    if not os.path.exists(f'bbox_results/{dset_name}/'):
        os.makedirs(f'bbox_results/{dset_name}/')
    rid = (random.randint(0, 1000000))
    json.dump(results, open(f'./bbox_results/{dset_name}/results_{rid}.json', 'w'), indent=4)
    resFile = f'./bbox_results/{dset_name}/results_{rid}.json'

    if (dset_name == 'coco') | (dset_name == 'drones'):
        cocoGt = COCO(validation_path)
        try:
            cocoDt = cocoGt.loadRes(resFile)
        except IndexError:
            print('empty list return zero map')
            return 0
        cocoDt.loadAnns()
        # running evaluation
        cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        mAP = cocoEval.stats[0]
    elif (dset_name == 'lvis'):
        try:
            lvis_eval = LVISEval(validation_path, resFile, 'bbox')
        except IndexError:
            print('empty list return zero map')
            return 0
        lvis_eval.run()
        metrics = lvis_eval.get_results()
        lvis_eval.print_results()
        mAP = metrics['AP']
    os.remove(resFile)
    return (mAP)
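# Hedged sketch of the bare file-based LVIS evaluation that the helpers above
# wrap: LVISEval also accepts paths to the ground-truth and result JSON files
# directly (both file names here are placeholders).
from lvis import LVISEval

lvis_eval = LVISEval("lvis_v1_val.json", "bbox_results.json", "bbox")
lvis_eval.run()                        # evaluate + accumulate + summarize
lvis_eval.print_results()
print(lvis_eval.get_results()["AP"])   # overall AP averaged over IoU thresholds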
def evaluate(self, results, metric=['track'], logger=None, resfile_path=None):
    if isinstance(metric, list):
        metrics = metric
    elif isinstance(metric, str):
        metrics = [metric]
    else:
        raise TypeError('metric must be a list or a str.')
    allowed_metrics = ['bbox', 'track']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported.')

    result_files, tmp_dir = self.format_results(results, resfile_path)

    eval_results = dict()

    if 'track' in metrics:
        from tao.toolkit.tao import TaoEval
        print_log('Evaluating TAO results...', logger)
        tao_eval = TaoEval(self.ann_file, result_files['track'])
        tao_eval.params.img_ids = self.img_ids
        tao_eval.params.cat_ids = self.cat_ids
        tao_eval.params.iou_thrs = np.array([0.5, 0.75])
        tao_eval.run()

        tao_eval.print_results()
        tao_results = tao_eval.get_results()
        for k, v in tao_results.items():
            if isinstance(k, str) and k.startswith('AP'):
                key = 'track_{}'.format(k)
                val = float('{:.3f}'.format(float(v)))
                eval_results[key] = val

    if 'bbox' in metrics:
        print_log('Evaluating detection results...', logger)
        lvis_gt = LVIS(self.ann_file)
        lvis_dt = LVISResults(lvis_gt, result_files['bbox'])
        lvis_eval = LVISEval(lvis_gt, lvis_dt, 'bbox')
        lvis_eval.params.imgIds = self.img_ids
        lvis_eval.params.catIds = self.cat_ids
        lvis_eval.evaluate()
        lvis_eval.accumulate()
        lvis_eval.summarize()
        lvis_eval.print_results()

        lvis_results = lvis_eval.get_results()
        for k, v in lvis_results.items():
            if k.startswith('AP'):
                key = '{}_{}'.format('bbox', k)
                val = float('{:.3f}'.format(float(v)))
                eval_results[key] = val
        ap_summary = ' '.join([
            '{}:{:.3f}'.format(k, float(v))
            for k, v in lvis_results.items() if k.startswith('AP')
        ])
        eval_results['bbox_mAP_copypaste'] = ap_summary

    if tmp_dir is not None:
        tmp_dir.cleanup()

    return eval_results
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             classwise=False,
             proposal_nums=(100, 300, 1000),
             iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """Evaluation in LVIS protocol.

    Args:
        results (list[list | tuple]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal', 'proposal_fast'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str | None):
        classwise (bool): Whether to evaluating the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float]): IoU threshold used for evaluating
            recalls. If set to a list, the average recall of all IoUs will
            also be computed. Default: 0.5.

    Returns:
        dict[str, float]: LVIS style metrics.
    """

    try:
        import lvis
        assert lvis.__version__ >= '10.5.3'
        from lvis import LVISResults, LVISEval
    except AssertionError:
        raise AssertionError('Incompatible version of lvis is installed. '
                             'Run pip uninstall lvis first. Then run pip '
                             'install mmlvis to install open-mmlab forked '
                             'lvis. ')
    except ImportError:
        raise ImportError('Package lvis is not installed. Please run pip '
                          'install mmlvis to install open-mmlab forked '
                          'lvis.')
    assert isinstance(results, list), 'results must be a list'
    assert len(results) == len(self), (
        'The length of results is not equal to the dataset len: {} != {}'.
        format(len(results), len(self)))

    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError('metric {} is not supported'.format(metric))

    if jsonfile_prefix is None:
        tmp_dir = tempfile.TemporaryDirectory()
        jsonfile_prefix = osp.join(tmp_dir.name, 'results')
    else:
        tmp_dir = None
    result_files = self.results2json(results, jsonfile_prefix)

    eval_results = {}
    # get original api
    lvis_gt = self.coco
    for metric in metrics:
        msg = 'Evaluating {}...'.format(metric)
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results['AR@{}'.format(num)] = ar[i]
                log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        if metric not in result_files:
            raise KeyError('{} is not in results'.format(metric))
        try:
            lvis_dt = LVISResults(lvis_gt, result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric
        lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type)
        lvis_eval.params.imgIds = self.img_ids
        if metric == 'proposal':
            lvis_eval.params.useCats = 0
            lvis_eval.params.maxDets = list(proposal_nums)
            lvis_eval.evaluate()
            lvis_eval.accumulate()
            lvis_eval.summarize()
            for k, v in lvis_eval.get_results().items():
                if k.startswith('AR'):
                    val = float('{:.3f}'.format(float(v)))
                    eval_results[k] = val
        else:
            lvis_eval.evaluate()
            lvis_eval.accumulate()
            lvis_eval.summarize()
            lvis_results = lvis_eval.get_results()
            if classwise:  # Compute per-category AP
                # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = lvis_eval.eval['precision']
                # precision: (iou, recall, cls, area range, max dets)
                assert len(self.cat_ids) == precisions.shape[2]

                results_per_category = []
                for idx, catId in enumerate(self.cat_ids):
                    # area range index 0: all area ranges
                    # max dets index -1: typically 100 per image
                    nm = self.coco.load_cats(catId)[0]
                    precision = precisions[:, :, idx, 0, -1]
                    precision = precision[precision > -1]
                    if precision.size:
                        ap = np.mean(precision)
                    else:
                        ap = float('nan')
                    results_per_category.append(
                        (f'{nm["name"]}', f'{float(ap):0.3f}'))

                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(
                    itertools.chain(*results_per_category))
                headers = ['category', 'AP'] * (num_columns // 2)
                results_2d = itertools.zip_longest(*[
                    results_flatten[i::num_columns]
                    for i in range(num_columns)
                ])
                table_data = [headers]
                table_data += [result for result in results_2d]
                table = AsciiTable(table_data)
                print_log('\n' + table.table, logger=logger)

            for k, v in lvis_results.items():
                if k.startswith('AP'):
                    key = '{}_{}'.format(metric, k)
                    val = float('{:.3f}'.format(float(v)))
                    eval_results[key] = val
            ap_summary = ' '.join([
                '{}:{:.3f}'.format(k, float(v))
                for k, v in lvis_results.items() if k.startswith('AP')
            ])
            eval_results['{}_mAP_copypaste'.format(metric)] = ap_summary
        lvis_eval.print_results()
    if tmp_dir is not None:
        tmp_dir.cleanup()
    return eval_results
def evaluate(self,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             classwise=False,
             proposal_nums=300,
             iou_thrs=np.arange(0.5, 0.96, 0.05)):
    """Evaluation in LVIS protocol.

    Args:
        results (list): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str | None): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        classwise (bool): Whether to evaluating the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float]): IoU threshold used for evaluating
            recalls. If set to a list, the average recall of all IoUs will
            also be computed. Default: 0.5.

    Returns:
        dict[str: float]
    """
    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError('metric {} is not supported'.format(metric))

    result_files = self.format_results(results, jsonfile_prefix)

    eval_results = {}
    cocoGt = self.coco
    for metric in metrics:
        msg = 'Evaluating {}...'.format(metric)
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = self.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results['AR@{}'.format(num)] = ar[i]
                log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        if metric not in result_files:
            raise KeyError('{} is not in results'.format(metric))
        try:
            cocoDt = cocoGt.loadRes(result_files[metric])
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        iou_type = 'bbox' if metric == 'proposal' else metric

        # run lvis evaluation
        eval_results['lvis'] = {}
        lvis_eval = LVISEval(self.ann_file_path, result_files[metric], iou_type)
        lvis_eval.params.max_dets = proposal_nums
        lvis_eval.run()
        lvis_eval.print_results()
        print('=====> The above metric is {}.'.format(iou_type))
        keys = lvis_eval.get_results().keys()
        for k in keys:
            eval_results['lvis'][iou_type + k] = lvis_eval.get_results()[k]

    return eval_results
results = evaluate(model, test_loader, args.device)
res_path = os.path.join(
    out_dir, (args.resume).split("/")[-1].split(".")[0] + ".json")
json.dump(results, open(res_path, 'w'), indent=4)

if args.dataset == 'coco':
    cocoGt = COCO(annotations)
    try:
        cocoDt = cocoGt.loadRes(res_path)
    except IndexError:
        print('empty list return zero map')
    cocoDt.loadAnns()
    # running evaluation
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    mAP = cocoEval.stats[0]
elif (args.dataset == 'lvis'):
    try:
        lvis_eval = LVISEval(annotations, res_path, 'bbox')
    except IndexError:
        print('empty list return zero map')
    lvis_eval.run()
    metrics = lvis_eval.get_results()
    lvis_eval.print_results()
    mAP = metrics['AP']
def _evaluate_predictions_on_lvis_per_class(lvis_gt, lvis_results, iou_type, class_names=None):
    """
    Args:
        iou_type (str):
        kpt_oks_sigmas (list[float]):
        class_names (None or list[str]): if provided, will use it to predict
            per-category AP.

    Returns:
        a dict of {metric name: score}
    """
    metrics = {
        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:  # TODO: check if needed
        logger.warn("No predictions from the model! Set scores to -1")
        return {metric: -1 for metric in metrics}

    if iou_type == "segm":
        lvis_results = copy.deepcopy(lvis_results)
        # When evaluating mask AP, if the results contain bbox, LVIS API will
        # use the box area as the area of the instance, instead of the mask area.
        # This leads to a different definition of small/medium/large.
        # We remove the bbox field to let mask AP use mask area.
        for c in lvis_results:
            c.pop("bbox", None)

    from lvis import LVISEval, LVISResults

    lvis_results = LVISResults(lvis_gt, lvis_results)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.evaluate()
    lvis_eval.accumulate()

    precisions = lvis_eval.eval["precision"]
    results_per_category = []
    for idx, name in enumerate(class_names):
        # area range index 0: all area ranges
        # max dets index -1: typically 100 per image
        precision = precisions[:, :, idx, 0]
        precision = precision[precision > -1]
        ap = np.mean(precision) if precision.size else float("nan")
        results_per_category.append(("{}".format(name), float(ap * 100)))

    lvis_eval.summarize()
    lvis_eval.print_results()

    # Pull the standard metrics from the LVIS results
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    results.update({"AP-" + name: ap for name, ap in results_per_category})
    return results
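# Hedged sketch (hypothetical helper, not part of the code above) showing how
# the dict returned by _evaluate_predictions_on_lvis_per_class can be consumed:
# standard metrics sit under keys such as "AP" / "APr", while per-category
# scores sit under "AP-<class name>".
def lowest_per_category_ap(results, k=10):
    """Return the k categories with the lowest AP from the results dict."""
    per_class = {name[len("AP-"):]: ap
                 for name, ap in results.items() if name.startswith("AP-")}
    return sorted(per_class.items(), key=lambda kv: kv[1])[:k]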