def test_nd_score(self):
    """Runs the full evaluation on an arbitrary random set of predictions."""
    # Fix both RNGs so the mocked detections (and hence the final score)
    # are reproducible.
    random.seed(42)
    np.random.seed(42)

    mdl = DetectionMetricDataList()
    for cname in self.cfg.class_names:
        gt, pred = self._mock_results(30, 3, 25, cname)
        for dist_th in self.cfg.dist_ths:
            # NOTE(review): the threshold handed to accumulate() is hard-coded
            # to 2 rather than dist_th — kept as-is to preserve the regression
            # value below; confirm against the upstream test before changing.
            mdl.set(cname, dist_th, accumulate(gt, pred, cname, center_distance, 2))

    metrics = DetectionMetrics(self.cfg)
    for cname in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            metrics.add_label_ap(
                cname, dist_th,
                calc_ap(mdl[(cname, dist_th)], self.cfg.min_recall, self.cfg.min_precision))

        for metric_name in TP_METRICS:
            md = mdl[(cname, self.cfg.dist_th_tp)]
            # Metrics that are undefined for a class are reported as NaN.
            if cname == 'traffic_cone' and metric_name in ('attr_err', 'vel_err', 'orient_err'):
                tp = np.nan
            elif cname == 'barrier' and metric_name in ('attr_err', 'vel_err'):
                tp = np.nan
            else:
                tp = calc_tp(md, self.cfg.min_recall, metric_name)
            metrics.add_label_tp(cname, metric_name, tp)

    # Regression value for the fixed seeds above.
    self.assertEqual(0.08606662159639042, metrics.nd_score)
def evaluate(self) -> Tuple[DetectionMetrics, DetectionMetricDataList]:
    """
    Performs the actual evaluation.
    :return: A tuple of high-level and the raw metric data.
    """
    t_start = time.time()

    # -----------------------------------
    # Step 1: Accumulate metric data for all classes and distance thresholds.
    # -----------------------------------
    if self.verbose:
        print('Accumulating metric data...')
    md_list = DetectionMetricDataList()
    for cname in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            # accumulate() also returns a per-instance dict that is not
            # needed here.
            md, _instance_dict = accumulate(self.gt_boxes, self.pred_boxes,
                                            cname, self.cfg.dist_fcn_callable,
                                            dist_th, False,
                                            self.is_average_delay)
            md_list.set(cname, dist_th, md)

    # -----------------------------------
    # Step 2: Calculate metrics from the data.
    # -----------------------------------
    if self.verbose:
        print('Calculating metrics...')
    metrics = DetectionMetrics(self.cfg)
    for cname in self.cfg.class_names:
        # Average precision at every distance threshold.
        for dist_th in self.cfg.dist_ths:
            metrics.add_label_ap(
                cname, dist_th,
                calc_ap(md_list[(cname, dist_th)],
                        self.cfg.min_recall, self.cfg.min_precision))

        # True-positive error metrics at the dedicated TP threshold.
        for metric_name in TP_METRICS:
            md_tp = md_list[(cname, self.cfg.dist_th_tp)]
            # Metrics that are undefined for a class (e.g. orientation of a
            # traffic cone) are reported as NaN.
            if cname == 'traffic_cone' and metric_name in ('attr_err', 'vel_err', 'orient_err'):
                tp = np.nan
            elif cname == 'barrier' and metric_name in ('attr_err', 'vel_err'):
                tp = np.nan
            else:
                tp = calc_tp(md_tp, self.cfg.min_recall, metric_name)
            metrics.add_label_tp(cname, metric_name, tp)

    # Record wall-clock evaluation time.
    metrics.add_runtime(time.time() - t_start)

    return metrics, md_list
def run(self) -> Tuple[DetectionMetrics, MetricDataList]:
    """
    Runs the evaluation: accumulates metric data, computes the metrics and
    dumps both to disk.
    :return: A tuple of high-level metrics and the raw metric data.
    """
    t0 = time.time()

    # -----------------------------------
    # Step 1: Accumulate metric data for all classes and distance thresholds
    # -----------------------------------
    md_list = MetricDataList()
    for cname in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            md_list.set(cname, dist_th,
                        accumulate(self.gt_boxes, self.pred_boxes, cname,
                                   self.cfg.dist_fcn, dist_th))

    # -----------------------------------
    # Step 2: Calculate metrics from the data
    # -----------------------------------
    metrics = DetectionMetrics(self.cfg)
    for cname in self.cfg.class_names:
        # Average precision at every distance threshold.
        for dist_th in self.cfg.dist_ths:
            md = md_list[(cname, dist_th)]
            metrics.add_label_ap(
                cname, dist_th,
                calc_ap(md, self.cfg.min_recall, self.cfg.min_precision))

        # True-positive error metrics at the dedicated TP threshold.
        for metric_name in TP_METRICS:
            md = md_list[(cname, self.cfg.dist_th_tp)]
            # Metrics that are undefined for a class are reported as NaN.
            if cname == 'traffic_cone' and metric_name in ('attr_err', 'vel_err', 'orient_err'):
                tp = np.nan
            elif cname == 'barrier' and metric_name in ('attr_err', 'vel_err'):
                tp = np.nan
            else:
                tp = calc_tp(md, self.cfg.min_recall, metric_name)
            metrics.add_label_tp(cname, metric_name, tp)

    metrics.add_runtime(time.time() - t0)

    # -----------------------------------
    # Step 3: Dump the metric data and metrics to disk
    # -----------------------------------
    with open(os.path.join(self.output_dir, 'metrics.json'), 'w') as f:
        json.dump(metrics.serialize(), f, indent=2)
    with open(os.path.join(self.output_dir, 'metric_data_list.json'), 'w') as f:
        json.dump(md_list.serialize(), f, indent=2)

    return metrics, md_list
def get_metric_data(gts: Dict[str, List[Dict]],
                    preds: Dict[str, List[Dict]],
                    detection_name: str,
                    dist_th: float) -> DetectionMetricData:
    """
    Builds EvalBoxes from raw record dicts and accumulates the metric data
    for a single class and distance threshold.
    :param gts: Ground truth records, keyed by sample token.
    :param preds: Prediction records, keyed by sample token.
    :param detection_name: Name of the class we are interested in.
    :param dist_th: Distance threshold for matching.
    :return: The accumulated DetectionMetricData.
    """
    # Any field missing from a record falls back to these defaults.
    defaults = {'trans': (0, 0, 0), 'size': (1, 1, 1), 'rot': (0, 0, 0, 0),
                'vel': (0, 0), 'attr': 'vehicle.parked', 'score': -1.0,
                'name': 'car'}

    def _to_eval_boxes(records: Dict[str, List[Dict]], with_score: bool) -> EvalBoxes:
        # Converts the raw per-sample record dicts into an EvalBoxes container.
        # Only prediction boxes carry a detection score.
        container = EvalBoxes()
        for sample_token, entries in records.items():
            converted = []
            for entry in entries:
                entry = {**defaults, **entry}  # Record values override the defaults.
                kwargs = dict(sample_token=sample_token,
                              translation=entry['trans'],
                              size=entry['size'],
                              rotation=entry['rot'],
                              detection_name=entry['name'],
                              attribute_name=entry['attr'],
                              velocity=entry['vel'])
                if with_score:
                    kwargs['detection_score'] = entry['score']
                converted.append(DetectionBox(**kwargs))
            container.add_boxes(sample_token, converted)
        return container

    gt_eval_boxes = _to_eval_boxes(gts, with_score=False)
    pred_eval_boxes = _to_eval_boxes(preds, with_score=True)

    return accumulate(gt_eval_boxes, pred_eval_boxes,
                      class_name=detection_name,
                      dist_fcn=center_distance,
                      dist_th=dist_th)
def eval_main(root_path, info_path, version, res_path, eval_set, output_dir):
    """
    Runs the nuScenes detection evaluation end-to-end: loads predictions and
    ground truth, accumulates metric data, computes AP/TP metrics, dumps the
    results to disk and prints a summary.
    :param root_path: Root path of the dataset.
    :param info_path: Path to the info file with the ground truth.
    :param version: Dataset version string (not used in this function body).
    :param res_path: Path to the result (prediction) file.
    :param eval_set: Name of the evaluation split.
    :param output_dir: Directory the metric json files are written to.
    """
    cfg = config_factory()
    result_path = res_path

    # Check result file exists.
    assert os.path.exists(result_path), 'Error: The result file does not exist!'

    # Make dirs.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Load data.
    print('Initializing nuScenes detection evaluation')
    pred_boxes, meta = load_prediction(result_path, cfg.max_boxes_per_sample,
                                       DetectionBox, verbose=True)
    gt_boxes = load_gt(root_path, info_path, eval_set, DetectionBox, verbose=True)
    assert set(pred_boxes.sample_tokens) == set(gt_boxes.sample_tokens), \
        "Samples in split doesn't match samples in predictions."

    start_time = time.time()
    verbose = True

    # -----------------------------------
    # Step 1: Accumulate metric data for all classes and distance thresholds.
    # -----------------------------------
    if verbose:
        print('Accumulating metric data...')
    metric_data_list = DetectionMetricDataList()
    for class_name in cfg.class_names:
        for dist_th in cfg.dist_ths:
            md = accumulate(gt_boxes, pred_boxes, class_name,
                            cfg.dist_fcn_callable, dist_th)
            metric_data_list.set(class_name, dist_th, md)

    # -----------------------------------
    # Step 2: Calculate metrics from the data.
    # -----------------------------------
    if verbose:
        print('Calculating metrics...')
    metrics = DetectionMetrics(cfg)
    for class_name in cfg.class_names:
        # Compute APs.
        for dist_th in cfg.dist_ths:
            metric_data = metric_data_list[(class_name, dist_th)]
            ap = calc_ap(metric_data, cfg.min_recall, cfg.min_precision)
            metrics.add_label_ap(class_name, dist_th, ap)

        # Compute TP metrics. Fix: metrics that are undefined for a class
        # (e.g. orientation/velocity/attribute of a traffic cone) are reported
        # as NaN, matching the other evaluation entry points in this file;
        # previously they were computed anyway and skewed the NDS.
        for metric_name in TP_METRICS:
            metric_data = metric_data_list[(class_name, cfg.dist_th_tp)]
            if class_name == 'traffic_cone' and metric_name in ('attr_err', 'vel_err', 'orient_err'):
                tp = np.nan
            elif class_name == 'barrier' and metric_name in ('attr_err', 'vel_err'):
                tp = np.nan
            else:
                tp = calc_tp(metric_data, cfg.min_recall, metric_name)
            metrics.add_label_tp(class_name, metric_name, tp)

    # Compute evaluation time.
    metrics.add_runtime(time.time() - start_time)

    # Dump the metric data, meta and metrics to disk.
    if verbose:
        print('Saving metrics to: %s' % output_dir)
    metrics_summary = metrics.serialize()
    metrics_summary['meta'] = meta.copy()
    with open(os.path.join(output_dir, 'metrics_summary.json'), 'w') as f:
        json.dump(metrics_summary, f, indent=2)
    with open(os.path.join(output_dir, 'metrics_details.json'), 'w') as f:
        json.dump(metric_data_list.serialize(), f, indent=2)

    # Print high-level metrics.
    print('mAP: %.4f' % (metrics_summary['mean_ap']))
    err_name_mapping = {
        'trans_err': 'mATE',
        'scale_err': 'mASE',
        'orient_err': 'mAOE',
        'vel_err': 'mAVE',
        'attr_err': 'mAAE',
    }
    for tp_name, tp_val in metrics_summary['tp_errors'].items():
        print('%s: %.4f' % (err_name_mapping[tp_name], tp_val))
    print('NDS: %.4f' % (metrics_summary['nd_score']))
    print('Eval time: %.1fs' % metrics_summary['eval_time'])

    # Print per-class metrics.
    print()
    print('Per-class results:')
    print('Object Class\tAP\tATE\tASE\tAOE\tAVE\tAAE')
    class_aps = metrics_summary['mean_dist_aps']
    class_tps = metrics_summary['label_tp_errors']
    for class_name in class_aps.keys():
        print('%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
              % (class_name, class_aps[class_name],
                 class_tps[class_name]['trans_err'],
                 class_tps[class_name]['scale_err'],
                 class_tps[class_name]['orient_err'],
                 class_tps[class_name]['vel_err'],
                 class_tps[class_name]['attr_err']))