def test_nd_score(self):
    """
    This test runs the full evaluation for an arbitrary random set of predictions.
    """
    random.seed(42)
    np.random.seed(42)

    mdl = DetectionMetricDataList()
    for class_name in self.cfg.class_names:
        gt, pred = self._mock_results(30, 3, 25, class_name)
        for dist_th in self.cfg.dist_ths:
            mdl.set(class_name, dist_th, accumulate(gt, pred, class_name, center_distance, 2))

    metrics = DetectionMetrics(self.cfg)
    for class_name in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            ap = calc_ap(mdl[(class_name, dist_th)], self.cfg.min_recall, self.cfg.min_precision)
            metrics.add_label_ap(class_name, dist_th, ap)

        for metric_name in TP_METRICS:
            metric_data = mdl[(class_name, self.cfg.dist_th_tp)]
            if class_name in ['traffic_cone'] and metric_name in ['attr_err', 'vel_err', 'orient_err']:
                tp = np.nan
            elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:
                tp = np.nan
            else:
                tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)
            metrics.add_label_tp(class_name, metric_name, tp)

    self.assertEqual(0.08606662159639042, metrics.nd_score)
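
# For context on the expected value asserted above: the nuScenes Detection Score weights mAP by
# cfg.mean_ap_weight and adds one score per TP metric. A minimal sketch of that combination,
# assuming the per-metric errors have already been averaged over classes (nd_score_sketch,
# mean_ap and tp_errors are hypothetical names, not part of the code above).
def nd_score_sketch(mean_ap: float, tp_errors: dict, mean_ap_weight: float = 5.0) -> float:
    # Convert each mean TP error into a score in [0, 1]; errors of 1.0 or more score 0.
    tp_scores = [max(0.0, 1.0 - err) for err in tp_errors.values()]
    # Weighted sum of mAP and the TP scores, normalized back to [0, 1].
    total = mean_ap_weight * mean_ap + sum(tp_scores)
    return total / (mean_ap_weight + len(tp_scores))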
def evaluate(self) -> Tuple[DetectionMetrics, DetectionMetricDataList]:
    """
    Performs the actual evaluation.
    :return: A tuple of the high-level metrics and the raw metric data.
    """
    start_time = time.time()

    # -----------------------------------
    # Step 1: Accumulate metric data for all classes and distance thresholds.
    # -----------------------------------
    if self.verbose:
        print('Accumulating metric data...')
    metric_data_list = DetectionMetricDataList()
    for class_name in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            md, instance_dict = accumulate(self.gt_boxes, self.pred_boxes, class_name,
                                           self.cfg.dist_fcn_callable, dist_th, False,
                                           self.is_average_delay)
            metric_data_list.set(class_name, dist_th, md)

    # -----------------------------------
    # Step 2: Calculate metrics from the data.
    # -----------------------------------
    if self.verbose:
        print('Calculating metrics...')
    metrics = DetectionMetrics(self.cfg)
    for class_name in self.cfg.class_names:
        # Compute APs.
        for dist_th in self.cfg.dist_ths:
            metric_data = metric_data_list[(class_name, dist_th)]
            ap = calc_ap(metric_data, self.cfg.min_recall, self.cfg.min_precision)
            metrics.add_label_ap(class_name, dist_th, ap)

        # Compute TP metrics.
        for metric_name in TP_METRICS:
            metric_data = metric_data_list[(class_name, self.cfg.dist_th_tp)]
            if class_name in ['traffic_cone'] and metric_name in ['attr_err', 'vel_err', 'orient_err']:
                tp = np.nan
            elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:
                tp = np.nan
            else:
                tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)
            metrics.add_label_tp(class_name, metric_name, tp)

    # Compute evaluation time.
    metrics.add_runtime(time.time() - start_time)

    return metrics, metric_data_list
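
# For context on the AP values that calc_ap feeds into add_label_ap: the score is the mean precision
# over the recall range above min_recall, after subtracting min_precision and renormalizing. A sketch
# under the assumption that precision is sampled at 101 evenly spaced recall values, as the metric
# data object stores it (calc_ap_sketch is a hypothetical name).
import numpy as np

def calc_ap_sketch(precision: np.ndarray, min_recall: float, min_precision: float) -> float:
    # Drop the low-recall part of the curve; +1 excludes the min_recall bin itself.
    prec = np.copy(precision)[round(100 * min_recall) + 1:]
    # Remove the precision floor and clip negatives, so only the useful region counts.
    prec = np.clip(prec - min_precision, 0.0, None)
    # Normalize so that a perfect curve (precision 1 everywhere) yields AP = 1.
    return float(np.mean(prec)) / (1.0 - min_precision)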
def run(self) -> Tuple[DetectionMetrics, MetricDataList]:
    start_time = time.time()

    # -----------------------------------
    # Step 1: Accumulate metric data for all classes and distance thresholds
    # -----------------------------------
    metric_data_list = MetricDataList()
    for class_name in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            md = accumulate(self.gt_boxes, self.pred_boxes, class_name, self.cfg.dist_fcn, dist_th)
            metric_data_list.set(class_name, dist_th, md)

    # -----------------------------------
    # Step 2: Calculate metrics from the data
    # -----------------------------------
    metrics = DetectionMetrics(self.cfg)
    for class_name in self.cfg.class_names:
        for dist_th in self.cfg.dist_ths:
            metric_data = metric_data_list[(class_name, dist_th)]
            ap = calc_ap(metric_data, self.cfg.min_recall, self.cfg.min_precision)
            metrics.add_label_ap(class_name, dist_th, ap)

        for metric_name in TP_METRICS:
            metric_data = metric_data_list[(class_name, self.cfg.dist_th_tp)]
            if class_name in ['traffic_cone'] and metric_name in ['attr_err', 'vel_err', 'orient_err']:
                tp = np.nan
            elif class_name in ['barrier'] and metric_name in ['attr_err', 'vel_err']:
                tp = np.nan
            else:
                tp = calc_tp(metric_data, self.cfg.min_recall, metric_name)
            metrics.add_label_tp(class_name, metric_name, tp)

    metrics.add_runtime(time.time() - start_time)

    # -----------------------------------
    # Step 3: Dump the metric data and metrics to disk
    # -----------------------------------
    with open(os.path.join(self.output_dir, 'metrics.json'), 'w') as f:
        json.dump(metrics.serialize(), f, indent=2)
    with open(os.path.join(self.output_dir, 'metric_data_list.json'), 'w') as f:
        json.dump(metric_data_list.serialize(), f, indent=2)

    return metrics, metric_data_list
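
# accumulate() itself is not shown here; at its core it greedily matches predictions, in order of
# decreasing confidence, to the nearest unmatched ground-truth box of the same class within dist_th,
# and builds the precision/recall curve from the resulting TP/FP flags. A simplified, hypothetical
# sketch assuming flat lists of DetectionBox-like objects (the real code groups boxes by sample token
# and additionally tracks the TP error terms).
import numpy as np

def match_sketch(gt_boxes, pred_boxes, dist_fcn, dist_th):
    # Confident predictions get matched first.
    preds = sorted(pred_boxes, key=lambda box: box.detection_score, reverse=True)
    taken = set()
    tp, fp = [], []
    for pred in preds:
        # Find the closest unmatched ground-truth box in the same sample.
        best_dist, best_idx = np.inf, None
        for idx, gt in enumerate(gt_boxes):
            if idx not in taken and gt.sample_token == pred.sample_token:
                dist = dist_fcn(gt, pred)
                if dist < best_dist:
                    best_dist, best_idx = dist, idx
        is_match = best_dist < dist_th
        if is_match:
            taken.add(best_idx)
        tp.append(int(is_match))
        fp.append(int(not is_match))
    # Cumulative counts give the raw precision/recall points that calc_ap consumes.
    tp, fp = np.cumsum(tp).astype(float), np.cumsum(fp).astype(float)
    precision = tp / np.maximum(tp + fp, 1)
    recall = tp / max(len(gt_boxes), 1)
    return precision, recall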
def test_serialization(self):
    """ Test that instance serialization protocol works with json encoding. """
    cfg = {
        'class_range': {
            'car': 1.0,
            'truck': 1.0,
            'bus': 1.0,
            'trailer': 1.0,
            'construction_vehicle': 1.0,
            'pedestrian': 1.0,
            'motorcycle': 1.0,
            'bicycle': 1.0,
            'traffic_cone': 1.0,
            'barrier': 1.0
        },
        'dist_fcn': 'distance',
        'dist_ths': [0.0, 1.0],
        'dist_th_tp': 1.0,
        'min_recall': 0.0,
        'min_precision': 0.0,
        'max_boxes_per_sample': 1,
        'mean_ap_weight': 1.0
    }
    detect_config = DetectionConfig.deserialize(cfg)

    metrics = DetectionMetrics(cfg=detect_config)

    for i, name in enumerate(cfg['class_range'].keys()):
        metrics.add_label_ap(name, 1.0, float(i))
        for j, tp_name in enumerate(TP_METRICS):
            metrics.add_label_tp(name, tp_name, float(j))

    serialized = json.dumps(metrics.serialize())
    deserialized = DetectionMetrics.deserialize(json.loads(serialized))

    self.assertEqual(metrics, deserialized)
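
# The test above round-trips through a JSON string in memory; the same serialize()/deserialize()
# pair is what run() and eval_main rely on when writing metrics to disk. A small hypothetical
# helper illustrating the file-based version (roundtrip is not part of the code above).
import json

def roundtrip(metrics, path):
    # serialize() returns plain dicts and lists, so the result is directly JSON-encodable.
    with open(path, 'w') as f:
        json.dump(metrics.serialize(), f, indent=2)
    # Loading the file back and deserializing should reproduce an equal object,
    # just as the in-memory test asserts.
    with open(path) as f:
        return DetectionMetrics.deserialize(json.load(f))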
def eval_main(root_path, info_path, version, res_path, eval_set, output_dir):
    cfg = config_factory()
    result_path = res_path

    # Check result file exists.
    assert os.path.exists(result_path), 'Error: The result file does not exist!'

    # Make dirs.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Load data.
    print('Initializing nuScenes detection evaluation')
    pred_boxes, meta = load_prediction(result_path, cfg.max_boxes_per_sample, DetectionBox, verbose=True)
    gt_boxes = load_gt(root_path, info_path, eval_set, DetectionBox, verbose=True)

    assert set(pred_boxes.sample_tokens) == set(gt_boxes.sample_tokens), \
        "Samples in split doesn't match samples in predictions."

    # Perform the actual evaluation: accumulate raw metric data, then compute the high-level metrics.
    start_time = time.time()
    verbose = True

    # -----------------------------------
    # Step 1: Accumulate metric data for all classes and distance thresholds.
    # -----------------------------------
    if verbose:
        print('Accumulating metric data...')
    metric_data_list = DetectionMetricDataList()
    for class_name in cfg.class_names:
        for dist_th in cfg.dist_ths:
            md = accumulate(gt_boxes, pred_boxes, class_name, cfg.dist_fcn_callable, dist_th)
            metric_data_list.set(class_name, dist_th, md)

    # -----------------------------------
    # Step 2: Calculate metrics from the data.
    # -----------------------------------
    if verbose:
        print('Calculating metrics...')
    metrics = DetectionMetrics(cfg)
    for class_name in cfg.class_names:
        # Compute APs.
        for dist_th in cfg.dist_ths:
            metric_data = metric_data_list[(class_name, dist_th)]
            ap = calc_ap(metric_data, cfg.min_recall, cfg.min_precision)
            metrics.add_label_ap(class_name, dist_th, ap)

        # Compute TP metrics.
        for metric_name in TP_METRICS:
            metric_data = metric_data_list[(class_name, cfg.dist_th_tp)]
            tp = calc_tp(metric_data, cfg.min_recall, metric_name)
            metrics.add_label_tp(class_name, metric_name, tp)

    # Compute evaluation time.
    metrics.add_runtime(time.time() - start_time)

    # Dump the metric data, meta and metrics to disk.
    if verbose:
        print('Saving metrics to: %s' % output_dir)
    metrics_summary = metrics.serialize()
    metrics_summary['meta'] = meta.copy()
    with open(os.path.join(output_dir, 'metrics_summary.json'), 'w') as f:
        json.dump(metrics_summary, f, indent=2)
    with open(os.path.join(output_dir, 'metrics_details.json'), 'w') as f:
        json.dump(metric_data_list.serialize(), f, indent=2)

    # Print high-level metrics.
    print('mAP: %.4f' % (metrics_summary['mean_ap']))
    err_name_mapping = {
        'trans_err': 'mATE',
        'scale_err': 'mASE',
        'orient_err': 'mAOE',
        'vel_err': 'mAVE',
        'attr_err': 'mAAE'
    }
    for tp_name, tp_val in metrics_summary['tp_errors'].items():
        print('%s: %.4f' % (err_name_mapping[tp_name], tp_val))
    print('NDS: %.4f' % (metrics_summary['nd_score']))
    print('Eval time: %.1fs' % metrics_summary['eval_time'])

    # Print per-class metrics.
    print()
    print('Per-class results:')
    print('Object Class\tAP\tATE\tASE\tAOE\tAVE\tAAE')
    class_aps = metrics_summary['mean_dist_aps']
    class_tps = metrics_summary['label_tp_errors']
    for class_name in class_aps.keys():
        print('%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
              % (class_name,
                 class_aps[class_name],
                 class_tps[class_name]['trans_err'],
                 class_tps[class_name]['scale_err'],
                 class_tps[class_name]['orient_err'],
                 class_tps[class_name]['vel_err'],
                 class_tps[class_name]['attr_err']))
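
# A hypothetical invocation of eval_main; the paths, version string and split name below are
# placeholders and depend on how the dataset and result files are laid out.
eval_main(
    root_path='data/nuscenes',                    # dataset root (placeholder)
    info_path='data/nuscenes/infos_val.pkl',      # preprocessed info file (placeholder)
    version='v1.0-trainval',
    res_path='work_dirs/exp0/results_nusc.json',  # detections in nuScenes submission format
    eval_set='val',
    output_dir='work_dirs/exp0/nusc_eval',
)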