def test_set_ground_truth_with_no_ground_truth():
    """set_ground_truth() must raise a PSDSEvalError when GT is None"""
    metadata = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    psds_eval = PSDSEval()
    with pytest.raises(PSDSEvalError,
                       match="The ground truth cannot be set without data"):
        psds_eval.set_ground_truth(None, metadata)


def test_set_ground_truth_with_bad_metadata(bad_data):
    """Setting the ground truth with invalid metadata must raise an error"""
    gt = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    psds_eval = PSDSEval()
    with pytest.raises(PSDSEvalError,
                       match="The metadata data must be "
                             "provided in a pandas.DataFrame"):
        psds_eval.set_ground_truth(gt, bad_data)


def test_setting_ground_truth_more_than_once():
    """Ensure that an error is raised when the ground truth is set twice"""
    gt = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    metadata = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    psds_eval = PSDSEval(metadata=metadata, ground_truth=gt)
    with pytest.raises(PSDSEvalError,
                       match="You cannot set the ground truth "
                             "more than once per evaluation"):
        psds_eval.set_ground_truth(gt_t=gt, meta_t=metadata)


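# A hedged note on the behaviour checked above: the error message suggests
# that each PSDSEval instance accepts exactly one ground truth, so a second
# evaluation needs a fresh instance. The snippet below is an inference from
# this test, not code taken from the project; ``other_gt`` is a hypothetical
# second ground-truth table.
#
# second_eval = PSDSEval(metadata=metadata, ground_truth=other_gt)

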
def test_set_ground_truth_with_overlapping_events(table_name, raise_error):
    """Ground truth with overlapping events must raise an error"""
    metadata = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
    gt = pd.read_csv(os.path.join(DATADIR, table_name), sep="\t")
    psds_eval = PSDSEval()
    if raise_error:
        with pytest.raises(
                PSDSEvalError,
                match="The ground truth dataframe provided has intersecting "
                      "events/labels for the same class."):
            psds_eval.set_ground_truth(gt, metadata)
    else:
        psds_eval.set_ground_truth(gt, metadata)
        assert isinstance(psds_eval.ground_truth, pd.DataFrame)


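# test_set_ground_truth_with_overlapping_events takes ``table_name`` and
# ``raise_error`` as arguments, so it relies on a ``pytest.mark.parametrize``
# decorator that is not shown in this excerpt. A minimal sketch of what that
# decorator could look like is given below; the file names are hypothetical
# placeholders, not the project's actual test tables.
#
# @pytest.mark.parametrize("table_name,raise_error", [
#     ("test_1.gt", False),          # hypothetical: no overlapping events
#     ("test_overlap.gt", True),     # hypothetical: overlapping events
# ])

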
def test_compute_f_score_gt_later(metadata):
    """Test computation is correct when gt is not passed at init time"""
    det_t, gt_t = read_gt_and_det()
    psds_eval = PSDSEval(dtc_threshold=0.5, gtc_threshold=0.5,
                         cttc_threshold=0.3)
    psds_eval.set_ground_truth(gt_t, metadata)
    f_avg, per_class_f = psds_eval.compute_macro_f_score(det_t)
    expected_class_f = [
        0.7752161383285303, 0.7468354430379747, 0.548936170212766,
        0.39943342776203966, 0.6548881036513545, 0.7663551401869159,
        0.9405405405405406, 0.6978021978021978, 0.7105553512320706,
        0.8427672955974843
    ]
    assert f_avg == pytest.approx(0.7083329808351875), \
        "The average F-score was incorrect"
    for exp_f, class_f in zip(expected_class_f, per_class_f.values()):
        assert exp_f == pytest.approx(class_f), "Per-class F-score incorrect"


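# ``read_gt_and_det`` is defined elsewhere in this test module. A minimal
# sketch of the behaviour the test above assumes is given below: it loads the
# detection and ground-truth tables from tab-separated files in DATADIR. The
# file names "test_1.det" and "test_1.gt" are assumptions based on the other
# tests in this file, not confirmed by this excerpt.
#
# def read_gt_and_det():
#     det_t = pd.read_csv(os.path.join(DATADIR, "test_1.det"), sep="\t")
#     gt_t = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
#     return det_t, gt_t

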
def test_set_ground_truth_with_no_metadata():
    """set_ground_truth() must raise a PSDSEvalError with None metadata"""
    gt = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
    psds_eval = PSDSEval()
    with pytest.raises(PSDSEvalError, match="Audio metadata is required"):
        psds_eval.set_ground_truth(gt, None)


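# For contrast with the error cases above, a minimal sketch of the valid call
# sequence these tests guard: supply both a ground-truth table and an audio
# metadata table before computing any metric. This only mirrors calls already
# made in the tests above; it does not document additional API.
#
# metadata = pd.read_csv(os.path.join(DATADIR, "test.metadata"), sep="\t")
# gt = pd.read_csv(os.path.join(DATADIR, "test_1.gt"), sep="\t")
# psds_eval = PSDSEval()
# psds_eval.set_ground_truth(gt, metadata)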