def test_calculatePrecision_OneReportWithThreeRowsTwoPartiallyCorrectOneBelowThreshold(
        self):
    columns = [
        "sample", "query_probe_header", "ref_probe_header", "classification"
    ]
    df = pd.DataFrame(
        data=[
            create_precision_report_row(0.4, gt_conf=100),
            create_precision_report_row(0.8, gt_conf=20),
            create_precision_report_row(0.3, gt_conf=100),
        ],
        columns=columns,
    )
    report = PrecisionReport([df])
    calculator = PrecisionCalculator(report)

    confidence_threshold = 80

    actual = calculator._calculate_precision_for_a_given_confidence(
        confidence_threshold)

    assert actual.precision == 0.7 / 2.0
    assert actual.true_positives == 0.7
    assert actual.total == 2.0
def test_calculatePrecision_NoReportsRaisesEmptyReportError(self):
    columns = [
        "sample", "query_probe_header", "ref_probe_header", "classification"
    ]
    df = pd.DataFrame(columns=columns)
    report = PrecisionReport([df])
    calculator = PrecisionCalculator(report)

    with pytest.raises(EmptyReportError):
        calculator._calculate_precision_for_a_given_confidence()
def test_calculatePrecision_OneReportWithOneRowCompletelyCorrectBelowConfThresholdRaisesEmptyReportError(
        self):
    columns = [
        "sample", "query_probe_header", "ref_probe_header", "classification"
    ]
    df = pd.DataFrame(data=[create_precision_report_row(1.0, gt_conf=10)],
                      columns=columns)
    report = PrecisionReport([df])
    calculator = PrecisionCalculator(report)

    confidence_threshold = 60

    with pytest.raises(EmptyReportError):
        calculator._calculate_precision_for_a_given_confidence(
            confidence_threshold)
def test_calculatePrecision_OneReportWithOneRowCompletelyIncorrectReturnsZero(
        self):
    columns = [
        "sample", "query_probe_header", "ref_probe_header", "classification"
    ]
    df = pd.DataFrame(data=[create_precision_report_row(0.0, gt_conf=100)],
                      columns=columns)
    report = PrecisionReport([df])
    calculator = PrecisionCalculator(report)

    actual = calculator._calculate_precision_for_a_given_confidence()

    assert actual.precision == 0.0
    assert actual.true_positives == 0.0
    assert actual.total == 1.0
def test_calculatePrecision_OneReportWithOneRowCompletelyCorrectEqualConfThresholdReturnsOne(
        self):
    columns = [
        "sample", "query_probe_header", "ref_probe_header", "classification"
    ]
    df = pd.DataFrame(data=[create_precision_report_row(1.0, gt_conf=60)],
                      columns=columns)
    report = PrecisionReport([df])
    calculator = PrecisionCalculator(report)

    confidence_threshold = 60

    actual = calculator._calculate_precision_for_a_given_confidence(
        confidence_threshold)

    assert actual.precision == 1.0
    assert actual.true_positives == 1.0
    assert actual.total == 1.0
def test_calculatePrecision_OneReportWithTwoRowsPartiallyCorrect(self):
    columns = [
        "sample", "query_probe_header", "ref_probe_header", "classification"
    ]
    df = pd.DataFrame(
        data=[
            create_precision_report_row(0.5, gt_conf=100),
            create_precision_report_row(0.7, gt_conf=100),
        ],
        columns=columns,
    )
    report = PrecisionReport([df])
    calculator = PrecisionCalculator(report)

    actual = calculator._calculate_precision_for_a_given_confidence()

    assert actual.precision == 1.2 / 2
    assert actual.true_positives == 1.2
    assert actual.total == 2.0
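
# Illustrative sketch only, NOT the implementation under test: the tests above
# imply that the calculator keeps rows whose genotype confidence meets the
# threshold, sums the per-row `classification` score as true positives, and
# raises EmptyReportError when nothing passes. The names below
# (_sketch_calculate_precision, _SketchPrecisionResult) and the assumption that
# confidence is available as a `gt_conf` column are made up for this sketch;
# the real report encodes data differently.
from collections import namedtuple

_SketchPrecisionResult = namedtuple(
    "_SketchPrecisionResult", ["true_positives", "total", "precision"]
)


def _sketch_calculate_precision(report_df, confidence_threshold=0):
    # keep only rows whose genotype confidence meets the threshold
    rows = report_df[report_df["gt_conf"] >= confidence_threshold]
    if len(rows) == 0:
        raise EmptyReportError()  # same exception the tests above expect
    true_positives = rows["classification"].sum()  # partial credit per row
    total = len(rows)
    return _SketchPrecisionResult(true_positives, total, true_positives / total)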
precision_report_files_for_all_samples = (
    snakemake.input.precision_report_files_for_all_samples)
output = Path(snakemake.output.precision_file_for_all_samples)
gt_conf_percentiles = snakemake.params.gt_conf_percentiles
tool = snakemake.wildcards.tool
coverage = snakemake.wildcards.coverage
coverage_threshold = snakemake.wildcards.coverage_threshold
strand_bias_threshold = snakemake.wildcards.strand_bias_threshold
gaps_threshold = snakemake.wildcards.gaps_threshold

# API usage
logging.info("Loading report")
precision_report = PrecisionReport.from_files(
    precision_report_files_for_all_samples)

logging.info("Creating calculator")
precision_calculator = PrecisionCalculator(precision_report)

logging.info("Calculating precision")
precision_df = precision_calculator.get_precision_report(gt_conf_percentiles)

# attach the run metadata (tool, coverage and filter thresholds) to every row
metadata_df = pd.DataFrame(
    data={
        "tool": [tool] * len(precision_df),
        "coverage": [coverage] * len(precision_df),
        "coverage_threshold": [coverage_threshold] * len(precision_df),
        "strand_bias_threshold": [strand_bias_threshold] * len(precision_df),
        "gaps_threshold": [gaps_threshold] * len(precision_df),
    })
output_df = pd.concat([precision_df, metadata_df], axis=1)

# output
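# The original script is truncated after "# output". A minimal sketch of one
# way the combined dataframe could be persisted to the declared output path;
# the tab separator and index=False are assumptions, not the pipeline's
# confirmed choice.
output_df.to_csv(output, sep="\t", index=False)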