Beispiel #1
0
    def test_save_emptyReporterReturnsHeadersOnly(self):
        """Check that saving a report for a default-constructed classifier
        writes nothing but the delimiter-joined column header line."""
        separator = "\t"
        # newline="" keeps whatever line endings the reporter writes untouched.
        buffer = StringIO(newline="")
        reporter = RecallReporter(
            classifiers=[RecallClassifier()],
            delim=separator,
        )

        reporter.save_report(reporter._generate_report(), buffer)

        header_line = separator.join(reporter.columns) + "\n"
        written = buffer.getvalue()

        assert written == header_line
Beispiel #2
0
    def test_save_reporterWithTwoClassifiersWritesTwoSamplesWithTwoRows(self):
        """Check that a reporter holding two named classifiers saves, for each
        sample in order, one primary-correct row and one
        supplementary-incorrect row, preceded by the header."""
        correct_primary = create_correct_primary_sam_record()
        incorrect_suppl = create_incorrect_supplementary_sam_record()
        separator = ","

        # Build one two-entry classifier per sample name, in sample order.
        sample_names = ["sample", "sample2"]
        classifiers = []
        for sample_name in sample_names:
            classifier = create_classifier_with_two_entries(RecallClassifier)
            classifier.name = sample_name
            classifiers.append(classifier)

        reporter = RecallReporter(classifiers=classifiers, delim=separator)

        # newline="" keeps whatever line endings the reporter writes untouched.
        written = StringIO(newline="")
        reporter.save_report(reporter._generate_report(), written)
        actual = written.getvalue()

        # Rows the test expects for every sample, in this order.
        assessed_records = [
            (AlignmentAssessment.PRIMARY_CORRECT, correct_primary),
            (AlignmentAssessment.SUPPLEMENTARY_INCORRECT, incorrect_suppl),
        ]
        rows = []
        for sample_name in sample_names:
            rows.extend(
                [sample_name, record.query_name, record.reference_name,
                 assessment]
                for assessment, record in assessed_records
            )

        # Render the expected table through pandas with the same delimiter.
        column_names = [
            "sample",
            "query_probe_header",
            "ref_probe_header",
            "classification",
        ]
        expected_buffer = StringIO(newline="")
        expected_frame = pd.DataFrame(rows, columns=column_names)
        expected_frame.to_csv(
            expected_buffer, sep=separator, header=True, index=False
        )
        expected = expected_buffer.getvalue()

        assert actual == expected
gt_conf_percentiles = snakemake.params.gt_conf_percentiles

# Build the masker once from the BED file; it is reused for every SAM below.
logging.info(f"Creating masker from {mask_filepath}")
with open(mask_filepath) as bed_handle:
    masker = RecallMasker.from_bed(bed_handle)

# One report is written per (SAM file, output path, percentile) triple.
report_jobs = zip(
    sams_filepath, variant_call_recall_reports, gt_conf_percentiles
)
for sam_path, report_path, percentile in report_jobs:
    logging.info("Masking SAM records")
    with pysam.AlignmentFile(sam_path) as alignments:
        filtered_records = masker.filter_records(alignments)

    logging.info("Creating classifier")
    sample_classifier = RecallClassifier(sam=filtered_records, name=sample_id)

    logging.info("Creating reporter")
    recall_reporter = RecallReporter(classifiers=[sample_classifier])

    logging.info("Generating report")
    # TODO: the percentile (values in [0, 100, 1]) is being passed where
    # generate_report takes gt_conf.
    # TODO: fix this? It does not really matter, as the step gt (which is the
    # gt_conf_percentile) is what gets used later anyway.
    recall_report = recall_reporter.generate_report(percentile)

    # Write the report to this job's output path.
    logging.info("Saving report")
    with open(report_path, "w") as report_handle:
        recall_reporter.save_report(recall_report, report_handle)

logging.info("Done")