def test_pruning_plotjob(self, new_pipegraph):
    """Pruning without a filter must also prune a PlotJob's helper jobs.

    A PlotJob is registered as a QC job, ``prune_qc()`` is called with no
    predicate, and the plot job plus its ``cache_job`` and ``table_job``
    are all expected to carry the ``_pruned`` flag afterwards.
    """
    plot_job = register_qc(
        ppg.PlotJob("c.png", lambda: None, lambda: None)
    )
    # Freshly registered jobs start out unpruned.
    assert not plot_job._pruned

    prune_qc()

    # The flag has to reach the plot job itself and both attached jobs.
    assert plot_job._pruned
    assert plot_job.cache_job._pruned
    assert plot_job.table_job._pruned
    def test_registration_and_pruning(self, new_pipegraph):
        """Register QC jobs and prune them with and without a filter.

        Steps checked, in order:

        * registering a non-job (a plain string) raises ``TypeError``;
        * a registered job shows up in ``get_qc_jobs()`` and is unpruned;
        * after ``prune_qc(check_prune)`` the test expects jobs whose id
          ends in ``c``/``C`` to stay unpruned and other jobs - including
          ones registered afterwards - to be pruned;
        * a bare ``prune_qc()`` leaves every registered job pruned.
        """
        with pytest.raises(TypeError):
            register_qc("shu")

        job_a = ppg.FileGeneratingJob(
            "a", lambda: Path("a").write_text("hello")
        )
        register_qc(job_a)
        print(list(get_qc_jobs()))
        assert job_a in list(get_qc_jobs())
        assert not job_a._pruned

        job_c_lower = register_qc(
            ppg.FileGeneratingJob(
                "c", lambda: Path("b").write_text("hello")
            )
        )

        def check_prune(job):
            # True for job ids whose last character is "c" or "C".
            lowered_id = job.job_id.lower()
            return lowered_id[-1] == "c"

        prune_qc(check_prune)
        assert job_c_lower in list(get_qc_jobs())
        assert not job_c_lower._pruned

        # Registered after the filter was installed, yet expected pruned.
        job_b = register_qc(
            ppg.FileGeneratingJob(
                "b", lambda: Path("b").write_text("hello")
            )
        )
        assert job_b in list(get_qc_jobs())
        assert job_b._pruned

        job_c_upper = register_qc(
            ppg.FileGeneratingJob(
                "C", lambda: Path("b").write_text("hello")
            )
        )
        assert not job_c_upper._pruned
        assert len(list(get_qc_jobs())) == 4

        # Without a predicate everything is pruned.
        prune_qc()
        assert job_a._pruned
        assert job_b._pruned
        assert job_c_lower._pruned
        assert job_c_upper._pruned
        assert all(job._pruned for job in get_qc_jobs())
    def test_volcano_plot(self):
        """Run a treated-vs-untreated t-test and compare the volcano plot.

        Only QC jobs whose id contains ``"volcano"`` are kept; exactly one
        such job must remain and its first output file is compared with
        ``assert_image_equal``.
        """
        ppg.util.global_pipegraph.quiet = False
        import mbf_sampledata

        counts_file = mbf_sampledata.get_sample_path(
            "mbf_comparisons/pasillaCount_deseq2.tsv.gz"
        )
        counts = pd.read_csv(counts_file, sep=" ")
        # Column labels are forced to str before the prefix matching below.
        counts.columns = [str(label) for label in counts.columns]
        treated = [
            label for label in counts.columns if label.startswith("treated")
        ]
        untreated = [
            label for label in counts.columns if label.startswith("untreated")
        ]

        ddf = DelayedDataFrame("pasilla", counts)
        groups = {"treated": treated, "untreated": untreated}
        comp = Comparisons(ddf, groups).a_vs_b("treated", "untreated", TTest())
        comp.filter([("log2FC", "|>=", 2.0), ("FDR", "<=", 0.05)])

        prune_qc(lambda job: "volcano" in job.job_id)
        run_pipegraph()

        remaining = [job for job in get_qc_jobs() if not job._pruned]
        print(remaining)
        assert len(remaining) == 1
        assert_image_equal(remaining[0].filenames[0])
Esempio n. 4
0
def test_align_and_extract_umis(new_pipegraph):
    """Annotate aligned reads with barcode tags cut from the raw reads.

    The same check runs against two sample folders
    (``sample_extract_barcodes`` and ``sample_extract_barcodes_gz``), each
    in a fresh pipegraph. After the run, the first read of the
    post-processed BAM must carry ``XC == "AGTC"`` and ``XM == "TGAC"``.
    """
    from mbf_align.post_process import AnnotateFastqBarcodes

    sample_folders = [
        get_sample_path(Path("mbf_align/sample_extract_barcodes")),
        get_sample_path(Path("mbf_align/sample_extract_barcodes_gz")),
    ]
    for folder in sample_folders:
        new_pipegraph.new_pipegraph()
        genome = get_human_22_fake_genome()

        # The predicate returns False for every QC job.
        mbf_qualitycontrol.prune_qc(lambda _: False)

        raw_sample = Sample(
            "test", str(folder), False, pairing="only_second", vid="AA123"
        )
        aligned = AlignedSample(
            "test", str(folder / "test.bam"), genome, False, "AA123"
        )

        # Tag name -> [start, stop] pair handed to AnnotateFastqBarcodes.
        barcode_ranges = {"XC": [0, 4], "XM": [7, 7 + 4]}
        annotated = aligned.post_process(
            AnnotateFastqBarcodes(raw_sample, barcode_ranges)
        )
        ppg.run_pipegraph()

        bam = annotated.get_bam()
        first_read = next(bam.fetch())
        print(first_read.tags)
        assert first_read.get_tag("XC") == "AGTC"
        assert first_read.get_tag("XM") == "TGAC"
Esempio n. 5
0
    def test_subtraction_by_read(self):
        """Subtract one aligned lane from another and check read counts.

        Three subtractions are set up with ``SubtractOtherLane``:

        * ``lane`` minus ``lane2`` (same BAM)     -> expected to be empty;
        * ``lane`` minus ``lane3`` (different BAM) -> expected unchanged;
        * ``lane3`` minus its subset, written to a custom ``result_dir``
          -> expected to lose exactly the subset's mapped reads.
        """
        from mbf_sampledata import get_human_22_fake_genome

        genome = get_human_22_fake_genome()
        rnaseq_bam = Path("mbf_align/rnaseq_spliced_chr22.bam")
        chipseq_bam = Path("mbf_align/chipseq_chr22.bam")
        chipseq_subset_bam = Path("mbf_align/chipseq_chr22_subset.bam")

        # index creation is automatic for each of the samples below
        lane = mbf_align.AlignedSample(
            "test_lane", get_sample_data(rnaseq_bam), genome, False, "AA123"
        )
        lane2 = mbf_align.AlignedSample(
            "test_lane2", get_sample_data(rnaseq_bam), genome, False, "AA124"
        )
        lane3 = mbf_align.AlignedSample(
            "test_lane3", get_sample_data(chipseq_bam), genome, False, "AA123"
        )
        lane3_subset = mbf_align.AlignedSample(
            "test_lane3_subset",
            get_sample_data(chipseq_subset_bam),
            genome,
            False,
            "AA123",
        )

        subtract = mbf_align.post_process.SubtractOtherLane
        lane_empty = lane.post_process(subtract(lane2), new_name="empty")
        lane_full = lane.post_process(subtract(lane3), new_name="full")
        lane_some = lane3.post_process(
            subtract(lane3_subset), result_dir="results/aligned/shu"
        )

        kept_qc_jobs = [
            lane_some.post_processor_qc_jobs,
            lane_full.post_processor_qc_jobs,
        ]
        prune_qc(lambda job: job in kept_qc_jobs)
        ppg.run_pipegraph()

        assert Path(lane_empty.get_bam_names()[1]).exists()
        assert Path(lane_full.get_bam_names()[1]).exists()
        assert lane_empty.mapped_reads() == 0
        assert lane_full.mapped_reads() == lane.mapped_reads()
        assert lane.mapped_reads() != 0
        assert (
            lane_some.mapped_reads()
            == lane3.mapped_reads() - lane3_subset.mapped_reads()
        )
        # make sure there was something to subtract
        assert lane3_subset.mapped_reads()
        assert "shu" in lane_some.get_bam_names()[0]

        # NOTE(review): both comparisons use kept_qc_jobs[0]; the second
        # one may have been meant for kept_qc_jobs[1] - confirm against the
        # available reference images before changing it.
        assert_image_equal(kept_qc_jobs[0].filenames[0], "_result_dir")
        assert_image_equal(kept_qc_jobs[0].filenames[0])
Esempio n. 6
0
 def _test_qc_plots(self, filename, remaining_job_count, chdir="."):
     """Shared driver for the QC plot tests.

     Keeps only QC jobs whose id contains ``filename``, checks that
     ``remaining_job_count`` jobs are left unpruned, runs the pipegraph
     and compares the produced image.

     The image is looked up at ``lane.result_dir / chdir``; when ``chdir``
     is ``".."`` the bare ``filename`` is used, otherwise the file name is
     prefixed with ``lane.name`` and an underscore.
     """
     lane = self.prep_lane()
     prune_qc(lambda job: filename in job.job_id)

     surviving = [job for job in get_qc_jobs() if not job._pruned]
     assert len(surviving) == remaining_job_count  # plot cache, plot_table, plot

     ppg.run_pipegraph()

     image_name = filename if chdir == ".." else f"{lane.name}_{filename}"
     fn = lane.result_dir / chdir / image_name
     assert_image_equal(fn, suffix="_" + filename)
    def test_ma_plot(self):
        """Run a treated-vs-untreated t-test and compare the MA plot.

        Uses the pasilla subset with a fixed numpy random seed and a
        ``laplace_offset`` of 1. Only QC jobs whose id contains
        ``"ma_plot"`` are kept; exactly one must remain.
        """
        ppg.util.global_pipegraph.quiet = False
        pasilla_data, treated, untreated = get_pasilla_data_subset()
        import numpy

        # Seeded before the comparison is built and run.
        numpy.random.seed(500)

        groups = {"treated": treated, "untreated": untreated}
        comp = Comparisons(pasilla_data, groups).a_vs_b(
            "treated", "untreated", TTest(), laplace_offset=1
        )
        # Filter on fold change only (no FDR criterion in this test).
        comp.filter([("log2FC", "|>=", 2.0)])

        prune_qc(lambda job: "ma_plot" in job.job_id)
        run_pipegraph()

        remaining = [job for job in get_qc_jobs() if not job._pruned]
        assert len(remaining) == 1
        assert_image_equal(remaining[0].filenames[0])
    def test_correlation(self):
        """Build a Comparisons object and compare its correlation plot.

        Only QC jobs whose id contains ``"correlation"`` are kept; exactly
        one must remain and its first output file is compared with
        ``assert_image_equal``.
        """
        ppg.util.global_pipegraph.quiet = False
        import mbf_sampledata

        counts_file = mbf_sampledata.get_sample_path(
            "mbf_comparisons/pasillaCount_deseq2.tsv.gz"
        )
        counts = pd.read_csv(counts_file, sep=" ")
        # Column labels are forced to str before the prefix matching below.
        counts.columns = [str(label) for label in counts.columns]
        treated = [
            label for label in counts.columns if label.startswith("treated")
        ]
        untreated = [
            label for label in counts.columns if label.startswith("untreated")
        ]

        ddf = DelayedDataFrame("pasilla", counts)
        # The instance is not kept; constructing it is all this test needs.
        Comparisons(ddf, {"treated": treated, "untreated": untreated})

        prune_qc(lambda job: "correlation" in job.job_id)
        run_pipegraph()

        remaining = [job for job in get_qc_jobs() if not job._pruned]
        print(remaining)
        assert len(remaining) == 1
        assert_image_equal(remaining[0].filenames[0])