def test_volcano_plot(self):
        """Compare the volcano QC plot of a treated-vs-untreated TTest to its baseline.

        Loads the pasilla sample counts, groups the columns by their
        "treated" / "untreated" name prefix, builds the comparison plus a
        filter on it, and runs the pipegraph.  Exactly one unpruned QC job
        is expected afterwards; its first output file is handed to
        assert_image_equal.
        """
        ppg.util.global_pipegraph.quiet = False
        import mbf_sampledata

        sample_file = mbf_sampledata.get_sample_path(
            "mbf_comparisons/pasillaCount_deseq2.tsv.gz")
        counts = pd.read_csv(sample_file, sep=" ")
        # make sure every column label is a plain str before prefix matching
        counts.columns = [str(label) for label in counts.columns]
        groups = {
            "treated": [c for c in counts.columns if c.startswith("treated")],
            "untreated": [
                c for c in counts.columns if c.startswith("untreated")
            ],
        }
        ddf = DelayedDataFrame("pasilla", counts)
        comparison = Comparisons(ddf, groups).a_vs_b(
            "treated", "untreated", TTest())
        comparison.filter([("log2FC", "|>=", 2.0), ("FDR", "<=", 0.05)])
        # predicate matches job ids containing "volcano"; presumably prune_qc
        # keeps exactly those jobs -- the count assertion below relies on it
        prune_qc(lambda job: "volcano" in job.job_id)
        run_pipegraph()
        remaining = [job for job in get_qc_jobs() if not job._pruned]
        print(remaining)
        assert len(remaining) == 1
        assert_image_equal(remaining[0].filenames[0])
Example #2
0
    def test_subtraction_by_read(self):
        """Exercise SubtractOtherLane post-processing on three lane pairings.

        * an RNA-seq lane minus a second lane built from the same BAM:
          expected to end up with zero mapped reads,
        * the RNA-seq lane minus a ChIP-seq lane: expected to keep the
          RNA-seq lane's mapped read count,
        * the ChIP-seq lane minus a lane built from the "subset" BAM:
          expected to keep the difference of both mapped read counts.

        Additionally checks that a custom ``result_dir`` shows up in the
        resulting BAM name and compares QC output with baseline images.
        """
        from mbf_sampledata import get_human_22_fake_genome

        genome = get_human_22_fake_genome()
        rnaseq_bam = Path("mbf_align/rnaseq_spliced_chr22.bam")
        chipseq_bam = Path("mbf_align/chipseq_chr22.bam")
        chipseq_subset_bam = Path("mbf_align/chipseq_chr22_subset.bam")

        # index creation is automatic for each of the samples below
        rna_lane = mbf_align.AlignedSample(
            "test_lane", get_sample_data(rnaseq_bam), genome, False, "AA123")
        rna_lane_twin = mbf_align.AlignedSample(
            "test_lane2", get_sample_data(rnaseq_bam), genome, False, "AA124")
        chip_lane = mbf_align.AlignedSample(
            "test_lane3", get_sample_data(chipseq_bam), genome, False, "AA123")
        chip_lane_subset = mbf_align.AlignedSample(
            "test_lane3_subset",
            get_sample_data(chipseq_subset_bam),
            genome,
            False,
            "AA123",
        )

        minus_twin = mbf_align.post_process.SubtractOtherLane(rna_lane_twin)
        lane_empty = rna_lane.post_process(minus_twin, new_name="empty")
        minus_chip = mbf_align.post_process.SubtractOtherLane(chip_lane)
        lane_full = rna_lane.post_process(minus_chip, new_name="full")
        minus_subset = mbf_align.post_process.SubtractOtherLane(
            chip_lane_subset)
        lane_some = chip_lane.post_process(
            minus_subset, result_dir="results/aligned/shu")

        qc_jobs = [
            lane_some.post_processor_qc_jobs,
            lane_full.post_processor_qc_jobs,
        ]
        prune_qc(lambda job: job in qc_jobs)
        ppg.run_pipegraph()

        for processed in (lane_empty, lane_full):
            assert Path(processed.get_bam_names()[1]).exists()
        assert lane_empty.mapped_reads() == 0
        assert lane_full.mapped_reads() == rna_lane.mapped_reads()
        assert rna_lane.mapped_reads() != 0
        expected_some = (chip_lane.mapped_reads() -
                         chip_lane_subset.mapped_reads())
        assert lane_some.mapped_reads() == expected_some
        # make sure there was something to subtract
        assert chip_lane_subset.mapped_reads()
        assert "shu" in lane_some.get_bam_names()[0]
        assert_image_equal(qc_jobs[0].filenames[0], "_result_dir")
        # NOTE(review): this second check also uses qc_jobs[0]; qc_jobs[1]
        # (lane_full's QC job) is never compared -- confirm the intended index.
        assert_image_equal(qc_jobs[0].filenames[0])
 def test_assert_images_equal_inside_class(self):
     """Call assert_image_equal from within a test method.

     The reference image must compare fine without a suffix, while the
     same call with suffix "_b" is expected to raise ValueError.
     """
     image = (Path(__file__).parent / "base_images" / "test_qc" / "_" /
              "test_assert_images_equal.png")
     assert_image_equal(image)
     with pytest.raises(ValueError):
         assert_image_equal(image, "_b")
Example #4
0
 def _test_qc_plots(self, filename, remaining_job_count, chdir="."):
     """Shared driver for QC plot tests.

     Prepares a lane via ``self.prep_lane()``, applies a QC pruning
     predicate matching job ids that contain ``filename``, verifies the
     number of unpruned QC jobs, runs the pipegraph and compares the
     resulting image with its baseline (suffix ``"_" + filename``).

     Parameters
     ----------
     filename
         Substring looked for in the QC job ids; also the trailing part
         of the image file name.
     remaining_job_count
         Number of QC jobs expected to be unpruned after prune_qc.
     chdir
         Path component appended to ``lane.result_dir``.  With ``".."``
         the image is named ``filename`` only, otherwise it is prefixed
         with ``lane.name`` and an underscore.
     """
     lane = self.prep_lane()
     prune_qc(lambda job: filename in job.job_id)
     surviving = [job for job in get_qc_jobs() if not job._pruned]
     # plot cache, plot_table, plot
     assert len(surviving) == remaining_job_count
     ppg.run_pipegraph()
     image_name = filename if chdir == ".." else f"{lane.name}_{filename}"
     image_path = lane.result_dir / chdir / image_name
     assert_image_equal(image_path, suffix="_" + filename)
Example #5
0
 def test_very_simple(self):
     """Plot a 3x4 frame with the Unchanged norm and order strategies.

     The plot's ``output_filename`` is compared with the baseline image
     after the pipegraph has run.
     """
     values = {
         "a1": [0, 1, 2],
         "a2": [0.5, 1.5, 2.5],
         "b1": [2, 1, 0],
         "b2": [2.5, 0.5, 1],
     }
     frame = pd.DataFrame(values)
     delayed = DelayedDataFrame("test", frame)
     heatmap = HeatmapPlot(
         delayed,
         frame.columns,
         "test.png",
         heatmap_norm.Unchanged(),
         heatmap_order.Unchanged(),
     )
     run_pipegraph()
     assert_image_equal(heatmap.output_filename)
    def test_smooth(self, new_pipegraph_no_qc):
        """ChIP-seq heatmap using RegionFromCenter(1000) and SmoothExtendedReads.

        Three chr22 regions and two lanes (both built from the same sample
        BAM) are plotted with norm.AsIs / order.FirstLaneSum into
        "test.png", which is then compared with the baseline image.
        """
        genome = get_human_22_fake_genome()
        # one entry per region, column-wise
        region_df = pd.DataFrame(
            {
                "chr": ["chr22", "chr22", "chr22"],
                "start": [
                    36925 * 1000 - 1000,
                    31485 * 1000 - 2000,
                    41842 * 1000,
                ],
                "stop": [
                    36925 * 1000 + 1000,
                    31485 * 1000 + 2000,
                    (41842 * 1000) + 1,
                ],
            }
        )
        plot_regions = mbf_genomics.regions.GenomicRegions(
            "testregions", lambda: region_df, [], genome
        )
        first_lane = mbf_align.lanes.AlignedSample(
            "one",
            mbf_sampledata.get_sample_path("mbf_align/chipseq_chr22.bam"),
            genome,
            False,
            None,
        )
        second_lane = mbf_align.lanes.AlignedSample(
            "two",
            mbf_sampledata.get_sample_path("mbf_align/chipseq_chr22.bam"),
            genome,
            False,
            None,
        )

        heatmap = mbf_heatmap.chipseq.Heatmap(
            plot_regions,
            [first_lane, second_lane],
            region_strategy=regions.RegionFromCenter(1000),
            smoothing_strategy=smooth.SmoothExtendedReads(),
        )
        output_png = "test.png"
        heatmap.plot(output_png, norm.AsIs(), order.FirstLaneSum())
        ppg.run_pipegraph()
        assert_image_equal(output_png)
Example #7
0
 def test_venn_from_logfcs(self):
     """Venn plot built from log2FC-filtered comparisons against group "a".

     Every comparison returned by ``all_vs_b("a", Log2FC())`` is filtered
     with ``("log2FC", "|>=", 1)``; the filtered results feed
     ``venn.plot_venn``.  The plot job's first two output files are
     compared with the "_down" and "_up" baselines respectively.
     """
     ppg.util.global_pipegraph.quiet = False
     frame = pd.DataFrame(
         {
             "gene_stable_id": ["A", "B", "C", "D", "E"],
             "a": [1, 1, 1, 1, 1],
             "b": [1, 2, 3, 4, 5],
             "c": [1, 1, 3, 0.5, 0.75],
         }
     )
     delayed = DelayedDataFrame("ex1", frame)
     comparisons = Comparisons(delayed, {"a": ["a"], "b": ["b"], "c": ["c"]})
     versus_a = comparisons.all_vs_b("a", Log2FC())
     selected = {}
     for name, comparison in versus_a.items():
         selected[name] = comparison.filter([("log2FC", "|>=", 1)])
     venn_job = venn.plot_venn("test", selected)
     ppg.run_pipegraph()
     assert_image_equal(venn_job.filenames[0], "_down")
     assert_image_equal(venn_job.filenames[1], "_up")
Example #8
0
 def test_hierarchical_pearson(self):
     """Heatmap ordered by HierarchicalPearson on a noisy, resampled frame.

     A 3-row frame is resampled to 200 rows (with replacement, fixed
     random_state) and perturbed with seeded normal noise so that the
     input, and hence the image compared against the baseline, is
     reproducible.
     """
     seed_rows = pd.DataFrame({
         "a1": [0, 1, 2],
         "a2": [0.5, 1.5, 2.5],
         "b1": [2, 1, 0],
         "b2": [0.5, 0.5, 1],
     })
     df = seed_rows.sample(200, replace=True, random_state=500)
     # seed numpy's global RNG right before drawing the noise
     np.random.seed(500)
     noise = np.random.normal(0, 1, df.shape)
     df += noise
     delayed = DelayedDataFrame("test", df)
     output_png = "test.png"
     heatmap = HeatmapPlot(
         delayed,
         df.columns,
         output_png,
         heatmap_norm.Unchanged(),
         heatmap_order.HierarchicalPearson(),
     )
     run_pipegraph()
     assert_image_equal(heatmap.output_filename)
    def test_ma_plot(self):
        """Compare the MA QC plot of a treated-vs-untreated TTest to its baseline.

        Uses the pasilla data subset, seeds numpy's global RNG, builds the
        comparison with ``laplace_offset=1`` and a log2FC filter, then
        expects exactly one unpruned QC job whose first output file is
        checked against the baseline image.
        """
        ppg.util.global_pipegraph.quiet = False
        pasilla_data, treated, untreated = get_pasilla_data_subset()
        import numpy

        numpy.random.seed(500)

        groups = {"treated": treated, "untreated": untreated}
        comparison = Comparisons(pasilla_data, groups).a_vs_b(
            "treated", "untreated", TTest(), laplace_offset=1)

        # only the fold-change criterion is applied here; an
        # ('FDR', '<=', 0.05) criterion was left disabled
        comparison.filter([("log2FC", "|>=", 2.0)])
        # predicate matches job ids containing "ma_plot"
        prune_qc(lambda job: "ma_plot" in job.job_id)
        run_pipegraph()
        remaining = [job for job in get_qc_jobs() if not job._pruned]
        assert len(remaining) == 1
        assert_image_equal(remaining[0].filenames[0])
    def test_simple(self, new_pipegraph_no_qc):
        """ChIP-seq heatmap using RegionAsIs and SmoothRaw.

        Three 1000 bp chr22 regions (offsets 0, 20000 and 30000 from a
        common start) and two lanes built from the same sample BAM are
        plotted with norm.AsIs / order.AsIs into "test.png", which is then
        compared with the baseline image.
        """
        genome = get_human_22_fake_genome()
        start = 17750239
        offsets = (0, 20000, 30000)
        region_df = pd.DataFrame(
            {
                "chr": ["chr22" for _ in offsets],
                "start": [start + offset for offset in offsets],
                "stop": [start + offset + 1000 for offset in offsets],
            }
        )
        plot_regions = mbf_genomics.regions.GenomicRegions(
            "testregions", lambda: region_df, [], genome
        )
        first_lane = mbf_align.lanes.AlignedSample(
            "one",
            mbf_sampledata.get_sample_path("mbf_align/chipseq_chr22.bam"),
            genome,
            False,
            None,
        )
        second_lane = mbf_align.lanes.AlignedSample(
            "two",
            mbf_sampledata.get_sample_path("mbf_align/chipseq_chr22.bam"),
            genome,
            False,
            None,
        )

        heatmap = mbf_heatmap.chipseq.Heatmap(
            plot_regions,
            [first_lane, second_lane],
            region_strategy=regions.RegionAsIs(),
            smoothing_strategy=smooth.SmoothRaw(),
        )
        output_png = "test.png"
        heatmap.plot(output_png, norm.AsIs(), order.AsIs())
        ppg.run_pipegraph()
        assert_image_equal(output_png)
    def test_correlation(self):
        """Compare the correlation QC plot of a Comparisons object to its baseline.

        Loads the pasilla sample counts, groups the columns by their
        "treated" / "untreated" name prefix and instantiates Comparisons
        (the instance itself is not used further).  After running the
        pipegraph exactly one unpruned QC job is expected; its first
        output file is handed to assert_image_equal.
        """
        ppg.util.global_pipegraph.quiet = False
        import mbf_sampledata

        sample_file = mbf_sampledata.get_sample_path(
            "mbf_comparisons/pasillaCount_deseq2.tsv.gz")
        counts = pd.read_csv(sample_file, sep=" ")
        # make sure every column label is a plain str before prefix matching
        counts.columns = [str(label) for label in counts.columns]
        groups = {
            "treated": [c for c in counts.columns if c.startswith("treated")],
            "untreated": [
                c for c in counts.columns if c.startswith("untreated")
            ],
        }
        ddf = DelayedDataFrame("pasilla", counts)
        Comparisons(ddf, groups)
        # predicate matches job ids containing "correlation"
        prune_qc(lambda job: "correlation" in job.job_id)
        run_pipegraph()
        remaining = [job for job in get_qc_jobs() if not job._pruned]
        print(remaining)
        assert len(remaining) == 1
        assert_image_equal(remaining[0].filenames[0])
def test_assert_images_equal():
    """Walk through the outcomes of assert_image_equal for one reference image.

    Covered cases, in order: plain success, missing baseline (suffix
    "_b"), success when ``should_path`` is given together with that
    suffix, mismatching image (suffix "_c"), a generated file that does
    not exist, and a size mismatch (suffix "_d").  For the failing cases
    the raised exception type and a fragment of its message are checked.
    """
    image = (Path(__file__).parent / "base_images" / "test_qc" / "_" /
             "test_assert_images_equal.png")

    assert_image_equal(image)

    # here the baseline image does not exist
    with pytest.raises(ValueError) as missing_baseline:
        assert_image_equal(image, "_b")
    # should_path overwrites suffix
    assert_image_equal(image, "_b", should_path=image)

    assert "Base_line image not found" in str(missing_baseline.value)

    # here it is different
    with pytest.raises(ValueError) as content_mismatch:
        assert_image_equal(image, suffix="_c")
    assert "Image files did not match" in str(content_mismatch.value)

    with pytest.raises(IOError) as not_created:
        assert_image_equal("does not exist")
    assert "not created" in str(not_created.value)

    # here it is different
    with pytest.raises(ValueError) as size_mismatch:
        assert_image_equal(image, suffix="_d")
    assert "do not match expected size" in str(size_mismatch.value)