Exemple #1
0
        def test_reruns_just_plot_if_plot_changed(self, new_pipegraph):
            """Only the plot is redone when just the plot function changes.

            ``calc`` and the plot functions log each invocation to ``out/calc``
            and ``out/plot`` via ``append`` (a helper defined outside this
            block); the assertions read those logs back to count the calls.
            """
            target = "out/test.png"

            def calc():
                append("out/calc", "A")
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                append("out/plot", "B")
                chart = dp(df).p9()
                return chart.add_point("X", "Y")

            def plot2(df):
                # same as plot(), but with the two axes swapped
                append("out/plot", "B")
                chart = dp(df).p9()
                return chart.add_point("Y", "X")

            # first graph: calc and plot each run once
            ppg.PlotJob(target, calc, plot)
            ppg.run_pipegraph()
            assert b"PNG image" in magic(target)
            assert read("out/calc") == "A"
            assert read("out/plot") == "B"

            # second graph: unchanged calc, modified plot function
            new_pipegraph.new_pipegraph()
            ppg.PlotJob(target, calc, plot2)
            ppg.run_pipegraph()
            assert b"PNG image" in magic(target)
            assert read("out/calc") == "A"
            assert read("out/plot") == "BB"
Exemple #2
0
 def test_changing_skip_caching_same_name_raises(self):
     """Redefining ``a.png`` with ``skip_caching=True`` raises ValueError."""
     filename = "a.png"
     ppg.PlotJob(filename, lambda: None, lambda: None)
     # second definition of the same output, now with skip_caching set
     with pytest.raises(ValueError):
         ppg.PlotJob(
             filename, lambda: None, lambda: None, skip_caching=True
         )
    def test_reruns_just_plot_if_plot_changed(self):
        """Swapping in a modified plot function redoes the plot, not calc.

        Invocations are logged to ``out/calc`` / ``out/plot`` through the
        ``append`` helper (defined outside this block) and read back with
        ``read`` to count how often each step ran.
        """
        import pydataframe

        png = 'out/test.png'

        def calc():
            append('out/calc', 'A')
            columns = {"X": list(range(100)), 'Y': list(range(50, 150))}
            return pydataframe.DataFrame(columns)

        def plot(df):
            append('out/plot', 'B')
            base = pyggplot.Plot(df)
            return base.add_scatter('X', 'Y')

        def plot2(df):
            # identical to plot() apart from the swapped axes
            append('out/plot', 'B')
            base = pyggplot.Plot(df)
            return base.add_scatter('Y', 'X')

        # first run: both steps execute once
        ppg.PlotJob(png, calc, plot)
        ppg.run_pipegraph()
        self.assertIn('PNG image', magic(png))
        self.assertEqual(read('out/calc'), 'A')
        self.assertEqual(read('out/plot'), 'B')

        # fresh graph with the changed plot function
        ppg.new_pipegraph(rc_gen(), quiet=True)
        ppg.PlotJob(png, calc, plot2)
        ppg.run_pipegraph()
        self.assertIn('PNG image', magic(png))
        self.assertEqual(read('out/calc'), 'A')
        self.assertEqual(read('out/plot'), 'BB')
Exemple #4
0
        def test_reruns_both_if_calc_changed(self, new_pipegraph):
            """A changed calc function redoes both the calculation and the plot.

            ``calc2`` returns the same frame as ``calc`` but contains one extra
            statement, so only its code differs. The logs in ``out/calc`` and
            ``out/plot`` are expected to grow to two entries each.
            """
            png = "out/test.png"

            def calc():
                append("out/calc", "A")
                frame = pd.DataFrame(
                    {"X": list(range(100)), "Y": list(range(50, 150))}
                )
                return frame

            def calc2():
                append("out/calc", "A")
                x = 5  # noqa: E157,F841
                frame = pd.DataFrame(
                    {"X": list(range(100)), "Y": list(range(50, 150))}
                )
                return frame

            def plot(df):
                append("out/plot", "B")
                return pyggplot.Plot(df).add_scatter("X", "Y")

            # initial run
            ppg.PlotJob(png, calc, plot)
            ppg.run_pipegraph()
            assert b"PNG image" in magic(png)
            assert read("out/calc") == "A"
            assert read("out/plot") == "B"

            # new graph, calc replaced by calc2
            new_pipegraph.new_pipegraph()
            ppg.PlotJob(png, calc2, plot)
            ppg.run_pipegraph()
            assert b"PNG image" in magic(png)
            assert read("out/calc") == "AA"
            assert read("out/plot") == "BB"
Exemple #5
0
        def test_no_rerun_if_ignore_code_changes_and_plot_changes(
                self, new_pipegraph):
            """With ``ignore_code_changes()`` a changed plot function is not rerun.

            After the second run both logs (``out/calc`` and ``out/plot``) must
            still hold a single entry.
            """
            target = "out/test.png"

            def calc():
                append("out/calc", "A")
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                append("out/plot", "B")
                return pyggplot.Plot(df).add_scatter("X", "Y")

            def plot2(df):
                # plot() with swapped axes
                append("out/plot", "B")
                return pyggplot.Plot(df).add_scatter("Y", "X")

            ppg.PlotJob(target, calc, plot)
            ppg.run_pipegraph()
            assert b"PNG image" in magic(target)
            assert read("out/calc") == "A"
            assert read("out/plot") == "B"

            new_pipegraph.new_pipegraph()

            # redefine with the changed plot function, but ignore code changes
            ppg.PlotJob(target, calc, plot2).ignore_code_changes()
            ppg.run_pipegraph()
            assert b"PNG image" in magic(target)
            assert read("out/calc") == "A"
            assert read("out/plot") == "B"
Exemple #6
0
        def test_plotjob_fails(self):
            """A failing PlotJob makes the combined plot fail indirectly.

            ``p1``'s calc returns ``None``. The run must raise
            ``ppg.RuntimeError``, the cache job's exception must mention
            "did not return a", and the combined job's ``error_reason`` must be
            "Indirect". Redefining ``out/C.png`` with other arguments is
            expected to raise ``ValueError``.
            """
            import pathlib

            def calc():
                return None

            def calc2():
                columns = {
                    "X": list(range(100)),
                    "Y": list(range(50, 150)),
                    "w": "B",
                }
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            out_dir = pathlib.Path("out")
            p1 = ppg.PlotJob("out/A.png", calc, plot)
            p2 = ppg.PlotJob("out/B.png", calc2, plot)

            pc = ppg.CombinedPlotJob(out_dir / "C.png", [p1, p2], dict(facet="w"))
            # same output file, different facet argument
            with pytest.raises(ValueError):
                ppg.CombinedPlotJob(out_dir / "C.png", [p1, p2], [])
            # same output file, different job list
            with pytest.raises(ValueError):
                ppg.CombinedPlotJob(out_dir / "C.png", [p1], dict(facet="w"))

            ppg.CombinedPlotJob(out_dir / "D.png", [p1, p2], [])
            ppg.CombinedPlotJob(out_dir / "E.png", [p1, p2], dict(facet="w"))

            with pytest.raises(ppg.RuntimeError):
                ppg.run_pipegraph()
            assert "did not return a" in str(p1.cache_job.exception)
            assert pc.error_reason == "Indirect"
        def test_reruns_just_plot_if_plot_changed(self):
            """Changing only the plot function redoes the plot but not calc.

            Each step logs its invocations to ``out/calc`` / ``out/plot`` via
            the ``append`` helper (defined outside this block).
            """
            png = "out/test.png"

            def calc():
                append("out/calc", "A")
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                append("out/plot", "B")
                base = pyggplot.Plot(df)
                return base.add_scatter("X", "Y")

            def plot2(df):
                # plot() with the axes swapped
                append("out/plot", "B")
                base = pyggplot.Plot(df)
                return base.add_scatter("Y", "X")

            # first run
            ppg.PlotJob(png, calc, plot)
            ppg.run_pipegraph()
            self.assertIn(b"PNG image", magic(png))
            self.assertEqual(read("out/calc"), "A")
            self.assertEqual(read("out/plot"), "B")

            # second run in a fresh graph, with the changed plot function
            ppg.new_pipegraph(rc_gen(), quiet=True)
            ppg.PlotJob(png, calc, plot2)
            ppg.run_pipegraph()
            self.assertIn(b"PNG image", magic(png))
            self.assertEqual(read("out/calc"), "A")
            self.assertEqual(read("out/plot"), "BB")
Exemple #8
0
 def test_use_cores(self):
     """``use_cores`` returns the job and sets the core count on the right job.

     With caching, the plot job keeps ``cores_needed == 1`` and its
     ``cache_job`` gets the requested value; with ``skip_caching=True`` the
     plot job itself gets it.
     """
     cached = ppg.PlotJob("a.png", lambda: None, lambda: None)
     assert cached.cores_needed == 1
     assert cached.use_cores(5) is cached
     assert cached.cores_needed == 1
     assert cached.cache_job.cores_needed == 5

     uncached = ppg.PlotJob(
         "b.png", lambda: None, lambda: None, skip_caching=True
     )
     assert uncached.cores_needed == 1
     assert uncached.use_cores(5) is uncached
     assert uncached.cores_needed == 5
Exemple #9
0
        def test_basic(self):
            """A PlotJob with a fiddle and a second plot writes the expected files.

            Expected afterwards: the PNG, its ``.tsv`` table, the cache file
            and ``out/test2.png``. No cache or table file is expected for the
            additional plot.
            """
            of = "out/test.png"

            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            def plot2(df):
                swapped = pyggplot.Plot(df).add_scatter("Y", "X")
                swapped.width = 5
                swapped.height = 2
                return swapped

            job = ppg.PlotJob(of, calc, plot)
            job.add_fiddle(lambda drawn: drawn.scale_x_log10())
            job.add_another_plot("out/test2.png", plot2)
            ppg.run_pipegraph()

            assert b"PNG image" in magic(of)
            for present in (of + ".tsv", "cache/out/test.png", "out/test2.png"):
                assert os.path.exists(present)
            for absent in ("cache/out/test2.png", "cache/out/test2.png.tsv"):
                assert not os.path.exists(absent)
 def test_pruning_plotjob(self, new_pipegraph):
     """``prune_qc()`` marks a registered PlotJob and its sub-jobs as pruned."""
     plot_job = ppg.PlotJob("c.png", lambda: None, lambda: None)
     registered = register_qc(plot_job)
     assert not registered._pruned

     prune_qc()

     # the plot job itself, then its cache job and its table job
     for pruned in (registered, registered.cache_job, registered.table_job):
         assert pruned._pruned
Exemple #11
0
 def test_depends_on_with_caching(self):
     """``depends_on`` on a caching PlotJob attaches the dependency to the cache job.

     The plot job itself must not list the dependency, and the table job
     must depend on the cache job.
     """
     plot_job = ppg.PlotJob("out/test.pdf", lambda: 5, lambda: 5)
     upstream = ppg.Job("B")
     plot_job.depends_on(upstream)

     assert upstream not in plot_job.prerequisites
     assert upstream in plot_job.cache_job.prerequisites
     assert plot_job.cache_job in plot_job.table_job.prerequisites
Exemple #12
0
        def test_complete(self):
            """CombinedPlotJob: valid definitions render PNGs, invalid ones raise.

            Three combined plots (C, D, E) are defined and must be PNG images
            after the run. A set of invalid argument combinations, all using
            the string ``"w"`` as third argument, must raise the listed
            exception types at definition time.
            """
            import pathlib

            def calc():
                columns = {
                    "X": list(range(100)),
                    "Y": list(range(50, 150)),
                    "w": "A",
                }
                return pd.DataFrame(columns)

            def calc2():
                columns = {
                    "X": list(range(100)),
                    "Y": list(range(50, 150)),
                    "w": "B",
                }
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            out_dir = pathlib.Path("out")
            p1 = ppg.PlotJob("out/A.png", calc, plot)
            p2 = ppg.PlotJob("out/B.png", calc2, plot)

            # valid definitions
            ppg.CombinedPlotJob(out_dir / "C.png", [p1, p2], ["w"])
            ppg.CombinedPlotJob(out_dir / "D.png", [p1, p2], [])
            ppg.CombinedPlotJob(
                out_dir / "E.png",
                [p1, p2],
                {"facets": "w"},
                fiddle=lambda p: p.scale_x_log10(),
            )

            # (expected exception, output filename, plot jobs)
            rejected = [
                (ValueError, out_dir / "C.png", [p1, p2]),
                (TypeError, 5, [p1, p2]),
                (ValueError, "out/D.something", [p1, p2]),
                (ValueError, "out/D.png", []),
                (ValueError, "out/D.png", [p1, p2.job_id]),
            ]
            for expected_error, filename, jobs in rejected:
                with pytest.raises(expected_error):
                    ppg.CombinedPlotJob(filename, jobs, "w")

            ppg.run_pipegraph()
            for letter in ("C", "D", "E"):
                assert b"PNG image" in magic("out/%s.png" % letter)
Exemple #13
0
        def test_redefiniton_and_skip_changes_raises(self):
            """Redefining a PlotJob with different options raises ValueError.

            Checked options: ``skip_caching``, ``skip_table`` and
            ``render_args``.
            """
            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return dp(df).p9().add_point("X", "Y")

            of = "out/test.png"
            ppg.PlotJob(of, calc, plot)

            changed_options = (
                {"skip_caching": True},
                {"skip_table": True},
                {"render_args": {"something": 55}},
            )
            for options in changed_options:
                with pytest.raises(ValueError):
                    ppg.PlotJob(of, calc, plot, **options)
Exemple #14
0
    def register_qc_complexity(self):
        """Register a QC plot of this lane's duplicate distribution.

        The plot is written to ``<result_dir>/<name>_complexity.png``. It shows
        "Count" against "Repetition count" on a log2-scaled y axis and carries
        the total read count and the PCR bottleneck coefficient in its title.

        Returns
        -------
        The result of ``register_qc`` applied to the ``ppg.PlotJob``, which
        depends on ``self.load()`` and is configured with ``use_cores(-1)``.
        """

        output_filename = self.result_dir / f"{self.name}_complexity.png"

        def calc():
            # Build one row per repetition count from the distribution that
            # mbf_bam reports for this lane's BAM file and index.
            import mbf_bam

            counts = mbf_bam.calculate_duplicate_distribution(
                str(self.bam_filename), str(self.index_filename)
            )
            return pd.DataFrame(
                {
                    "source": self.name,
                    "Repetition count": list(counts.keys()),
                    "Count": list(counts.values()),
                }
            )

        def plot(df):
            import numpy as np

            # PCR bottleneck coefficient as computed here:
            # sum(Count) / sum(Count * Repetition count)
            unique_count = df["Count"].sum()
            total_count = (df["Count"] * df["Repetition count"]).sum()
            pcb = float(unique_count) / total_count
            if pcb >= 0.9:  # pragma: no cover
                severity = "none"
            elif pcb >= 0.8:  # pragma: no cover
                severity = "mild"
            elif pcb >= 0.5:  # pragma: no cover
                severity = "moderate"
            else:
                severity = "severe"
            title = (
                "Genomic positions with repetition count reads\nTotal read count: %i\nPCR Bottleneck coefficient: %.2f (%s)"
                % (total_count, pcb, severity)
            )
            return (
                dp(df)
                .p9()
                .theme_bw()
                .add_point("Repetition count", "Count")
                .add_line("Repetition count", "Count")
                .scale_y_continuous(
                    trans="log2",
                    breaks=[2 ** x for x in range(1, 24)],
                    # The breaks are powers of two, so the exponent shown in
                    # the label must be the base-2 logarithm. The natural log
                    # (np.log) would mislabel every tick, e.g. 1024 as "2^7".
                    labels=lambda x: ["2^%0.f" % np.log2(xs) for xs in x],
                )
                .title(title)
                .pd
            )

        return register_qc(
            ppg.PlotJob(output_filename, calc, plot)
            .depends_on(self.load())
            .use_cores(-1)
        )
Exemple #15
0
    def register_qc_splicing(self):
        """Register a QC plot summarising splicing in this lane.

        Answers two questions: how many splice events hit known versus unknown
        splice sites, and how many reads carry how many splices. The plot is
        written to ``<result_dir>/<name>_splice_sites.png``.

        Returns the result of ``register_qc`` applied to the ``ppg.PlotJob``,
        which depends on ``self.load()`` and is configured with
        ``use_cores(-1)``.
        """
        output_filename = self.result_dir / f"{self.name}_splice_sites.png"

        def calc():
            from mbf_bam import count_introns

            bam_filename, bam_index_name = self.get_bam_names()
            counts_per_chromosome = count_introns(bam_filename, bam_index_name)
            # one (initially empty) set of known (start, stop) introns per
            # chromosome of the genome
            known_splice_sites_by_chr = {
                chr: set()
                for chr in self.genome.get_chromosome_lengths()
            }
            for gene in self.genome.genes.values():
                for start, stop in zip(*gene.introns_all):
                    known_splice_sites_by_chr[gene.chr].add((start, stop))
            total_counts = collections.Counter()
            known_count = 0
            unknown_count = 0
            # NOTE(review): the loop variable `chr` shadows the builtin chr()
            # for the rest of this function.
            for chr, counts in counts_per_chromosome.items():
                for k, v in counts.items():
                    # Keys whose first element is 0xFFFFFFFF are not introns;
                    # presumably a sentinel from count_introns (verify there).
                    # For those, 0xFFFFFFFF - k[1] is used as the number of
                    # splices and v is added to that bucket.
                    if k[0] == 0xFFFFFFFF:
                        intron_counts = 0xFFFFFFFF - k[1]
                        total_counts[intron_counts] += v
                    else:
                        if k in known_splice_sites_by_chr[chr]:
                            known_count += v
                        else:
                            unknown_count += v
            # Long-format table: the "side" column selects the facet, "x" is
            # the bar label and "count" the bar height.
            result = {"side": [], "x": [], "count": []}
            result["side"].append("splice sites")
            result["x"].append("unknown")
            result["count"].append(unknown_count)
            result["side"].append("splice sites")
            result["x"].append("known")
            result["count"].append(known_count)

            for x, count in total_counts.items():
                result["side"].append("reads with x splices")
                result["x"].append(x)
                result["count"].append(count)

            return pd.DataFrame(result)

        def plot(df):
            # Bar chart with one facet per "side" value, stacked in a single
            # column with free scales; the chain ends in
            # render(output_filename) and returns that call's result.
            return (dp(df).p9().theme_bw().add_bar(
                "x", "count", stat="identity").facet_wrap(
                    "side", scales="free", ncol=1).scale_y_continuous(
                        labels=lambda xs: ["%.2g" % x for x in xs]).title(
                            self.name).theme(
                                panel_spacing_y=0.2).render(output_filename))

        return register_qc(
            ppg.PlotJob(output_filename, calc,
                        plot).depends_on(self.load()).use_cores(-1))
Exemple #16
0
 def test_prune(self):
     """A pruned PlotJob leaves neither its output nor its cache file behind."""
     def calc():
         return pd.DataFrame({"sha": [1]})

     def plot(df):
         return dp(df).p9().add_point("sha", "sha")

     pruned_job = ppg.PlotJob("a.png", calc, plot)
     pruned_job.prune()
     ppg.run_pipegraph()

     for leftover in ("cache/a.png", "a.png"):
         assert not Path(leftover).exists()
    def test_no_rerun_if_calc_change_but_ignore_codechanges(self):
        """With ``ignore_code_changes()`` a changed calc function is not rerun.

        ``calc2`` differs from ``calc`` only by one extra statement. After
        the second run the logs ``out/calc`` and ``out/plot`` must still
        contain a single entry each.
        """
        import pydataframe

        png = 'out/test.png'

        def calc():
            append('out/calc', 'A')
            columns = {"X": list(range(100)), 'Y': list(range(50, 150))}
            return pydataframe.DataFrame(columns)

        def calc2():
            append('out/calc', 'A')
            x = 5
            columns = {"X": list(range(100)), 'Y': list(range(50, 150))}
            return pydataframe.DataFrame(columns)

        def plot(df):
            append('out/plot', 'B')
            return pyggplot.Plot(df).add_scatter('X', 'Y')

        # first run
        ppg.PlotJob(png, calc, plot)
        ppg.run_pipegraph()
        self.assertIn('PNG image', magic(png))
        self.assertEqual(read('out/calc'), 'A')
        self.assertEqual(read('out/plot'), 'B')

        # fresh graph: changed calc, but code changes are ignored
        ppg.new_pipegraph(rc_gen(), quiet=True)
        ppg.PlotJob(png, calc2, plot).ignore_code_changes()
        ppg.run_pipegraph()
        self.assertIn('PNG image', magic(png))
        self.assertEqual(read('out/calc'), 'A')
        self.assertEqual(read('out/plot'), 'B')
Exemple #18
0
        def test_pdf(self):
            """A PlotJob whose output name ends in ``.pdf`` writes a PDF document."""
            pdf = "out/test.pdf"

            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            ppg.PlotJob(pdf, calc, plot)
            ppg.run_pipegraph()
            assert b"PDF document" in magic(pdf)
Exemple #19
0
        def test_unpickling_error(self, new_pipegraph):
            """A corrupted cache file surfaces as a ValueError on the plot job.

            After a successful first run the cache file is overwritten with
            text and the output is deleted. The second run must raise
            ``ppg.RuntimeError``, leave no output file, and store a
            ``ValueError`` mentioning "Unpickling error in file" on the job.
            """
            of = "out/test.png"

            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            # first run fills the cache
            ppg.PlotJob(of, calc, plot)
            ppg.run_pipegraph()

            new_pipegraph.new_pipegraph()
            rerun_job = ppg.PlotJob(of, calc, plot)
            # replace the cached data with something that is not a pickle
            with open("cache/out/test.png", "w") as cache_file:
                cache_file.write("no unpickling")
            os.unlink(of)  # so it reruns

            with pytest.raises(ppg.RuntimeError):
                ppg.run_pipegraph()
            assert not os.path.exists(of)
            error = rerun_job.exception
            assert isinstance(error, ValueError)
            assert "Unpickling error in file" in str(error)
        def test_no_rerun_if_calc_change_but_ignore_codechanges(self):
            """With ``ignore_code_changes()`` a changed calc function is not rerun.

            ``calc2`` differs from ``calc`` only by one extra statement. Both
            logs (``out/calc``, ``out/plot``) must still hold one entry after
            the second run.
            """
            png = "out/test.png"

            def calc():
                append("out/calc", "A")
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def calc2():
                append("out/calc", "A")
                x = 5  # noqa: E157,F841
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                append("out/plot", "B")
                return pyggplot.Plot(df).add_scatter("X", "Y")

            # first run
            ppg.PlotJob(png, calc, plot)
            ppg.run_pipegraph()
            self.assertIn(b"PNG image", magic(png))
            self.assertEqual(read("out/calc"), "A")
            self.assertEqual(read("out/plot"), "B")

            # fresh graph: changed calc, but code changes are ignored
            ppg.new_pipegraph(rc_gen(), quiet=True)
            ppg.PlotJob(png, calc2, plot).ignore_code_changes()
            ppg.run_pipegraph()
            self.assertIn(b"PNG image", magic(png))
            self.assertEqual(read("out/calc"), "A")
            self.assertEqual(read("out/plot"), "B")
Exemple #21
0
        def test_plot_job_dependencies_are_added_to_just_the_cache_job(self):
            """A dependency given to a PlotJob shows up on its cache job."""
            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            plot_job = ppg.PlotJob("out/test.png", calc, plot)
            upstream = ppg.FileGeneratingJob(
                "out/A", lambda: write("out/A", "A")
            )
            plot_job.depends_on(upstream)
            assert upstream in plot_job.cache_job.prerequisites
Exemple #22
0
        def test_no_rerun_if_calc_change_but_ignore_codechanges(
                self, new_pipegraph):
            """With ``ignore_code_changes()`` a changed calc function is not rerun.

            ``calc2`` differs from ``calc`` only by one extra statement. Both
            logs (``out/calc``, ``out/plot``) must still hold one entry after
            the second run.
            """
            target = "out/test.png"

            def calc():
                append("out/calc", "A")
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def calc2():
                append("out/calc", "A")
                x = 5  # noqa: E157,F841
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                append("out/plot", "B")
                chart = dp(df).p9()
                return chart.add_point("X", "Y")

            # first run
            ppg.PlotJob(target, calc, plot)
            ppg.run_pipegraph()
            assert b"PNG image" in magic(target)
            assert read("out/calc") == "A"
            assert read("out/plot") == "B"

            # fresh graph: changed calc, but code changes are ignored
            new_pipegraph.new_pipegraph()
            ppg.PlotJob(target, calc2, plot).ignore_code_changes()
            ppg.run_pipegraph()
            assert b"PNG image" in magic(target)
            assert read("out/calc") == "A"
            assert read("out/plot") == "B"
Exemple #23
0
        def test_basic_skip_table(self):
            """``skip_table=True`` still plots and caches, but writes no ``.tsv``."""
            png = "out/test.png"

            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            ppg.PlotJob(png, calc, plot, skip_table=True)
            ppg.run_pipegraph()

            assert b"PNG image" in magic(png)
            table_written = os.path.exists(png + ".tsv")
            assert not table_written
            assert os.path.exists("cache/out/test.png")
Exemple #24
0
        def test_raises_if_calc_returns_non_df(self):
            """A calc function returning ``None`` fails with a JobContractError.

            The run must raise ``ppg.RuntimeError`` and the error must be
            recorded on the plot job's cache job.
            """
            def calc():
                return None

            def plot(df):
                append("out/plot", "B")
                return pyggplot.Plot(df).add_scatter("X", "Y")

            job = ppg.PlotJob("out/test.png", calc, plot)
            try:
                ppg.run_pipegraph()
            except ppg.RuntimeError:
                pass
            else:
                # the run finished without the expected failure
                raise ValueError("should not be reached")

            failure = job.cache_job.exception
            assert isinstance(failure, ppg.JobContractError)
    def test_pdf(self):
        """A PlotJob whose output name ends in ``.pdf`` writes a PDF document."""
        import pydataframe

        pdf = 'out/test.pdf'

        def calc():
            columns = {"X": list(range(100)), 'Y': list(range(50, 150))}
            return pydataframe.DataFrame(columns)

        def plot(df):
            return pyggplot.Plot(df).add_scatter('X', 'Y')

        ppg.PlotJob(pdf, calc, plot)
        ppg.run_pipegraph()
        self.assertIn('PDF document', magic(pdf))
        def test_basic(self):
            """A plain PlotJob in a fresh, non-quiet graph produces a PNG image."""
            ppg.new_pipegraph(rc_gen(), quiet=False)

            png = "out/test.png"

            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return pyggplot.Plot(df).add_scatter("X", "Y")

            ppg.PlotJob(png, calc, plot)
            ppg.run_pipegraph()
            self.assertIn(b"PNG image", magic(png))
    def test_basic(self):
        """A plain PlotJob in a fresh, non-quiet graph produces a PNG image."""
        ppg.new_pipegraph(rc_gen(), quiet=False)
        import pydataframe

        png = 'out/test.png'

        def calc():
            columns = {"X": list(range(100)), 'Y': list(range(50, 150))}
            return pydataframe.DataFrame(columns)

        def plot(df):
            return pyggplot.Plot(df).add_scatter('X', 'Y')

        ppg.PlotJob(png, calc, plot)
        ppg.run_pipegraph()
        self.assertIn('PNG image', magic(png))
    def test_plot_job_dependencies_are_added_to_just_the_cache_job(self):
        """A dependency given to a PlotJob shows up on its cache job."""
        import pydataframe

        def calc():
            columns = {"X": list(range(100)), 'Y': list(range(50, 150))}
            return pydataframe.DataFrame(columns)

        def plot(df):
            return pyggplot.Plot(df).add_scatter('X', 'Y')

        plot_job = ppg.PlotJob('out/test.png', calc, plot)
        upstream = ppg.FileGeneratingJob(
            'out/A', lambda: write('out/A', 'A'))
        plot_job.depends_on(upstream)
        # only the cache job is checked; the plot job's own prerequisites
        # are not asserted on here
        self.assertIn(upstream, plot_job.cache_job.prerequisites)
    def test_raises_if_calc_returns_non_df(self):
        """A calc function returning ``None`` fails with a JobContractError.

        The run must raise ``ppg.RuntimeError`` and the error must be
        recorded on the plot job's cache job.
        """
        def calc():
            return None

        def plot(df):
            append('out/plot', 'B')
            return pyggplot.Plot(df).add_scatter('X', 'Y')

        job = ppg.PlotJob('out/test.png', calc, plot)
        try:
            ppg.run_pipegraph()
        except ppg.RuntimeError:
            pass
        else:
            # the run finished without the expected failure
            raise ValueError("should not be reached")

        self.assertIsInstance(job.cache_job.exception, ppg.JobContractError)
Exemple #30
0
        def test_raises_if_plot_returns_non_plot(self):
            """A plot function returning ``None`` fails with a JobContractError.

            The run must raise ``ppg.RuntimeError`` and the error must be
            recorded on the plot job itself.
            """
            def calc():
                columns = {"X": list(range(100)), "Y": list(range(50, 150))}
                return pd.DataFrame(columns)

            def plot(df):
                return None

            job = ppg.PlotJob("out/test.png", calc, plot)
            try:
                ppg.run_pipegraph()
            except ppg.RuntimeError:
                pass
            else:
                # the run finished without the expected failure
                raise ValueError("should not be reached")

            failure = job.exception
            assert isinstance(failure, ppg.JobContractError)