def test_reruns_just_plot_if_plot_changed(self, new_pipegraph):
    """A changed plot function reruns the plot, but not the calc step.

    `calc` and `plot` each append a marker to a file when executed, so the
    marker files record how often each ran across the two pipegraph runs.
    """
    output_path = "out/test.png"

    def calc():
        append("out/calc", "A")
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        append("out/plot", "B")
        return dp(df).p9().add_point("X", "Y")

    ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    assert read("out/calc") == "A"
    assert read("out/plot") == "B"

    # Fresh pipegraph, same calc, plot function with swapped axes.
    new_pipegraph.new_pipegraph()

    def plot2(df):
        append("out/plot", "B")
        return dp(df).p9().add_point("Y", "X")

    ppg.PlotJob(output_path, calc, plot2)
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    # calc ran once in total, plot ran twice.
    assert read("out/calc") == "A"
    assert read("out/plot") == "BB"
def test_changing_skip_caching_same_name_raises(self):
    """Re-declaring a PlotJob for the same file with skip_caching=True raises."""
    filename = "a.png"
    ppg.PlotJob(filename, lambda: None, lambda: None)
    with pytest.raises(ValueError):
        ppg.PlotJob(
            filename,
            lambda: None,
            lambda: None,
            skip_caching=True,
        )
def test_reruns_just_plot_if_plot_changed(self):
    """A changed plot function reruns the plot, but not the calc step.

    `calc` and `plot` each append a marker to a file when executed, so the
    marker files record how often each ran across the two pipegraph runs.
    """
    import pydataframe

    def calc():
        append('out/calc', 'A')
        return pydataframe.DataFrame({
            "X": list(range(0, 100)),
            'Y': list(range(50, 150)),
        })

    def plot(df):
        append('out/plot', 'B')
        return pyggplot.Plot(df).add_scatter('X', 'Y')

    of = 'out/test.png'
    # The PlotJob registers itself with the pipegraph; the returned job
    # object is not needed by this test, so it is not bound to a name.
    ppg.PlotJob(of, calc, plot)
    ppg.run_pipegraph()
    self.assertTrue(magic(of).find('PNG image') != -1)
    self.assertEqual(read('out/calc'), 'A')
    self.assertEqual(read('out/plot'), 'B')

    # Fresh pipegraph, same calc, plot function with swapped axes.
    ppg.new_pipegraph(rc_gen(), quiet=True)

    def plot2(df):
        append('out/plot', 'B')
        return pyggplot.Plot(df).add_scatter('Y', 'X')

    ppg.PlotJob(of, calc, plot2)
    ppg.run_pipegraph()
    self.assertTrue(magic(of).find('PNG image') != -1)
    # calc ran once in total, plot ran twice.
    self.assertEqual(read('out/calc'), 'A')
    self.assertEqual(read('out/plot'), 'BB')
def test_reruns_both_if_calc_changed(self, new_pipegraph):
    """A changed calc function reruns both the calc and the plot step."""
    output_path = "out/test.png"

    def calc():
        append("out/calc", "A")
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("X", "Y")

    ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    assert read("out/calc") == "A"
    assert read("out/plot") == "B"

    new_pipegraph.new_pipegraph()

    def calc2():
        append("out/calc", "A")
        # Extra statement so that calc2's code differs from calc's.
        x = 5  # noqa: E157,F841
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    ppg.PlotJob(output_path, calc2, plot)
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    # Both marker files grew: calc and plot each ran a second time.
    assert read("out/calc") == "AA"
    assert read("out/plot") == "BB"
def test_no_rerun_if_ignore_code_changes_and_plot_changes(
        self, new_pipegraph):
    """With ignore_code_changes(), a changed plot function causes no rerun."""
    output_path = "out/test.png"

    def calc():
        append("out/calc", "A")
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("X", "Y")

    job = ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    assert read("out/calc") == "A"
    assert read("out/plot") == "B"

    new_pipegraph.new_pipegraph()

    def plot2(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("Y", "X")

    job = ppg.PlotJob(output_path, calc, plot2)
    job.ignore_code_changes()
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    # Marker files are unchanged: neither calc nor plot ran again.
    assert read("out/calc") == "A"
    assert read("out/plot") == "B"
def test_plotjob_fails(self):
    """A PlotJob whose calc returns None fails; a CombinedPlotJob on it fails indirectly."""
    import pathlib

    def calc():
        return None

    def calc2():
        columns = {
            "X": list(range(0, 100)),
            "Y": list(range(50, 150)),
            "w": "B",
        }
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    broken = ppg.PlotJob("out/A.png", calc, plot)
    working = ppg.PlotJob("out/B.png", calc2, plot)

    combined = ppg.CombinedPlotJob(
        pathlib.Path("out/C.png"), [broken, working], {"facet": "w"}
    )
    # Re-declaring out/C.png with different arguments is rejected.
    with pytest.raises(ValueError):
        ppg.CombinedPlotJob(pathlib.Path("out/C.png"), [broken, working], [])
    with pytest.raises(ValueError):
        ppg.CombinedPlotJob(pathlib.Path("out/C.png"), [broken], {"facet": "w"})

    ppg.CombinedPlotJob(pathlib.Path("out/D.png"), [broken, working], [])
    ppg.CombinedPlotJob(
        pathlib.Path("out/E.png"), [broken, working], {"facet": "w"}
    )

    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()
    assert "did not return a" in str(broken.cache_job.exception)
    assert combined.error_reason == "Indirect"
def test_reruns_just_plot_if_plot_changed(self):
    """A changed plot function reruns the plot, but not the calc step."""
    output_path = "out/test.png"

    def calc():
        append("out/calc", "A")
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("X", "Y")

    ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find(b"PNG image"), -1)
    self.assertEqual(read("out/calc"), "A")
    self.assertEqual(read("out/plot"), "B")

    # Fresh pipegraph, same calc, plot function with swapped axes.
    ppg.new_pipegraph(rc_gen(), quiet=True)

    def plot2(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("Y", "X")

    ppg.PlotJob(output_path, calc, plot2)
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find(b"PNG image"), -1)
    # calc ran once in total, plot ran twice.
    self.assertEqual(read("out/calc"), "A")
    self.assertEqual(read("out/plot"), "BB")
def test_use_cores(self):
    """use_cores() is forwarded to the cache job, unless caching is skipped."""
    # With caching: the plot job keeps 1 core, the cache job gets the request.
    cached = ppg.PlotJob("a.png", lambda: None, lambda: None)
    assert cached.cores_needed == 1
    returned = cached.use_cores(5)
    assert returned is cached
    assert cached.cores_needed == 1
    assert cached.cache_job.cores_needed == 5

    # Without caching: the plot job itself takes the request.
    uncached = ppg.PlotJob(
        "b.png", lambda: None, lambda: None, skip_caching=True
    )
    assert uncached.cores_needed == 1
    returned = uncached.use_cores(5)
    assert returned is uncached
    assert uncached.cores_needed == 5
def test_basic(self):
    """A PlotJob with a fiddle and a second plot writes the expected files."""
    main_png = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    def plot2(df):
        swapped = pyggplot.Plot(df).add_scatter("Y", "X")
        swapped.width = 5
        swapped.height = 2
        return swapped

    plot_job = ppg.PlotJob(main_png, calc, plot)
    plot_job.add_fiddle(lambda plt: plt.scale_x_log10())
    plot_job.add_another_plot("out/test2.png", plot2)
    ppg.run_pipegraph()

    assert b"PNG image" in magic(main_png)
    assert os.path.exists(main_png + ".tsv")
    assert os.path.exists("cache/out/test.png")
    # The additional plot is rendered but gets no cache file or table of its own.
    assert os.path.exists("out/test2.png")
    assert not os.path.exists("cache/out/test2.png")
    assert not os.path.exists("cache/out/test2.png.tsv")
def test_pruning_plotjob(self, new_pipegraph):
    """prune_qc() prunes a registered PlotJob together with its cache and table jobs."""
    plot_job = ppg.PlotJob("c.png", lambda: None, lambda: None)
    registered = register_qc(plot_job)
    assert not registered._pruned

    prune_qc()

    assert registered._pruned
    assert registered.cache_job._pruned
    assert registered.table_job._pruned
def test_depends_on_with_caching(self):
    """depends_on() on a caching PlotJob attaches the dependency to the cache job."""
    output_path = "out/test.pdf"
    plot_job = ppg.PlotJob(output_path, lambda: 5, lambda: 5)
    upstream = ppg.Job("B")

    plot_job.depends_on(upstream)

    # The plot job itself does not list the dependency; its cache job does.
    assert upstream not in plot_job.prerequisites
    assert upstream in plot_job.cache_job.prerequisites
    # The table job in turn depends on the cache job.
    assert plot_job.cache_job in plot_job.table_job.prerequisites
def test_complete(self):
    """CombinedPlotJob accepts valid declarations, rejects invalid ones, and renders PNGs."""
    import pathlib

    def calc():
        columns = {
            "X": list(range(0, 100)),
            "Y": list(range(50, 150)),
            "w": "A",
        }
        return pd.DataFrame(columns)

    def calc2():
        columns = {
            "X": list(range(0, 100)),
            "Y": list(range(50, 150)),
            "w": "B",
        }
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    first = ppg.PlotJob("out/A.png", calc, plot)
    second = ppg.PlotJob("out/B.png", calc2, plot)

    # Valid declarations.
    ppg.CombinedPlotJob(pathlib.Path("out/C.png"), [first, second], ["w"])
    ppg.CombinedPlotJob(pathlib.Path("out/D.png"), [first, second], [])
    ppg.CombinedPlotJob(
        pathlib.Path("out/E.png"),
        [first, second],
        {"facets": "w"},
        fiddle=lambda plt: plt.scale_x_log10(),
    )

    # Invalid declarations.
    with pytest.raises(ValueError):
        ppg.CombinedPlotJob(pathlib.Path("out/C.png"), [first, second], "w")
    with pytest.raises(TypeError):
        ppg.CombinedPlotJob(5, [first, second], "w")
    with pytest.raises(ValueError):
        ppg.CombinedPlotJob("out/D.something", [first, second], "w")
    with pytest.raises(ValueError):
        ppg.CombinedPlotJob("out/D.png", [], "w")
    with pytest.raises(ValueError):
        ppg.CombinedPlotJob("out/D.png", [first, second.job_id], "w")

    ppg.run_pipegraph()
    assert b"PNG image" in magic("out/C.png")
    assert b"PNG image" in magic("out/D.png")
    assert b"PNG image" in magic("out/E.png")
def test_redefiniton_and_skip_changes_raises(self):
    """Re-declaring a PlotJob with different skip/render options raises ValueError."""
    output_path = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return dp(df).p9().add_point("X", "Y")

    ppg.PlotJob(output_path, calc, plot)

    with pytest.raises(ValueError):
        ppg.PlotJob(output_path, calc, plot, skip_caching=True)
    with pytest.raises(ValueError):
        ppg.PlotJob(output_path, calc, plot, skip_table=True)
    with pytest.raises(ValueError):
        ppg.PlotJob(output_path, calc, plot, render_args={"something": 55})
def register_qc_complexity(self):
    """Register a QC plot of this sample's read-duplication distribution.

    The plot is written to ``<result_dir>/<name>_complexity.png`` and shows,
    per repetition count, how many entries have that count. The title
    reports the total read count and the PCR bottleneck coefficient.

    Returns whatever ``register_qc`` returns for the created PlotJob.
    """
    output_filename = self.result_dir / f"{self.name}_complexity.png"

    def calc():
        # Imported lazily, inside the job, as in the original code.
        import mbf_bam

        counts = mbf_bam.calculate_duplicate_distribution(
            str(self.bam_filename), str(self.index_filename)
        )
        return pd.DataFrame(
            {
                "source": self.name,
                "Repetition count": list(counts.keys()),
                "Count": list(counts.values()),
            }
        )

    def plot(df):
        import numpy as np

        unique_count = df["Count"].sum()
        total_count = (df["Count"] * df["Repetition count"]).sum()
        # PCR bottleneck coefficient: unique entries / total reads.
        pcb = float(unique_count) / total_count
        if pcb >= 0.9:  # pragma: no cover
            severity = "none"
        elif pcb >= 0.8:  # pragma: no cover
            severity = "mild"
        elif pcb >= 0.5:  # pragma: no cover
            severity = "moderate"
        else:
            severity = "severe"
        title = (
            "Genomic positions with repetition count reads\nTotal read count: %i\nPCR Bottleneck coefficient: %.2f (%s)"
            % (total_count, pcb, severity)
        )
        return (
            dp(df)
            .p9()
            .theme_bw()
            .add_point("Repetition count", "Count")
            .add_line("Repetition count", "Count")
            .scale_y_continuous(
                trans="log2",
                breaks=[2 ** x for x in range(1, 24)],
                # The breaks are powers of two, so the exponent shown in the
                # label must be the base-2 logarithm (np.log would print
                # the natural log and mislabel every tick).
                labels=lambda x: ["2^%0.f" % np.log2(xs) for xs in x],
            )
            .title(title)
            .pd
        )

    return register_qc(
        ppg.PlotJob(output_filename, calc, plot)
        .depends_on(self.load())
        .use_cores(-1)
    )
def register_qc_splicing(self):
    """Register a QC plot about spliced reads.

    How many reads were spliced, and how many of the observed splice sites
    were known (present in the genome's gene introns) versus novel.
    """
    output_filename = self.result_dir / f"{self.name}_splice_sites.png"

    def calc():
        from mbf_bam import count_introns

        bam_filename, bam_index_name = self.get_bam_names()
        counts_per_chromosome = count_introns(bam_filename, bam_index_name)

        # (start, stop) pairs of all annotated introns, grouped by chromosome.
        known_sites = {
            name: set() for name in self.genome.get_chromosome_lengths()
        }
        for gene in self.genome.genes.values():
            for start, stop in zip(*gene.introns_all):
                known_sites[gene.chr].add((start, stop))

        splices_per_read = collections.Counter()
        known_count = 0
        unknown_count = 0
        for chrom, counts in counts_per_chromosome.items():
            for key, value in counts.items():
                if key[0] == 0xFFFFFFFF:
                    # Keys starting with 0xFFFFFFFF are tallied by
                    # 0xFFFFFFFF - key[1] instead of being a site.
                    splices_per_read[0xFFFFFFFF - key[1]] += value
                elif key in known_sites[chrom]:
                    known_count += value
                else:
                    unknown_count += value

        sides = ["splice sites", "splice sites"]
        xs = ["unknown", "known"]
        values = [unknown_count, known_count]
        for n_splices, n_reads in splices_per_read.items():
            sides.append("reads with x splices")
            xs.append(n_splices)
            values.append(n_reads)
        return pd.DataFrame({"side": sides, "x": xs, "count": values})

    def plot(df):
        return (
            dp(df)
            .p9()
            .theme_bw()
            .add_bar("x", "count", stat="identity")
            .facet_wrap("side", scales="free", ncol=1)
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            .title(self.name)
            .theme(panel_spacing_y=0.2)
            .render(output_filename)
        )

    plot_job = ppg.PlotJob(output_filename, calc, plot)
    return register_qc(plot_job.depends_on(self.load()).use_cores(-1))
def test_prune(self):
    """A pruned PlotJob produces neither its output nor its cache file."""

    def make_frame():
        return pd.DataFrame({"sha": [1]})

    def make_plot(df):
        return dp(df).p9().add_point("sha", "sha")

    pruned_job = ppg.PlotJob("a.png", make_frame, make_plot)
    pruned_job.prune()
    ppg.run_pipegraph()

    assert not Path("cache/a.png").exists()
    assert not Path("a.png").exists()
def test_no_rerun_if_calc_change_but_ignore_codechanges(self):
    """With ignore_code_changes(), a changed calc function causes no rerun."""
    import pydataframe

    output_path = 'out/test.png'

    def calc():
        append('out/calc', 'A')
        columns = {"X": list(range(0, 100)), 'Y': list(range(50, 150))}
        return pydataframe.DataFrame(columns)

    def plot(df):
        append('out/plot', 'B')
        return pyggplot.Plot(df).add_scatter('X', 'Y')

    job = ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find('PNG image'), -1)
    self.assertEqual(read('out/calc'), 'A')
    self.assertEqual(read('out/plot'), 'B')

    ppg.new_pipegraph(rc_gen(), quiet=True)

    def calc2():
        append('out/calc', 'A')
        # Extra statement so that calc2's code differs from calc's.
        x = 5
        columns = {"X": list(range(0, 100)), 'Y': list(range(50, 150))}
        return pydataframe.DataFrame(columns)

    job = ppg.PlotJob(output_path, calc2, plot)
    job.ignore_code_changes()
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find('PNG image'), -1)
    # Marker files are unchanged: neither calc nor plot ran again.
    self.assertEqual(read('out/calc'), 'A')
    self.assertEqual(read('out/plot'), 'B')
def test_pdf(self):
    """A PlotJob with a .pdf output name writes a PDF document."""
    output_path = "out/test.pdf"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    assert b"PDF document" in magic(output_path)
def test_unpickling_error(self, new_pipegraph):
    """A corrupted cache file makes the plot job fail with an unpickling ValueError."""
    output_path = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    # First run creates both the plot and its cache file.
    job = ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()

    new_pipegraph.new_pipegraph()
    job = ppg.PlotJob(output_path, calc, plot)
    # Overwrite the cached data with text that cannot be unpickled.
    with open("cache/out/test.png", "w") as cache_file:
        cache_file.write("no unpickling")
    os.unlink("out/test.png")  # so it reruns

    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()
    assert not os.path.exists("out/test.png")
    assert isinstance(job.exception, ValueError)
    assert "Unpickling error in file" in str(job.exception)
def test_no_rerun_if_calc_change_but_ignore_codechanges(self):
    """With ignore_code_changes(), a changed calc function causes no rerun."""
    output_path = "out/test.png"

    def calc():
        append("out/calc", "A")
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("X", "Y")

    job = ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find(b"PNG image"), -1)
    self.assertEqual(read("out/calc"), "A")
    self.assertEqual(read("out/plot"), "B")

    ppg.new_pipegraph(rc_gen(), quiet=True)

    def calc2():
        append("out/calc", "A")
        # Extra statement so that calc2's code differs from calc's.
        x = 5  # noqa: E157,F841
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    job = ppg.PlotJob(output_path, calc2, plot)
    job.ignore_code_changes()
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find(b"PNG image"), -1)
    # Marker files are unchanged: neither calc nor plot ran again.
    self.assertEqual(read("out/calc"), "A")
    self.assertEqual(read("out/plot"), "B")
def test_plot_job_dependencies_are_added_to_just_the_cache_job(self):
    """depends_on() on a PlotJob registers the dependency on its cache job."""
    output_path = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    plot_job = ppg.PlotJob(output_path, calc, plot)
    upstream = ppg.FileGeneratingJob("out/A", lambda: write("out/A", "A"))
    plot_job.depends_on(upstream)

    assert upstream in plot_job.cache_job.prerequisites
def test_no_rerun_if_calc_change_but_ignore_codechanges(
        self, new_pipegraph):
    """With ignore_code_changes(), a changed calc function causes no rerun."""
    output_path = "out/test.png"

    def calc():
        append("out/calc", "A")
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        append("out/plot", "B")
        return dp(df).p9().add_point("X", "Y")

    job = ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    assert read("out/calc") == "A"
    assert read("out/plot") == "B"

    new_pipegraph.new_pipegraph()

    def calc2():
        append("out/calc", "A")
        # Extra statement so that calc2's code differs from calc's.
        x = 5  # noqa: E157,F841
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    job = ppg.PlotJob(output_path, calc2, plot)
    job.ignore_code_changes()
    ppg.run_pipegraph()
    assert b"PNG image" in magic(output_path)
    # Marker files are unchanged: neither calc nor plot ran again.
    assert read("out/calc") == "A"
    assert read("out/plot") == "B"
def test_basic_skip_table(self):
    """With skip_table=True the plot and cache are written, but no .tsv table."""
    output_path = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    ppg.PlotJob(output_path, calc, plot, skip_table=True)
    ppg.run_pipegraph()

    assert b"PNG image" in magic(output_path)
    assert not os.path.exists(output_path + ".tsv")
    assert os.path.exists("cache/out/test.png")
def test_raises_if_calc_returns_non_df(self):
    """A calc function returning None fails the cache job with JobContractError."""
    output_path = "out/test.png"

    def calc():
        return None

    def plot(df):
        append("out/plot", "B")
        return pyggplot.Plot(df).add_scatter("X", "Y")

    job = ppg.PlotJob(output_path, calc, plot)
    try:
        ppg.run_pipegraph()
    except ppg.RuntimeError:
        pass
    else:
        # The run was expected to fail.
        raise ValueError("should not be reached")
    assert isinstance(job.cache_job.exception, ppg.JobContractError)
def test_pdf(self):
    """A PlotJob with a .pdf output name writes a PDF document."""
    import pydataframe

    def calc():
        return pydataframe.DataFrame({
            "X": list(range(0, 100)),
            'Y': list(range(50, 150)),
        })

    def plot(df):
        return pyggplot.Plot(df).add_scatter('X', 'Y')

    of = 'out/test.pdf'
    # The returned job object is not needed here, so it is not bound.
    ppg.PlotJob(of, calc, plot)
    ppg.run_pipegraph()
    self.assertTrue(magic(of).find('PDF document') != -1)
def test_basic(self):
    """A minimal PlotJob renders a PNG image."""
    ppg.new_pipegraph(rc_gen(), quiet=False)
    output_path = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter("X", "Y")

    ppg.PlotJob(output_path, calc, plot)
    ppg.run_pipegraph()
    self.assertNotEqual(magic(output_path).find(b"PNG image"), -1)
def test_basic(self):
    """A minimal PlotJob renders a PNG image."""
    ppg.new_pipegraph(rc_gen(), quiet=False)
    import pydataframe

    def calc():
        return pydataframe.DataFrame({
            "X": list(range(0, 100)),
            'Y': list(range(50, 150)),
        })

    def plot(df):
        return pyggplot.Plot(df).add_scatter('X', 'Y')

    of = 'out/test.png'
    # The returned job object is not needed here, so it is not bound.
    ppg.PlotJob(of, calc, plot)
    ppg.run_pipegraph()
    self.assertTrue(magic(of).find('PNG image') != -1)
def test_plot_job_dependencies_are_added_to_just_the_cache_job(self):
    """depends_on() on a PlotJob registers the dependency on its cache job."""
    import pydataframe

    output_path = 'out/test.png'

    def calc():
        columns = {"X": list(range(0, 100)), 'Y': list(range(50, 150))}
        return pydataframe.DataFrame(columns)

    def plot(df):
        return pyggplot.Plot(df).add_scatter('X', 'Y')

    plot_job = ppg.PlotJob(output_path, calc, plot)
    upstream = ppg.FileGeneratingJob('out/A', lambda: write('out/A', 'A'))
    plot_job.depends_on(upstream)

    # NOTE(review): the original carried a disabled assertion that the
    # dependency is also in the plot job's own prerequisites; only the
    # cache job is checked here.
    self.assertTrue(upstream in plot_job.cache_job.prerequisites)
def test_raises_if_calc_returns_non_df(self):
    """A calc function returning None fails the cache job with JobContractError."""
    output_path = 'out/test.png'

    def calc():
        return None

    def plot(df):
        append('out/plot', 'B')
        return pyggplot.Plot(df).add_scatter('X', 'Y')

    job = ppg.PlotJob(output_path, calc, plot)
    try:
        ppg.run_pipegraph()
    except ppg.RuntimeError:
        pass
    else:
        # The run was expected to fail.
        raise ValueError("should not be reached")
    failure = job.cache_job.exception
    self.assertTrue(isinstance(failure, ppg.JobContractError))
def test_raises_if_plot_returns_non_plot(self):
    """A plot function returning None fails the plot job with JobContractError."""
    output_path = "out/test.png"

    def calc():
        columns = {"X": list(range(0, 100)), "Y": list(range(50, 150))}
        return pd.DataFrame(columns)

    def plot(df):
        return None

    job = ppg.PlotJob(output_path, calc, plot)
    try:
        ppg.run_pipegraph()
    except ppg.RuntimeError:
        pass
    else:
        # The run was expected to fail.
        raise ValueError("should not be reached")
    assert isinstance(job.exception, ppg.JobContractError)