def test_cached_dataloading_job_does_not_load_its_preqs_on_cached(
    self, new_pipegraph
):
    """A cached data-loading job must not rerun its prerequisites on a cache hit.

    The graph is ``out/D -> out/C (cached) -> out/A (data loading)``.  It is
    run once from scratch, then ``out/D`` is deleted and the same graph is
    run again in a fresh pipegraph.  The marker files written via ``append``
    record which callbacks were executed in each run.
    """
    holder = Dummy()

    def load_a():
        holder.a = "A"
        append("out/A", "A")

    def calc():
        append("out/B", "B")
        return holder.a * 2

    def store(value):
        holder.c = value
        # Marker goes to "out/Cx": "out/C" itself is the cache file.
        append("out/Cx", "C")

    def write_result():
        write("out/D", holder.c)

    def define_jobs():
        """Declare the three jobs and wire them up; identical for both runs."""
        prerequisite = ppg.DataLoadingJob("out/A", load_a)
        cached = ppg.CachedDataLoadingJob("out/C", calc, store)
        final = ppg.FileGeneratingJob("out/D", write_result)
        final.depends_on(cached)
        cached.depends_on(prerequisite)

    # First run: nothing exists yet, so every callback runs exactly once.
    define_jobs()
    ppg.run_pipegraph()
    assert read("out/D") == "AA"  # the final result was written
    assert read("out/A") == "A"  # the data-loading prerequisite ran
    assert read("out/B") == "B"  # the calc step ran
    assert read("out/Cx") == "C"  # the load step of the cached job ran

    # Removing the output forces the file job, and with it the load step of
    # the cached job, to run again.
    os.unlink("out/D")
    new_pipegraph.new_pipegraph()

    # Second run: the cache exists, so only the load step and the file job run.
    define_jobs()
    ppg.run_pipegraph()
    assert read("out/D") == "AA"  # the final result was written again
    assert read("out/A") == "A"  # unchanged: the prerequisite did not rerun
    assert read("out/B") == "B"  # unchanged: calc did not rerun
    assert read("out/Cx") == "CC"  # the load step ran a second time
def gen():
    """Declare a data-loading job and a file job that writes what it loaded.

    The loader stores a value in the module-level global ``shu``; the file
    job writes that value to ``out/A`` and therefore depends on the loader.
    """

    def load():
        global shu
        shu = "123"

    def do_write():
        global shu
        write("out/A", shu)

    loader = ppg.DataLoadingJob("dl", load)
    writer = ppg.FileGeneratingJob("out/A", do_write)
    writer.depends_on(loader)
def test_raises_if_generating_within_dataload(self):
    """Creating a job from inside a DataLoadingJob callback must fail the run.

    The run is expected to raise ``ppg.RuntimeError``, and the offending
    data-loading job must carry an exception explaining that jobs cannot be
    added to a running pipeline.
    """
    ppg.util.global_pipegraph.quiet = False
    consumer = ppg.FileGeneratingJob("out/A", lambda: write("out/A", "aa"))

    def load():
        # Not allowed: defines a new job while the pipeline is executing.
        ppg.FileGeneratingJob("out/B", lambda: write("out/B", "aa"))

    loader = ppg.DataLoadingJob("load_data", load)
    consumer.depends_on(loader)

    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()

    # Checked after the context manager so the assertion is actually reached.
    message = str(loader.exception)
    assert "Trying to add new jobs to running pipeline" in message
def test_cached_attribute_job_does_not_load_its_preqs_on_cached(
    self, new_pipegraph
):
    """A cached attribute job must not rerun its prerequisites on a cache hit.

    The graph is ``out/D -> out/C (cached attribute "c") -> out/A (data
    loading)``.  It is run once from scratch, then ``out/D`` is deleted and
    the same graph is run again in a fresh pipegraph.  The marker files
    written via ``append`` record which callbacks were executed.
    """
    holder = Dummy()

    def load_a():
        holder.a = "A"
        append("out/A", "A")

    def calc():
        append("out/B", "B")
        return holder.a * 2

    def write_result():
        write("out/D", holder.c)

    def define_jobs():
        """Declare the three jobs and wire them up; identical for both runs."""
        prerequisite = ppg.DataLoadingJob("out/A", load_a)
        cached = ppg.CachedAttributeLoadingJob("out/C", holder, "c", calc)
        final = ppg.FileGeneratingJob("out/D", write_result)
        final.depends_on(cached)
        cached.depends_on(prerequisite)

    # First run: nothing exists yet, so every callback runs exactly once.
    define_jobs()
    ppg.run_pipegraph()
    assert read("out/D") == "AA"  # the final result was written
    assert read("out/A") == "A"  # the data-loading prerequisite ran
    assert read("out/B") == "B"  # the calc step ran

    # Removing the output forces the file job, and with it the attribute
    # load of the cached job, to run again.
    os.unlink("out/D")
    new_pipegraph.new_pipegraph()

    # Second run: the cache exists, so neither the prerequisite nor calc runs.
    define_jobs()
    ppg.run_pipegraph()
    assert read("out/D") == "AA"  # the final result was written again
    assert read("out/A") == "A"  # unchanged: the prerequisite did not rerun
    assert read("out/B") == "B"  # unchanged: calc did not rerun
def do_run2():
    """Run the graph with an injected data-loading dependency that sets "C".

    Builds ``out/A -> oa -> inject`` and runs the pipegraph.  The
    ``inject`` job's callback adds a further data-loading job ``ob`` as a
    dependency of ``oa``.
    """
    file_job = ppg.FileGeneratingJob("out/A", do_write)
    loader_a = ppg.DataLoadingJob("oa", dl_a)
    file_job.depends_on(loader_a)

    def inject():
        def dl_b():
            o.b = "C"  # so this dl has changed...

        # loader_a is looked up when inject() is called, not when defined.
        loader_a.depends_on(ppg.DataLoadingJob("ob", dl_b))

    injector = ppg.DependencyInjectionJob("inject", inject)
    loader_a.depends_on(injector)
    ppg.run_pipegraph()
def test_dataloading_job_changing_cwd(new_pipegraph):
    """A DataLoadingJob that changes the working directory must not break the run.

    The loader switches into ``shu`` and writes ``b`` there.  The dependent
    file job writes ``a`` by relative path, and the test expects it to be
    readable as ``a`` (not ``shu/a``) afterwards.
    """
    from pathlib import Path

    os.mkdir("shu")

    def load_from_subdir():
        os.chdir("shu")
        Path("b").write_text("world")
        return 55

    file_job = ppg.FileGeneratingJob("a", lambda: Path("a").write_text("hello"))
    loader = ppg.DataLoadingJob("b", load_from_subdir)
    file_job.depends_on(loader)

    ppg.run_pipegraph()

    assert read("a") == "hello"
    assert read("shu/b") == "world"
def _anno_load(self, anno):
    """Create the job that copies ``anno``'s columns from the parent frame.

    The job's callback appends ``anno.columns`` of ``self.ddf.parent.df``,
    reindexed to ``self.ddf.df.index``, as new columns of ``self.ddf.df``.

    The job depends on:
      * a FunctionInvariant over ``anno.calc``,
      * the parent's job registered under the annotator's cache name,
      * the job returned by ``self.ddf.load()``.

    Returns the created DataLoadingJob.
    """

    def load():
        self.ddf.df = pd.concat(
            [
                self.ddf.df,
                self.ddf.parent.df[anno.columns].reindex(self.ddf.df.index),
            ],
            axis=1,
        )

    job = ppg.DataLoadingJob(self.ddf.cache_dir / anno.get_cache_name(), load)

    # Dependencies are built in the same order the original passed them.
    calc_invariant = ppg.FunctionInvariant(
        self.ddf.cache_dir / (anno.get_cache_name() + "_funcv"), anno.calc
    )
    parent_anno_job = self.ddf.parent.anno_jobs[anno.get_cache_name()]
    own_load_job = self.ddf.load()

    job.depends_on(calc_invariant, parent_anno_job, own_load_job)
    return job
def inject():
    """Add a data-loading job ``ob`` (setting ``o.b`` to "C") as a dependency of ``job_dl``."""

    def dl_b():
        o.b = "C"  # so this dl has changed...

    job_dl.depends_on(ppg.DataLoadingJob("ob", dl_b))
def inject():
    """Add a data-loading job ``ob`` (setting ``o.b`` to "B") as a dependency of ``job_dl``."""

    def dl_b():
        o.b = "B"

    job_dl.depends_on(ppg.DataLoadingJob("ob", dl_b))