Ejemplo n.º 1
0
    def test_cached_dataloading_job_does_not_load_its_preqs_on_cached(
        self, new_pipegraph
    ):
        """Run a cached data-loading graph twice and inspect the marker files.

        The graph is ``out/A`` (DataLoadingJob) -> ``out/C``
        (CachedDataLoadingJob) -> ``out/D`` (FileGeneratingJob).  After the
        first run ``out/D`` is deleted and the graph is rebuilt in a fresh
        pipegraph.  The second run must recreate ``out/D`` while ``out/A``
        and ``out/B`` keep their single entry and ``out/Cx`` grows to
        ``"CC"``.
        """
        holder = Dummy()

        def load_a():
            holder.a = "A"
            append("out/A", "A")

        def compute():
            append("out/B", "B")
            return holder.a * 2

        def store(value):
            holder.c = value
            # Marker goes to "out/Cx": "out/C" itself is the cache file.
            append("out/Cx", "C")

        def emit():
            write("out/D", holder.c)

        def wire_up():
            # Same job layout for both runs: emit <- cached <- loader.
            loader = ppg.DataLoadingJob("out/A", load_a)
            cached = ppg.CachedDataLoadingJob("out/C", compute, store)
            writer = ppg.FileGeneratingJob("out/D", emit)
            writer.depends_on(cached)
            cached.depends_on(loader)

        # First run: nothing is cached yet, every callback fires once.
        wire_up()
        ppg.run_pipegraph()
        assert read("out/D") == "AA"  # final result was written
        assert read("out/A") == "A"  # data-loading job ran
        assert read("out/B") == "B"  # calc part of the cached job ran
        assert read("out/Cx") == "C"  # load part of the cached job ran

        # Removing the output forces the file job and the cached load to rerun.
        os.unlink("out/D")
        new_pipegraph.new_pipegraph()

        # Second run: the cache file "out/C" is still present.
        wire_up()
        ppg.run_pipegraph()
        assert read("out/D") == "AA"  # final result was written again
        assert read("out/A") == "A"  # data-loading job did not run again
        assert read("out/B") == "B"  # calc did not run again
        assert read("out/Cx") == "CC"  # load part ran a second time
Ejemplo n.º 2
0
        def gen():
            """Create a loader job that sets the global ``shu`` and a file job that writes it.

            The file job for ``out/A`` is made to depend on the data-loading
            job ``dl``.
            """

            def load():
                global shu
                shu = "123"

            def do_write():
                global shu
                write("out/A", shu)

            loader = ppg.DataLoadingJob("dl", load)
            writer = ppg.FileGeneratingJob("out/A", do_write)
            # The writer reads the global that the loader assigns.
            writer.depends_on(loader)
Ejemplo n.º 3
0
    def test_raises_if_generating_within_dataload(self):
        """Defining a job inside a running DataLoadingJob must fail the run.

        The loader callback creates a FileGeneratingJob while the pipeline
        is executing.  The run is expected to raise ``ppg.RuntimeError`` and
        the loader job's stored exception must carry the matching message.
        """
        ppg.util.global_pipegraph.quiet = False
        writer = ppg.FileGeneratingJob("out/A", lambda: write("out/A", "aa"))

        def load():
            # Deliberately illegal: adds a job while the graph is running.
            ppg.FileGeneratingJob("out/B", lambda: write("out/B", "aa"))

        loader = ppg.DataLoadingJob("load_data", load)
        writer.depends_on(loader)

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        failure_text = str(loader.exception)
        assert "Trying to add new jobs to running pipeline" in failure_text
Ejemplo n.º 4
0
    def test_cached_attribute_job_does_not_load_its_preqs_on_cached(
        self, new_pipegraph
    ):
        """Run a cached attribute-loading graph twice and inspect the marker files.

        The graph is ``out/A`` (DataLoadingJob) -> ``out/C``
        (CachedAttributeLoadingJob setting attribute ``c``) -> ``out/D``
        (FileGeneratingJob).  After the first run ``out/D`` is deleted and
        the graph is rebuilt in a fresh pipegraph.  The second run must
        recreate ``out/D`` while ``out/A`` and ``out/B`` keep their single
        entry.
        """
        holder = Dummy()

        def load_a():
            holder.a = "A"
            append("out/A", "A")

        def compute():
            append("out/B", "B")
            return holder.a * 2

        def emit():
            write("out/D", holder.c)

        def wire_up():
            # Same job layout for both runs: emit <- cached attribute <- loader.
            loader = ppg.DataLoadingJob("out/A", load_a)
            cached = ppg.CachedAttributeLoadingJob("out/C", holder, "c", compute)
            writer = ppg.FileGeneratingJob("out/D", emit)
            writer.depends_on(cached)
            cached.depends_on(loader)

        # First run: nothing is cached yet.
        wire_up()
        ppg.run_pipegraph()
        assert read("out/D") == "AA"  # final result was written
        assert read("out/A") == "A"  # data-loading job ran
        assert read("out/B") == "B"  # calc ran

        # Removing the output forces the file job and the attribute load to rerun.
        os.unlink("out/D")
        new_pipegraph.new_pipegraph()

        # Second run: the cache file "out/C" is still present.
        wire_up()
        ppg.run_pipegraph()
        assert read("out/D") == "AA"  # final result was written again
        assert read("out/A") == "A"  # data-loading job did not run again
        assert read("out/B") == "B"  # calc did not run again
Ejemplo n.º 5
0
        def do_run2():
            # Builds the graph out/A <- "oa" <- "inject" and runs it.  The
            # injection callback adds a further loader "ob" that sets o.b to "C".
            # NOTE(review): o, do_write and dl_a are not defined in this fragment;
            # presumably they come from the enclosing test -- confirm there.
            of = "out/A"

            def inject():
                # Callback of the DependencyInjectionJob created below.
                def dl_b():
                    o.b = "C"  # so this dl has changed...

                job_dl_b = ppg.DataLoadingJob("ob", dl_b)
                # job_dl is looked up when inject() is called; it is assigned
                # further down in do_run2.
                job_dl.depends_on(job_dl_b)

            job_fg = ppg.FileGeneratingJob(of, do_write)
            job_dl = ppg.DataLoadingJob("oa", dl_a)
            job_fg.depends_on(job_dl)
            job_inject = ppg.DependencyInjectionJob("inject", inject)
            # "oa" depends on the injection job, which in turn attaches "ob" to it.
            job_dl.depends_on(job_inject)
            ppg.run_pipegraph()
Ejemplo n.º 6
0
def test_dataloading_job_changing_cwd(new_pipegraph):
    """Run a graph whose data-loading callback changes the working directory.

    The loader switches into ``shu`` and writes ``b`` there; the dependent
    file job writes ``a`` using a relative path.  Both results are then read
    back with paths relative to the starting directory, so the checks
    presumably only pass if the loader's ``chdir`` does not persist.
    """
    from pathlib import Path

    os.mkdir("shu")

    def load():
        # Leaves the process in "shu" as far as this callback is concerned.
        os.chdir("shu")
        Path("b").write_text("world")
        return 55

    writer = ppg.FileGeneratingJob("a", lambda: Path("a").write_text("hello"))
    loader = ppg.DataLoadingJob("b", load)
    writer.depends_on(loader)

    ppg.run_pipegraph()

    assert read("a") == "hello"
    assert read("shu/b") == "world"
Ejemplo n.º 7
0
    def _anno_load(self, anno):
        """Return a DataLoadingJob that copies ``anno``'s columns from the parent.

        When run, the job takes ``anno.columns`` from ``self.ddf.parent.df``,
        reindexes them to ``self.ddf.df.index`` and appends them column-wise
        to ``self.ddf.df``.

        The job depends on a FunctionInvariant over ``anno.calc``, on the
        parent's entry in ``anno_jobs`` for this annotator, and on
        ``self.ddf.load()``.
        """

        def load():
            current = self.ddf.df
            # Align the parent's annotation columns to this frame's rows.
            inherited = self.ddf.parent.df[anno.columns].reindex(current.index)
            self.ddf.df = pd.concat([current, inherited], axis=1)

        loader = ppg.DataLoadingJob(self.ddf.cache_dir / anno.get_cache_name(), load)

        # Dependencies are created in the same order the job receives them.
        calc_invariant = ppg.FunctionInvariant(
            self.ddf.cache_dir / (anno.get_cache_name() + "_funcv"), anno.calc
        )
        parent_anno_job = self.ddf.parent.anno_jobs[anno.get_cache_name()]
        ddf_load_job = self.ddf.load()

        loader.depends_on(calc_invariant, parent_anno_job, ddf_load_job)
        return loader
Ejemplo n.º 8
0
            def inject():
                # Creates the data-loading job "ob", whose loader sets o.b to "C",
                # and registers it as a dependency of job_dl.
                # NOTE(review): o and job_dl are not defined in this fragment;
                # presumably they come from the enclosing scope -- confirm there.
                def dl_b():
                    o.b = "C"  # so this dl has changed...

                job_dl_b = ppg.DataLoadingJob("ob", dl_b)
                job_dl.depends_on(job_dl_b)
Ejemplo n.º 9
0
            def inject():
                # Creates the data-loading job "ob", whose loader sets o.b to "B",
                # and registers it as a dependency of job_dl.
                # NOTE(review): o and job_dl are not defined in this fragment;
                # presumably they come from the enclosing scope -- confirm there.
                def dl_b():
                    o.b = "B"

                job_dl_b = ppg.DataLoadingJob("ob", dl_b)
                job_dl.depends_on(job_dl_b)