예제 #1
0
        def generate_deps():
            """Create attribute loaders for ``o.A`` and ``o.B``; wire only the first to ``job``."""

            def produce_a():
                return "A"

            def produce_b():
                return "B"

            loader_a = ppg.AttributeLoadingJob("dlA", o, "A", produce_a)
            # The second loader is created but not attached to anything in this function.
            ppg.AttributeLoadingJob("dlB", o, "B", produce_b)
            job.depends_on(loader_a)
예제 #2
0
        def generate_deps():
            """Create loaders for ``o.A`` and ``o.B`` and attach them to ``job`` and ``jobD``."""

            def produce_a():
                return "A"

            def produce_b():
                return "B"

            loader_a = ppg.AttributeLoadingJob("dlA", o, "A", produce_a)
            loader_b = ppg.AttributeLoadingJob("dlB", o, "B", produce_b)
            job.depends_on(loader_a)
            # The surrounding test expects this second wiring call to raise.
            jobD.depends_on(loader_b)
예제 #3
0
        def generate_deps():
            """Create loaders for ``o.A`` and ``o.B``, make ``job`` depend on both, and return them.

            Returns:
                A two-element list ``[loader for A, loader for B]``.
            """

            def produce_a():
                return "A"

            def produce_b():
                return "B"

            loader_a = ppg.AttributeLoadingJob("dlA", o, "A", produce_a)
            loader_b = ppg.AttributeLoadingJob("dlB", o, "B", produce_b)
            loaders = [loader_a, loader_b]
            # Attach in creation order: A first, then B.
            for loader in loaders:
                job.depends_on(loader)
            return loaders
예제 #4
0
    def test_generated_jobs_that_can_not_run_right_away_because_of_dataloading_do_not_crash(
            self):
        """Jobs generated at runtime may depend on both pre-existing and new loaders.

        A job-generating job creates two file writers: one depends on a
        loader defined before the graph runs, the other on a loader created
        inside the generator. Both output files must hold the loaded values.
        """
        holder = Dummy()
        preexisting_loader = ppg.AttributeLoadingJob("a", holder, "a", lambda: "Ashu")

        def gen():
            # Everything in here is only created while the pipegraph is running.
            late_loader = ppg.AttributeLoadingJob("b", holder, "b", lambda: "Bshu")
            write_c = ppg.FileGeneratingJob("out/C", lambda: write("out/C", holder.a))
            write_d = ppg.FileGeneratingJob("out/D", lambda: write("out/D", holder.b))
            write_c.depends_on(preexisting_loader)
            write_d.depends_on(late_loader)

        ppg.JobGeneratingJob("E", gen)
        ppg.run_pipegraph()
        for path, expected in (("out/C", "Ashu"), ("out/D", "Bshu")):
            assert read(path) == expected
예제 #5
0
    def test_calc_depends_on_added_dependencies(self):
        """A cached attribute job's calc function can use a dependency added afterwards.

        The calc function reads ``o``, which is supplied by an attribute
        loader that is attached to the cached job only after the file writer
        has already been wired to it.
        """
        target = Dummy()
        attr_loader = ppg.AttributeLoadingJob("load_attr", target, "o", lambda: 55)

        def compute():
            return target.o

        def emit():
            write("out/A", str(target.o2))

        cache_job = ppg.CachedAttributeLoadingJob(
            "out/cached_job", target, "o2", compute
        )
        writer = ppg.FileGeneratingJob("out/A", emit)
        writer.depends_on(cache_job)
        # The loader is added last, after the writer already depends on the cache job.
        cache_job.depends_on(attr_loader)
        ppg.run_pipegraph()
        assert read("out/A") == "55"
예제 #6
0
    def calc_norm_data(self):
        """Build the attribute-loading job that targets ``self.norm_data_``.

        The job is identified by ``self.cache_dir / "norm_data"`` and is made
        to depend on a parameter invariant over the normalization strategy's
        name, the raw-data job of ``self.heatmap``, a function invariant over
        the strategy class's ``calc``, and whatever the strategy's
        ``get_dependencies`` returns for the heatmap's lanes.

        Returns:
            The result of calling ``depends_on`` on the attribute-loading job.
        """

        def calc():
            """Normalized data is a dictionary: lane_name: 2d matrix"""
            return self.do_calc_norm_data()

        cache_path = self.cache_dir / "norm_data"
        # Create the loading job first, then assemble its dependencies.
        loader = ppg.AttributeLoadingJob(cache_path, self, "norm_data_", calc)
        fixed_deps = [
            ppg.ParameterInvariant(cache_path, (self.normalization_strategy.name,)),
            self.heatmap.calc_raw_data(),
            ppg.FunctionInvariant(
                "genomics.regions.heatmap."
                + self.normalization_strategy.name
                + "calc_func",
                self.normalization_strategy.__class__.calc,
            ),
        ]
        strategy_deps = self.normalization_strategy.get_dependencies(
            self.heatmap.lanes_to_draw
        )
        return loader.depends_on(fixed_deps + strategy_deps)
예제 #7
0
    def test_cached_jobs_get_depencies_only_on_the_lazy_filegenerator_not_on_the_loading_job(
            self):
        """Dependencies of a cached data-loading job land on its ``lfg`` job only.

        After ``depends_on``, the added job must be absent from the cached
        job's own ``prerequisites`` and present in ``lfg.prerequisites``.
        """
        holder = Dummy()

        def compute():
            return list(range(holder.b))

        def store(value):
            holder.a = value

        cached_job = ppg.CachedDataLoadingJob("a", compute, store)

        def provide_b():
            return 100

        b_loader = ppg.AttributeLoadingJob("b", holder, "b", provide_b)
        cached_job.depends_on(b_loader)
        assert b_loader not in cached_job.prerequisites
        assert b_loader in cached_job.lfg.prerequisites
예제 #8
0
    def test_cached_jobs_get_depencies_only_on_the_lazy_filegenerator_not_on_the_loading_job(
            self):
        """Dependencies of a cached attribute-loading job land on its ``lfg`` job only.

        Besides the prerequisite placement, the test checks that both the
        added loader and the cached job report ``was_invalidated`` after the
        pipegraph has run.
        """
        holder = Dummy()

        def compute():
            return list(range(holder.b))

        cached_job = ppg.CachedAttributeLoadingJob("a", holder, "a", compute)

        def provide_b():
            return 100

        b_loader = ppg.AttributeLoadingJob("b", holder, "b", provide_b)
        cached_job.depends_on(b_loader)
        assert b_loader not in cached_job.prerequisites
        assert b_loader in cached_job.lfg.prerequisites
        ppg.run_pipegraph()
        for finished in (b_loader, cached_job):
            assert finished.was_invalidated
예제 #9
0
    def calc_raw_data(self):
        """Build the jobs that compute, cache, and load the raw heatmap data.

        One ``ppg.FileGeneratingJob`` is created per lane in
        ``self.lanes_to_draw``; each writes a compressed ``.npz`` file below
        ``self.cache_dir / "raw_data"``, named by the MD5 of the region set,
        region strategy, smoothing strategy and lane names. A final
        ``ppg.AttributeLoadingJob`` targeting ``self.raw_data_`` reads all of
        those files back and merges them into a single dictionary.

        Returns:
            The result of calling ``depends_on(jobs)`` on the
            attribute-loading job.
        """
        # we don't use a CachedAttributeLoadingJob so that we can compress the output.
        # don't knock that, it easily saves a gigabyte of data on a larger GR

        cache_dir = self.cache_dir / "raw_data"
        cache_dir.mkdir(exist_ok=True, parents=True)

        jobs = []
        # NOTE(review): the trailing comma makes this a one-element tuple, so a
        # tuple ends up nested inside the dependency list below. Presumably
        # depends_on flattens nested iterables -- confirm before changing.
        smoothing_invariant = (
            ppg.FunctionInvariant(
                "genomics.regions.heatmap."
                + self.smoothing_strategy.name
                + "calc_func",
                self.smoothing_strategy.__class__.calc,
            ),
        )
        for lane in self.lanes_to_draw:
            key = ",".join(
                [
                    self.gr_to_draw.name,
                    self.region_strategy.name,
                    self.smoothing_strategy.name,
                    lane.name,
                ]
            )
            # Hash the key so the cache file name has a fixed length.
            key = hashlib.md5(key.encode()).hexdigest()
            of = cache_dir / (key + ".npz")

            # Bind lane/of as defaults so each closure keeps its own iteration values.
            def calc(lane=lane, of=of):
                """Raw data is a dictionary: lane_name: 2d matrix"""
                raw_data = {lane.name: self.do_calc_raw_data(lane)}
                np.savez_compressed(of, **raw_data)

            jobs.append(
                ppg.FileGeneratingJob(of, calc).depends_on(
                    [
                        ppg.ParameterInvariant(
                            of,
                            (
                                self.smoothing_strategy.name,
                                lane.name,
                                self.gr_to_draw.name,
                            ),
                        ),
                        smoothing_invariant,
                        self.calc_regions(),
                        ppg.FunctionInvariant(
                            "genomics.regions.heatmap.do_calc_raw_data",
                            Heatmap.do_calc_raw_data,
                        ),
                    ]
                    + self.smoothing_strategy.get_dependencies(lane)
                )
            )

        def load():
            """Merge the arrays of every per-lane ``.npz`` file into one dict."""
            result = {}
            for job in jobs:
                # np.load on an .npz keeps the archive open; the context
                # manager closes it once all arrays have been read out.
                with np.load(job.job_id) as npzfile:
                    for f in npzfile.files:
                        result[f] = npzfile[f]
            return result

        key = ",".join(
            [
                self.gr_to_draw.name,
                self.region_strategy.name,
                self.smoothing_strategy.name,
                # Sorted so the job id does not depend on lane order.
                ",".join(sorted(x.name for x in self.lanes_to_draw)),
            ]
        )
        return ppg.AttributeLoadingJob(
            key + "_load", self, "raw_data_", load
        ).depends_on(jobs)
예제 #10
0
 def gen():
     """Create two file writers: one on the outer ``existing_dl``, one on a new loader for ``o.b``."""
     late_loader = ppg.AttributeLoadingJob("b", o, "b", lambda: "Bshu")
     write_c = ppg.FileGeneratingJob("out/C", lambda: write("out/C", o.a))
     write_d = ppg.FileGeneratingJob("out/D", lambda: write("out/D", o.b))
     # "out/C" relies on the loader defined outside; "out/D" on the one made here.
     write_c.depends_on(existing_dl)
     write_d.depends_on(late_loader)