def generate_deps(): def load_a(): return "A" def load_b(): return "B" dlA = ppg.AttributeLoadingJob("dlA", o, "A", load_a) ppg.AttributeLoadingJob("dlB", o, "B", load_b) job.depends_on(dlA)
def generate_deps(): def load_a(): return "A" def load_b(): return "B" dlA = ppg.AttributeLoadingJob("dlA", o, "A", load_a) dlB = ppg.AttributeLoadingJob("dlB", o, "B", load_b) job.depends_on(dlA) jobD.depends_on(dlB) # this line must raise
def generate_deps():
    def load_a():
        # logging.info('executing load A')
        return "A"

    def load_b():
        # logging.info('executing load B')
        return "B"

    # logging.info("Creating dl on %i in pid %s" % (id(o), os.getpid()))
    dlA = ppg.AttributeLoadingJob("dlA", o, "A", load_a)
    # logging.info("created dlA")
    dlB = ppg.AttributeLoadingJob("dlB", o, "B", load_b)
    job.depends_on(dlA)
    job.depends_on(dlB)
    return [dlA, dlB]
def test_generated_jobs_that_can_not_run_right_away_because_of_dataloading_do_not_crash(
    self
):
    o = Dummy()
    existing_dl = ppg.AttributeLoadingJob("a", o, "a", lambda: "Ashu")

    def gen():
        new_dl = ppg.AttributeLoadingJob("b", o, "b", lambda: "Bshu")
        fg_a = ppg.FileGeneratingJob("out/C", lambda: write("out/C", o.a))
        fg_b = ppg.FileGeneratingJob("out/D", lambda: write("out/D", o.b))
        fg_a.depends_on(existing_dl)
        fg_b.depends_on(new_dl)

    ppg.JobGeneratingJob("E", gen)
    ppg.run_pipegraph()
    assert read("out/C") == "Ashu"
    assert read("out/D") == "Bshu"
def test_calc_depends_on_added_dependencies(self):
    o = Dummy()
    load_attr = ppg.AttributeLoadingJob("load_attr", o, "o", lambda: 55)

    def calc():
        return o.o

    def out():
        write("out/A", str(o.o2))

    cached = ppg.CachedAttributeLoadingJob("out/cached_job", o, "o2", calc)
    fg = ppg.FileGeneratingJob("out/A", out)
    fg.depends_on(cached)
    cached.depends_on(load_attr)
    ppg.run_pipegraph()
    assert read("out/A") == "55"
def calc_norm_data(self):
    def calc():
        """Normalized data is a dictionary: lane_name: 2d matrix"""
        return self.do_calc_norm_data()

    of = self.cache_dir / "norm_data"
    return ppg.AttributeLoadingJob(of, self, "norm_data_", calc).depends_on(
        [
            ppg.ParameterInvariant(of, (self.normalization_strategy.name,)),
            self.heatmap.calc_raw_data(),
            ppg.FunctionInvariant(
                "genomics.regions.heatmap."
                + self.normalization_strategy.name
                + "calc_func",
                self.normalization_strategy.__class__.calc,
            ),
        ]
        + self.normalization_strategy.get_dependencies(self.heatmap.lanes_to_draw)
    )
def test_cached_jobs_get_depencies_only_on_the_lazy_filegenerator_not_on_the_loading_job(
    self
):
    o = Dummy()

    def calc():
        return list(range(0, o.b))

    def load(value):
        o.a = value

    job = ppg.CachedDataLoadingJob("a", calc, load)

    def do_b():
        return 100

    jobB = ppg.AttributeLoadingJob("b", o, "b", do_b)
    job.depends_on(jobB)
    assert not (jobB in job.prerequisites)
    assert jobB in job.lfg.prerequisites
def test_cached_jobs_get_depencies_only_on_the_lazy_filegenerator_not_on_the_loading_job(
    self
):
    o = Dummy()

    def calc():
        return list(range(0, o.b))

    job = ppg.CachedAttributeLoadingJob("a", o, "a", calc)

    def do_b():
        return 100

    jobB = ppg.AttributeLoadingJob("b", o, "b", do_b)
    job.depends_on(jobB)
    assert not (jobB in job.prerequisites)
    assert jobB in job.lfg.prerequisites
    ppg.run_pipegraph()
    assert jobB.was_invalidated
    assert job.was_invalidated
def calc_raw_data(self):
    # We don't use a CachedAttributeLoadingJob so that we can compress the output -
    # don't knock that, it easily saves a gigabyte of data on a larger GR.
    cache_dir = self.cache_dir / "raw_data"
    cache_dir.mkdir(exist_ok=True, parents=True)
    jobs = []
    smoothing_invariant = ppg.FunctionInvariant(
        "genomics.regions.heatmap." + self.smoothing_strategy.name + "calc_func",
        self.smoothing_strategy.__class__.calc,
    )
    for lane in self.lanes_to_draw:
        key = ",".join(
            [
                self.gr_to_draw.name,
                self.region_strategy.name,
                self.smoothing_strategy.name,
                lane.name,
            ]
        )
        key = hashlib.md5(key.encode()).hexdigest()
        of = cache_dir / (key + ".npz")

        def calc(lane=lane, of=of):
            """Raw data is a dictionary: lane_name: 2d matrix"""
            raw_data = {lane.name: self.do_calc_raw_data(lane)}
            np.savez_compressed(of, **raw_data)

        jobs.append(
            ppg.FileGeneratingJob(of, calc).depends_on(
                [
                    ppg.ParameterInvariant(
                        of,
                        (
                            self.smoothing_strategy.name,
                            lane.name,
                            self.gr_to_draw.name,
                        ),
                    ),
                    smoothing_invariant,
                    self.calc_regions(),
                    ppg.FunctionInvariant(
                        "genomics.regions.heatmap.do_calc_raw_data",
                        Heatmap.do_calc_raw_data,
                    ),
                ]
                + self.smoothing_strategy.get_dependencies(lane)
            )
        )

    def load():
        result = {}
        for job in jobs:
            npzfile = np.load(job.job_id)
            for f in npzfile.files:
                result[f] = npzfile[f]
        return result

    key = ",".join(
        [
            self.gr_to_draw.name,
            self.region_strategy.name,
            self.smoothing_strategy.name,
            ",".join(list(sorted([x.name for x in self.lanes_to_draw]))),
        ]
    )
    return ppg.AttributeLoadingJob(key + "_load", self, "raw_data_", load).depends_on(
        jobs
    )