def test_raises_on_non_dependend_job_injection2_can_be_ignored(self):
    """Run with ``check_for_dependency_injections=False`` and expect success.

    The injection callback creates two attribute-loading jobs but attaches
    only ``dlA`` to the file-writing job; ``dlB`` is created and left
    unattached. The run is expected to finish without raising and to
    produce ``out/A``.
    """
    holder = Dummy()
    out_path = "out/A"

    def do_write():
        # Only attribute A is read here; B is not in the dependency chain.
        write(out_path, holder.A)

    def write_d():
        write("out/D", "D")

    writer_job = ppg.FileGeneratingJob(out_path, do_write)
    # A second output job that has no relation to the injection job.
    ppg.FileGeneratingJob("out/D", write_d)

    def generate_deps():
        def load_a():
            return "A"

        def load_b():
            return "B"

        attached = ppg.AttributeLoadingJob("dlA", holder, "A", load_a)
        # dlB is created on purpose but never hooked up to writer_job.
        ppg.AttributeLoadingJob("dlB", holder, "B", load_b)
        writer_job.depends_on(attached)

    injection_job = ppg.DependencyInjectionJob(
        "C",
        generate_deps,
        check_for_dependency_injections=False,
    )
    writer_job.depends_on(injection_job)

    ppg.run_pipegraph()

    # The run completed, so the output file must have been written.
    assert os.path.exists(out_path)
def test_raises_on_non_dependend_job_injection2(self):
    """Expect the run to fail when the injection job leaves a job unattached.

    The injection callback creates ``dlA`` and ``dlB`` but attaches only
    ``dlA`` to the file-writing job. The test expects ``run_pipegraph`` to
    raise ``ppg.RuntimeError``, ``out/A`` to be missing, the unrelated
    ``out/D`` to exist, and the injection job to carry a
    ``ppg.JobContractError`` whose message contains ``"case 1"``.
    """
    holder = Dummy()
    out_path = "out/A"

    def do_write():
        write(out_path, holder.A + holder.B)

    def write_d():
        write("out/D", "D")

    writer_job = ppg.FileGeneratingJob(out_path, do_write)
    # Independent output job; it is not connected to the injection job.
    ppg.FileGeneratingJob("out/D", write_d)

    def generate_deps():
        def load_a():
            return "A"

        def load_b():
            return "B"

        attached = ppg.AttributeLoadingJob("dlA", holder, "A", load_a)
        # dlB is created on purpose but never hooked up to writer_job.
        ppg.AttributeLoadingJob("dlB", holder, "B", load_b)
        writer_job.depends_on(attached)

    injection_job = ppg.DependencyInjectionJob("C", generate_deps)
    writer_job.depends_on(injection_job)

    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()

    # The job depending on the failed injection job must not have written.
    assert not os.path.exists(out_path)
    # The unrelated job is expected to have produced its file regardless.
    assert os.path.exists("out/D")
    failure = injection_job.exception
    assert isinstance(failure, ppg.JobContractError)
    assert "case 1" in str(failure)
def do_run2():
    """Build and run a graph whose injection job adds a data-loading dependency.

    Relies on ``o``, ``do_write`` and ``dl_a`` from the enclosing scope (not
    visible in this block).

    NOTE(review): the name and the inline comment suggest this is the second
    of two runs and that ``dl_b`` differs from an earlier version -- confirm
    against the sibling run in the enclosing test.
    """
    of = "out/A"

    def inject():
        # Executed by the DependencyInjectionJob: creates the "ob" loader and
        # makes the "oa" loader depend on it.
        def dl_b():
            o.b = "C"  # so this dl has changed...

        job_dl_b = ppg.DataLoadingJob("ob", dl_b)
        # job_dl is bound further down in do_run2; it is resolved through the
        # closure when inject() is actually called.
        job_dl.depends_on(job_dl_b)

    # Dependency chain set up here: job_fg -> job_dl -> job_inject.
    job_fg = ppg.FileGeneratingJob(of, do_write)
    job_dl = ppg.DataLoadingJob("oa", dl_a)
    job_fg.depends_on(job_dl)
    job_inject = ppg.DependencyInjectionJob("inject", inject)
    job_dl.depends_on(job_inject)
    ppg.run_pipegraph()
def test_basic(self, new_pipegraph):
    """Inject two attribute-loading dependencies and check the written output.

    The injection callback creates ``dlA`` and ``dlB``, attaches both to the
    file-writing job and returns them. After the run, ``out/A`` is expected
    to contain ``"AB"``.
    """
    # TODO: there is a problem with this approach. The AttributeLoadingJob
    # references different objects, since it gets pickled alongside the
    # method and unpickled again, and then it is not the same object anymore,
    # so the FileGeneratingJob and the AttributeLoadingJob in this test
    # reference different objects.
    # I'm not sure how to handle this right now though.
    # Idea: do JobGraphModifyingJobs in each slave and send back just the
    # dependency data (and the new job name). That way we can still execute
    # on any slave, and all the pointers should be right.
    new_pipegraph.new_pipegraph()
    holder = Dummy()
    out_path = "out/A"

    def do_write():
        write(out_path, holder.A + holder.B)

    writer_job = ppg.FileGeneratingJob(out_path, do_write)

    def generate_deps():
        def load_a():
            return "A"

        def load_b():
            return "B"

        loaders = [
            ppg.AttributeLoadingJob("dlA", holder, "A", load_a),
            ppg.AttributeLoadingJob("dlB", holder, "B", load_b),
        ]
        for loader in loaders:
            writer_job.depends_on(loader)
        return loaders

    writer_job.depends_on(ppg.DependencyInjectionJob("C", generate_deps))
    ppg.run_pipegraph()
    assert read(out_path) == "AB"
def test_injecting_filegenerating_job(self):
    """Inject a file-generating job at runtime and consume its output.

    The injection callback creates the job that writes ``out/B`` and makes
    the ``out/A`` job depend on it; ``out/A`` copies the content of
    ``out/B`` and is expected to contain ``"B"`` after the run.
    """
    target_path = "out/A"

    def do_write():
        write(target_path, read("out/B"))

    consumer_job = ppg.FileGeneratingJob(target_path, do_write)

    def generate_dep():
        def write_B():
            write("out/B", "B")

        # Created while the graph is running and attached to the consumer.
        consumer_job.depends_on(ppg.FileGeneratingJob("out/B", write_B))

    consumer_job.depends_on(ppg.DependencyInjectionJob("gen_job", generate_dep))
    ppg.run_pipegraph()

    content = read("out/A")
    assert content == "B"
jobs = [] global urls # we use a global to communicate with the later job # please note that this is only possible in Job generating and data loading jobs, # not in the output jobs. urls = retrieve_urls() for url in urls: jobs.append(download_job(url)) jobs.append( # this makes sure that if you remove urls, the output job would also rerun. # adding urls not seen before would make it rerun either way. pypipegraph.ParameterInvariant('retrieved_urls', urls)) return jobs # A DependencyInjectionJob allows us to make job_count depend on the jobs returned by generate_download_jobs # during the runtime of the graph job_generate = pypipegraph.DependencyInjectionJob('retrieve_url', generate_download_jobs) # now, this needs to be more fancy as well. def count_characters_and_write_output(): counts = {} for url in urls: dj = download_job( url ) # this will get us the *same* object as download_job(url) returned the first time target_file = dj.job_id # the job_id for FileGeneratingJob is the output filename. file_handle = open(target_file, 'rb') data = file_handle.read() count = len(data) counts[url] = len(data)
def inner():
    """Construct a DependencyInjectionJob with the integer 5 as first argument.

    NOTE(review): presumably the enclosing test expects ppg to reject a
    non-string job id -- confirm against the caller.
    """
    non_string_job_id = 5
    ppg.DependencyInjectionJob(non_string_job_id, lambda: 1)
def inner():
    """Construct a DependencyInjectionJob with a string as second argument.

    NOTE(review): presumably the enclosing test expects ppg to reject a
    non-callable callback -- confirm against the caller.
    """
    not_callable = "shu"
    ppg.DependencyInjectionJob("out/a", not_callable)