def run(self):
    '''
    Execute the whole job DAG and write the final results.

    When the checkpoint manager reports that it is recovering, the problem
    structure is restored from the checkpoint; otherwise the options are
    checked, the problem structure, jobs and DAG are built, the first-level
    jobs are enqueued and checkpointing is started.  In both cases the
    method then waits for the job pool, merges and outputs the results and
    stops checkpointing.

    Exits the process with status 1 (``sys.exit``) when
    ``JobPool().wait_for_all_jobs()`` returns a false value.
    '''
    checkpoint_manager = options().checkpoint
    assert isinstance(checkpoint_manager, CheckPointManager)

    t = time.time()

    if checkpoint_manager.is_recovering:
        checkpoint_manager.restore_checkpoint()
        self.root_problem = \
            checkpoint_manager.checkpoint_state.root_problem
        self.check_outputprefix()
    else:
        # check input arguments
        self.check_options()

        # build the problem structure
        self.root_problem = self.build_subproblems()

        # build jobs
        self.build_jobs()

        # connect jobs into a DAG
        self.connect_jobs()

        # Queue up first level jobs (i.e. those with no dependency).
        # Once these run, they should automatically enqueue the rest of
        # the DAG through joins and callbacks
        self.enqueue_firstlevel_job()

        # start the checkpointing (has any effects only in checkpointing
        # mode)
        checkpoint_manager.start_checkpointing(self.root_problem)

    # Wait for all jobs to finish
    if not JobPool().wait_for_all_jobs():
        # Logger.exception() is only meaningful inside an ``except``
        # block (it appends the active traceback); there is none here,
        # so report the failure as a plain error.
        _LOG.error(
            "There have been errors in executed jobs. Terminating.")
        sys.exit(1)

    # terminate the job pool and release memory
    JobPool().terminate()

    # Pause checkpointing
    checkpoint_manager.pause_checkpointing()
    # checkpoint_manager.force_checkpoint()

    # Merge results into final outputs
    self.merge_results()

    # Output final results
    self.output_results()

    # Stop checkpointing
    checkpoint_manager.stop_checkpointing()

    _LOG.info("Current execution Finished in %d seconds",
              time.time() - t)
    _LOG.info(
        "All checkpointed executions Finished in %d cumulative time",
        checkpoint_manager.get_total_time())
def run():
    '''
    Build the problem tree and its job DAG, run it on a 2-worker JobPool
    and print the buildmodel/summarize results of every node at
    SUMMERIZE_LEVEL.  The tree is stored in the module-global
    ``root_problem``.
    '''
    global root_problem

    # Re-create the pool with two workers.
    JobPool().terminate()
    JobPool().__init__(2)
    JobPool(2)

    # build the problem structure
    root_problem = build_subproblems()

    # build the DAG of jobs
    build_job_dag(root_problem)

    def enqueue_buildmodel_job(problem):
        # All buildmodel jobs are ready to be started (i.e. no
        # dependency).  Queue them up; once they run, they will
        # automatically enqueue the rest of the DAG through joins and
        # callbacks.  Nodes without a parent are not enqueued.
        if problem.parent is not None:
            JobPool().enqueue_job(problem.jobs["buildmodel"])
        for sub_problem in problem.children:
            enqueue_buildmodel_job(sub_problem)

    enqueue_buildmodel_job(root_problem)

    # Wait for all jobs to finish
    JobPool().wait_for_all_jobs()

    # Print out the results of summarize jobs.  We could merge the results
    # from all summarize jobs here if we wanted.
    for node in root_problem.iter_nodes_at_level(SUMMERIZE_LEVEL):
        model_text = trimstr(node.get_job_result_by_name("buildmodel"))
        summary_text = trimstr(node.get_job_result_by_name("summarize"))
        print(model_text, summary_text)
def testError(self):
    '''
    Run an external job on a directory that does not exist and check that
    the pool reports the failure.
    '''
    failing_job = TestExternalJob()
    failing_job.pattern = "somerandomdirectorywewillneverhavehere_ordowe"
    # Let's ignore this error in subsequent test cases.
    failing_job.ignore_error = True

    try:
        JobPool().enqueue_job(failing_job)
        all_succeeded = JobPool().wait_for_all_jobs()
        assert all_succeeded is False
    except JobError as err:
        assert "No such file or directory" in str(err), \
            "The error we expected is no such file or directory"

    async_result = JobPool().get_asynch_result_object(failing_job)
    assert async_result.successful() is False, "We expected the job to fail"
def perform(self):
    '''
    Distributes fragments to alignments subsets with best score,
    and runs align jobs on those. Also, creates new chunks of fragments
    for better parallelism.
    '''
    # Figure out which fragment should go to which subproblem
    self.figureout_fragment_subset()

    # Collect the alignment subproblems (grandchildren of the root).
    alignment_problems = []
    for placement_problem in self.root_problem.children:
        alignment_problems.extend(placement_problem.children)

    # For each alignment subproblem,
    # 1) make sure its fragments are evenly distributed to fragment chunks
    # 2) set up alignment jobs for its children and enqueue them
    for alignment_problem in alignment_problems:
        assert isinstance(alignment_problem, SeppProblem)
        chunk_count = len(alignment_problem.get_children())
        chunked_fragments = \
            alignment_problem.fragments.divide_to_equal_chunks(chunk_count)

        for idx, chunk_problem in enumerate(alignment_problem.children):
            chunk_problem.fragments = chunked_fragments[idx]
            align_job = chunk_problem.jobs['hmmalign']
            assert isinstance(align_job, HMMAlignJob)

            # First complete setting up the alignment job
            align_job.hmmmodel = \
                alignment_problem.get_job_result_by_name('hmmbuild')
            align_job.base_alignment = \
                alignment_problem.jobs["hmmbuild"].infile

            has_fragments = (chunk_problem.fragments is not None
                             and not chunk_problem.fragments.is_empty())
            if has_fragments:
                chunk_problem.fragments.write_to_path(align_job.fragments)
            else:
                # Nothing to align for this chunk
                align_job.fake_run = True

            # Now the align job can be put on the queue
            JobPool().enqueue_job(align_job)
def perform(self):
    '''
    For every fragment chunk, split the merged extended alignment into its
    query and backbone parts, write both to the paths held by the chunk's
    pplacer job, attach the full extended alignment to the job and enqueue
    it.
    '''
    placement = self.placement_problem
    merged_alignments = self.merge_subalignments()
    chunk_total = self.root_problem.fragment_chunks

    for chunk_index in range(0, chunk_total):
        merged = merged_alignments[chunk_index]

        # Split the backbone alignment and query sequences
        # into separate files
        query_part = merged.get_fragments_readonly_alignment()
        backbone_part = merged.get_base_readonly_alignment()

        placer_job = placement.jobs[get_placement_job_name(chunk_index)]
        assert isinstance(placer_job, PplacerJob)

        if query_part.is_empty():
            placer_job.fake_run = True

        # Write out the extended alignments, split into query and full-
        # length for pplacer
        query_part.write_to_path(placer_job.extended_alignment_file)
        backbone_part.write_to_path(placer_job.backbone_alignment_file)

        # But keep the extended alignment on everything
        placer_job.set_attribute("full_extended_alignment_object", merged)

        JobPool().enqueue_job(placer_job)
def testCpuCount(self):
    '''
    Check that the ``-x`` command line option is reflected in
    ``options().cpu``, then restore the JobPool and ``sys.argv``.
    '''
    program = sys.argv[0]

    # Just to make different test cases independent of each other.
    config._options_singelton = None
    # Disable main config path for this test
    config.main_config_path = self.fp_config

    JobPool().terminate()
    JobPool().__init__(7)
    sys.argv = [program, "-x", "7"]

    parsed_cpu = options().cpu
    assert parsed_cpu == 7, "Commandline option -x not read properly"

    # clean up after test:
    # 1) the JobPool CPU counts needs to be reset to the default
    # 2) the command line arguments must be restored
    JobPool().terminate()
    JobPool().__init__(cpu_count())
    sys.argv = [program, "-x", str(cpu_count())]
    config._options_singelton = None
    options()
def perform(self):
    '''
    Aggregate fragments from tips to the SUMMERIZE_LEVEL level problem,
    and enqueue a summarize job
    '''
    print("Process [%s]: Join_ApplyModel_Summarize joining %s" %
          (os.getpid(), self.summarylevel_problem), file=sys.stderr)

    # One "applymodel" result per leaf below the summary-level problem.
    tip_results = [
        leaf.get_job_result_by_name("applymodel")
        for leaf in self.summarylevel_problem.iter_leaves()
    ]

    summarize_job = self.summarylevel_problem.jobs["summarize"]
    summarize_job.resultsPerTipSubproblem = tip_results
    JobPool().enqueue_job(summarize_job)
def testSuccess(self):
    '''
    Find a ``*.py`` file from the current directory, then look for the
    same file from the parent directory and check both hits agree.
    '''
    # First search: any python file below "."
    local_search = TestExternalJob()
    local_search.pattern = "."
    local_search.options = "-name *.py"
    JobPool().enqueue_job(local_search)
    JobPool().wait_for_all_jobs()
    res0 = local_search.result.partition('\n')[0]
    assert res0 != ""

    # Second search: the same file, looked up from ".."
    parent_search = TestExternalJob()
    parent_search.pattern = ".."
    parent_search.options = "-name %s" % res0.replace("./test", "*")
    JobPool().enqueue_job(parent_search)
    JobPool().wait_for_all_jobs()
    res1 = parent_search.result.partition('\n')[0]
    assert res1 != ""

    relative_hit = res1.replace("../unittest/", "./")
    assert relative_hit == res0
def testSuccess(self):
    '''
    Find a ``test*.py`` file from the current directory, then search for
    its base name from the parent directory and check that the second hit
    ends with the first hit's path (minus the leading "./").
    '''
    # First search: test modules below "."
    local_search = TestExternalJob()
    local_search.pattern = "."
    local_search.options = "-name test*.py"
    JobPool().enqueue_job(local_search)
    JobPool().wait_for_all_jobs()
    res0 = local_search.result.partition('\n')[0]
    assert res0 != ""

    # Second search: the same file name, looked up from ".."
    base_name = res0.rsplit('/', 1)[-1]
    parent_search = TestExternalJob()
    parent_search.pattern = ".."
    parent_search.options = "-name %s" % base_name
    JobPool().enqueue_job(parent_search)
    JobPool().wait_for_all_jobs()
    res1 = parent_search.result.partition('\n')[0]
    assert res1 != ""

    path_without_dot_slash = res0[2:]
    assert res1.endswith(path_without_dot_slash)
def perform(self):
    '''
    For every fragment chunk, prepare and enqueue its placement job.

    The merged extended alignment of the chunk is split into its query and
    backbone parts.  With the "pplacer" placer both parts are written to
    the paths held by the job.  The full extended alignment is always
    pickled to ``pj.full_extended_alignment_file`` before the job is
    enqueued.

    Raises ValueError when ``self.placer`` is "epa" (not supported here).
    '''
    pp = self.placement_problem
    fullExtendedAlignments = self.merge_subalignments()
    for i in range(0, self.root_problem.fragment_chunks):
        fullExtendedAlignment = fullExtendedAlignments[i]

        # Split the backbone alignment and query sequences into
        # separate files
        queryExtendedAlignment = \
            fullExtendedAlignment.get_fragments_readonly_alignment()
        base_alignment = \
            fullExtendedAlignment.get_base_readonly_alignment()
        pj = pp.jobs[get_placement_job_name(i)]

        if queryExtendedAlignment.is_empty():
            pj.fake_run = True

        if self.placer == "pplacer":
            assert isinstance(pj, PplacerJob)

            # Write out the extended alignments, split into query and
            # full-length for pplacer
            queryExtendedAlignment.write_to_path(
                pj.extended_alignment_file)
            base_alignment.write_to_path(pj.backbone_alignment_file)
        elif self.placer == "epa":
            # assert isinstance(pj, EPAJob)
            raise ValueError("EPA Currently not supported")

            # Write out the extended alignments in phylip for EPA
            # fullExtendedAlignment.write_to_path(
            #    pj.extended_alignment_file, schema="PHYLIP")

        # keep the extended alignment on everything
        # pj.set_attribute("full_extended_alignment_object",
        # fullExtendedAlignment)
        # TODO: Removed this, as it can cause unexpected lockups
        # The context manager closes the file even if pickling fails.
        with open(pj.full_extended_alignment_file, 'wb') as output:
            pickle.dump(fullExtendedAlignment, output)

        # Enqueue the placement job
        JobPool().enqueue_job(pj)
def perform(self):
    '''
    1 - start from grandparent fragments.
    2 - Based on results from joined search operations, figure out for
        each grandparent fragment whether it is closer to parent1 or
        parent2 (grandparent's children), and divide the fragments
        accordingly
    3 - Set fragments attribute of parent1 and parent2 based on the
        preceding calculation
    4 - For each of the grandchildren of the grandparent problem,
        4-1 Set the fragments attribute of their searchfragment job
            to the fragments of its parent (set in step 3)
        4-2 Set the model attribute of their searchfragment job to
            the model computed in its buildmodel job
        4-3 Enqueue its searchfragment job

    The above procedure is equivalent of figuring out the child HMM model
    that a fragment is closer to, and classifying it accordingly.
    '''
    print("Process [%s]: Join_BuildModel_SearchFragment joining %s" %
          (os.getpid(), self.grandparent_problem), file=sys.stderr)

    grandparent = self.grandparent_problem
    all_fragments = grandparent.fragments
    positions = list(range(0, len(all_fragments)))

    first_scores = grandparent.children[0].get_job_result_by_name(
        "searchfragment")
    second_scores = grandparent.children[1].get_job_result_by_name(
        "searchfragment")

    # A negative difference sends the fragment to the first child; zero or
    # positive sends it to the second child.
    score_gap = [first_scores[pos] - second_scores[pos] for pos in positions]
    for_first = []
    for_second = []
    for pos in positions:
        if score_gap[pos] < 0:
            for_first.append(all_fragments[pos])
        else:
            for_second.append(all_fragments[pos])
    grandparent.children[0].fragments = for_first
    grandparent.children[1].fragments = for_second

    for parent in grandparent.children:
        parent_fragments = parent.fragments
        for grandchild in parent.children:
            search_job = grandchild.jobs["searchfragment"]
            search_job.model = grandchild.get_job_result_by_name("buildmodel")
            search_job.fragments = parent_fragments
            JobPool().enqueue_job(search_job)
def perform(self):
    '''
    Prepare the single "placer" job of the placement problem and enqueue
    it.

    The merged extended alignment is split into its query and backbone
    parts.  If the query part is empty the job is marked as a fake run;
    otherwise the input files are written in the form the configured
    placer ("pplacer" or "epa") expects.  The full extended alignment is
    always pickled to ``pj.full_extended_alignment_file`` before the job
    is enqueued.
    '''
    pp = self.placement_problem
    fullExtendedAlignment = self.merge_subalignments()
    pj = pp.jobs["placer"]

    # Split the backbone alignment and query sequences into separate files
    queryExtendedAlignment = \
        fullExtendedAlignment.get_fragments_readonly_alignment()
    baseAlignment = fullExtendedAlignment.get_base_readonly_alignment()

    # Check for empty fragment files
    if queryExtendedAlignment.is_empty():
        pj.fake_run = True
    elif self.placer == "pplacer":
        assert isinstance(pj, PplacerJob)

        # Write out the extended alignments, split into query and
        # full-length for pplacer
        queryExtendedAlignment.write_to_path(pj.extended_alignment_file)
        baseAlignment.write_to_path(pj.backbone_alignment_file)
    elif self.placer == "epa":
        assert isinstance(pj, EPAJob)

        # Write out the extended alignments in phylip for EPA
        fullExtendedAlignment.write_to_path(pj.extended_alignment_file,
                                            schema="PHYLIP")

    # keep the extended alignment on everything
    # pj.set_attribute("full_extended_alignment_object",
    #                  fullExtendedAlignment)
    # TODO: Removed this, as it can cause unexpected lockups
    # The context manager closes the file even if pickling fails.
    with open(pj.full_extended_alignment_file, 'wb') as output:
        pickle.dump(fullExtendedAlignment, output)

    # Enqueue the placement job
    JobPool().enqueue_job(pj)
def perform(self):
    '''
    First print out some summary of everything up to here.
    Then update applymodel jobs with correct fragment and model, and
    then enqueue them.
    '''
    print("Process [%s]: Join_tip_searchfragment joining %s" %
          (os.getpid(), self.root_problem), file=sys.stderr)

    def print_fragments(problem):
        # Print level, buildmodel result and fragments of this node and,
        # recursively, of all its descendants.
        print("level " + str(problem.level),
              str(problem.get_job_result_by_name("buildmodel")),
              problem.fragments)
        for c in problem.children:
            print_fragments(c)

    print_fragments(self.root_problem)

    # Walk the leaves of the problem this join was created for, not a
    # module-level global that may be missing or refer to another tree.
    for p in self.root_problem.iter_leaves():
        j = p.jobs["applymodel"]
        search_job = p.jobs["searchfragment"]
        j.fragments = search_job.fragments
        j.model = search_job.model
        JobPool().enqueue_job(j)
def testNoPipe(self):
    '''
    Repeat the success and failure scenarios with jobs created with
    ``pipe=1``.
    '''
    # A search that is expected to produce at least one hit
    ok_job = TestExternalJob(pipe=1)
    ok_job.pattern = "."
    ok_job.options = "-name *.py"
    JobPool().enqueue_job(ok_job)
    JobPool().wait_for_all_jobs()
    res0 = ok_job.result.partition('\n')[0]
    assert res0 != ""

    # A search on a directory that does not exist
    failing_job = TestExternalJob(pipe=1)
    failing_job.pattern = "somerandomdirectorywewillneverhavehere_ordowe"
    # Let's ignore this error in subsequent test cases.
    failing_job.ignore_error = True

    try:
        JobPool().enqueue_job(failing_job)
        all_succeeded = JobPool().wait_for_all_jobs()
        assert all_succeeded is False
        JobPool().get_asynch_result_object(failing_job).get()
    except JobError as err:
        assert "No such file or directory" in str(err), \
            "The error we expected is no such file or directory"

    async_result = JobPool().get_asynch_result_object(failing_job)
    assert async_result.successful() is False, "We expected the job to fail"
def enqueue_buildmodel_job(problem):
    '''
    Enqueue the "buildmodel" job of every node in the tree below
    ``problem`` (inclusive) that has a parent; parentless nodes are
    skipped but their children are still visited.
    '''
    has_parent = problem.parent is not None
    if has_parent:
        buildmodel_job = problem.jobs["buildmodel"]
        JobPool().enqueue_job(buildmodel_job)
    for sub_problem in problem.children:
        enqueue_buildmodel_job(sub_problem)
Note that this could have been also achived using a join that has only one job.''' for c in root_problem.children: def enq_job_searchfragment(result, next_job): next_job.model = result next_job.fragments = root_problem.fragments JobPool().enqueue_job(next_job) c.jobs["buildmodel"].add_call_Back(lambda result, next_job=c.jobs[ "searchfragment"]: enq_job_searchfragment(result, next_job)) s = 0 lock = Lock() if __name__ == '__main__': pool = JobPool(2) '''build the problem structure''' root_problem = build_subproblems() '''build the dat of jobs''' build_job_dag(root_problem) '''All buildmodel jobs are ready to be started (i.e. no dependency). Queue them up. Once they run, they will automatically enqueue the rest of the DAG through joins and callbacks ''' def enqueue_buildmodel_job(problem): if problem.parent is not None: JobPool().enqueue_job(problem.jobs["buildmodel"]) for child in problem.children: enqueue_buildmodel_job(child) enqueue_buildmodel_job(root_problem) '''Wait for all jobs to finish'''
def run():
    '''
    Smoke test for JobPool: check that repeated construction returns the
    same pool, enqueue 19 TestJob instances, wait for them, and verify
    that the errors reported by the pool are consistent.
    The pool used is stored in the module-global ``pool``.
    '''
    global pool

    first_pool = JobPool(2)
    second_pool = JobPool()
    if first_pool != second_pool:
        raise Exception("hmmm, I thought JobPool is 'Singleton'")

    # Asking for a different cpu count is expected to be rejected.
    try:
        JobPool(4)
    except Exception as err:
        print(("As expected, making a new JobPool with a"
               " different cpu count failed: %s") % err)

    pool = JobPool()
    jobs = []
    for number in range(1, 20):
        new_job = TestJob(str(number))
        jobs.append(new_job)
        pool.enqueue_job(new_job)

    sample_job = pool.get_asynch_result_object(jobs[3])

    # pool.terminate()

    pool.wait_for_all_jobs(ignore_error=True)

    # Test one of the jobs, to see if it is successful
    sample_succeeded = bool(sample_job.ready() and sample_job.successful())
    assert jobs[3].result_set is sample_succeeded

    errors = pool.get_all_job_errors()
    # print("Following job errors were raised:", errors)

    try:
        pool.wait_for_all_jobs(ignore_error=False)
    except Exception as err:
        print("Seems we have some jobs that failed (expected): ", err)

    errs = [pool.get_job_error(failed) for failed in pool.get_failed_jobs()]
    # print(errs)

    assert len(errs) == len(errors), \
        "Number of errors from failed jobs: %d. Number of errors: %d" % \
        (len(errs), len(errors))
    assert all(err in errors for err in errs)

    # print [job.state for job in jobs]
    # print("Number of started jobs - number of printed results:", s)
    # print("Number of failed jobs:", len(errors))
    assert s == len(errors), \
        "Parallelization Error, what happened to the rest?"
def enq_job_searchfragment(result, next_job):
    '''
    Callback: hand ``result`` to ``next_job`` as its model, give it the
    fragments of the module-level ``root_problem`` and enqueue it.
    '''
    next_job.model = result
    next_job.fragments = root_problem.fragments
    job_pool = JobPool()
    job_pool.enqueue_job(next_job)
return h s = 0 lock = Lock() if __name__ == '__main__': global pool pool1 = JobPool(2) pool2 = JobPool() if pool1 != pool2: raise Exception("hmmm, I thought JobPool is 'Singleton'") try: pool3 = JobPool(4) except Exception as e: print "As expected, making a new JobPool with a different cpu count failed: %s" %e pool = JobPool() jobs = [] for j in range(1,20): job = TestJob(str(j)) jobs.append(job) pool.enqueue_job(job) sample_job = pool.get_asynch_result_object(jobs[3]) #pool.terminate() pool.wait_for_all_jobs(ignore_error=True) # Test one of the jobs, to see if it is successful if sample_job.ready() and sample_job.successful():
def add_a_child(parent):
    '''
    Report on stderr that a child job is being added for ``parent`` and
    enqueue a new TestJob named "<parent>.child".
    '''
    # sys.stderr.write works on both Python 2 and Python 3, unlike the
    # Python-2-only ``print >> sys.stderr`` form.  The one-element tuples
    # keep the formatting correct even if ``parent`` is itself a tuple.
    sys.stderr.write("Adding a child job for %s\n" % (parent,))
    JobPool().enqueue_job(TestJob("%s.child" % (parent,)))
step = random() if step < 0.1: raise Exception("Some (truly) random error occurred in job %s." % self.jobname) for i in range(0, 100): h += step * i time.sleep(step / 100) #self.state = step return h s = 0 lock = Lock() if __name__ == '__main__': global pool pool1 = JobPool(2) pool2 = JobPool() if pool1 != pool2: raise Exception("hmmm, I thought JobPool is 'Singleton'") try: pool3 = JobPool(4) except Exception as e: print "As expected, making a new JobPool with a different cpu count failed: %s" % e pool = JobPool() jobs = [] for j in range(1, 20): job = TestJob(str(j)) jobs.append(job) pool.enqueue_job(job)
def enq_job_searchfragment(result, search_job):
    '''
    Callback: store ``result`` as the ``hmmmodel`` of ``search_job`` and
    put the job on the JobPool queue.
    '''
    search_job.hmmmodel = result
    job_pool = JobPool()
    job_pool.enqueue_job(search_job)
def run():
    '''
    Exercise JobPool end to end: confirm that a second construction yields
    the same pool, submit 19 TestJob instances, wait for completion and
    cross-check the pool's error bookkeeping.
    The pool used is stored in the module-global ``pool``.
    '''
    global pool

    pool_a = JobPool(2)
    pool_b = JobPool()
    if pool_a != pool_b:
        raise Exception("hmmm, I thought JobPool is 'Singleton'")

    # A different cpu count is expected to be refused.
    try:
        JobPool(4)
    except Exception as exc:
        print(("As expected, making a new JobPool with a"
               " different cpu count failed: %s") % exc)

    pool = JobPool()
    jobs = []
    for index in range(1, 20):
        test_job = TestJob(str(index))
        jobs.append(test_job)
        pool.enqueue_job(test_job)

    sample_job = pool.get_asynch_result_object(jobs[3])

    # pool.terminate()

    pool.wait_for_all_jobs(ignore_error=True)

    # Test one of the jobs, to see if it is successful
    expected_flag = bool(sample_job.ready() and sample_job.successful())
    assert jobs[3].result_set is expected_flag

    errors = pool.get_all_job_errors()
    # print("Following job errors were raised:", errors)

    try:
        pool.wait_for_all_jobs(ignore_error=False)
    except Exception as exc:
        print("Seems we have some jobs that failed (expected): ", exc)

    errs = [pool.get_job_error(broken) for broken in pool.get_failed_jobs()]
    # print(errs)

    assert len(errs) == len(errors), \
        "Number of errors from failed jobs: %d. Number of errors: %d" % \
        (len(errs), len(errors))
    assert all(item in errors for item in errs)

    # print [job.state for job in jobs]
    # print("Number of started jobs - number of printed results:", s)
    # print("Number of failed jobs:", len(errors))
    assert s == len(errors), \
        "Parallelization Error, what happened to the rest?"
def tearDown(self):
    '''
    Clean up JobPool for other unit tests: terminate it and re-initialise
    it with ``cpu_count()`` workers.
    '''
    JobPool().terminate()
    fresh_pool = JobPool()
    fresh_pool.__init__(cpu_count())
def enqueue_firstlevel_job(self):
    '''
    Enqueue the "hmmbuild" job of every grandchild of the root problem.
    '''
    for child_problem in self.root_problem.children:
        for grandchild_problem in child_problem.children:
            hmmbuild_job = grandchild_problem.jobs["hmmbuild"]
            JobPool().enqueue_job(hmmbuild_job)
def add_a_child(parent):
    '''
    Enqueue a new TestJob named "<parent>.child" on the JobPool.
    '''
    # print("Adding a child job for %s" % (parent), file=sys.stderr)
    job_pool = JobPool()
    child_job = TestJob("%s.child" % parent)
    job_pool.enqueue_job(child_job)