def _start_merger(self):
    '''Blocks until the two "subjobs" are done (with the new implementation they
    will already be done before this is called), creates the merger job for each
    locus and puts it in the job queue, cleans up the alignment subdirectories,
    signals an event indicating that the merge jobs are on the queue, and then
    returns.

    Called by wait()
    '''
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")
    assert self.subjob1 is not None
    result1 = self.subjob1.get_results()
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")
    self.subjob1 = None

    assert self.subjob2 is not None
    result2 = self.subjob2.get_results()
    self.subjob2 = None
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")

    assert result1.get_num_loci() == result2.get_num_loci()
    # Create one merger job per locus, pairing the two sub-alignments.
    mj_list = []
    for n, r1 in enumerate(result1):
        r2 = result2[n]
        cs = self.context_str + " merger" + str(n)
        mj = self.pasta_team.merger.create_job(r1,
                                               r2,
                                               tmp_dir_par=self.tmp_dir_par,
                                               delete_temps=self.delete_temps,
                                               context_str=cs)
        mj.add_parent_tickable_job(self)
        self.add_child(mj)
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
        mj_list.append(mj)
    self.merge_job_list = mj_list
    for mj in mj_list:
        jobq.put(mj)

    if self.delete_temps:
        for d in self._dirs_to_cleanup:
            self.pasta_team.temp_fs.remove_dir(d)

    self._merge_queued_event.set()
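# Illustrative only: a minimal sketch (not PASTA's actual wait() implementation)
# of how a caller might consume what _start_merger() sets up.  It assumes only
# the attributes assigned above (_merge_queued_event, merge_job_list) and that
# each merger job exposes get_results(), as the other job classes here do.
def _example_wait_for_merged_loci(aligner_job):
    # Block until _start_merger() has queued the per-locus merger jobs.
    aligner_job._merge_queued_event.wait()
    # Collect one merged alignment per locus; get_results() blocks until the
    # corresponding merger job has finished.
    return [mj.get_results() for mj in aligner_job.merge_job_list]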
def run(self, tmp_dir_par, pasta_products=None):
    assert os.path.exists(tmp_dir_par)
    self._reset_current_run_settings()
    self._reset_jobs()

    self.start_time = time.time()
    self.last_improvement_time = self.start_time

    num_non_update_iter = 0

    configuration = self.configuration()

    # Check whether max_subproblem_frac is more stringent than max_subproblem_size
    frac_max = int(math.ceil(self.max_subproblem_frac * self.tree.n_leaves))
    if frac_max > self.max_subproblem_size:
        configuration['max_subproblem_size'] = frac_max
        MESSENGER.send_info('Max subproblem set to {0}'.format(
            configuration['max_subproblem_size']))
    if configuration['max_subproblem_size'] >= self.tree.n_leaves:
        MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({1}). Thus, the PASTA algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not, since
    you are using PASTA), please adjust your settings for the max subproblem
    and try running PASTA again.
    If you intended to use PASTA to align your data with the specified aligner
    tool *without* any decomposition, you can ignore this message.\n'''.format(
            configuration['max_subproblem_size'], self.tree.n_leaves))
    if configuration['max_subproblem_size'] == 1:
        MESSENGER.send_error('''
You have specified a max subproblem size of 1. PASTA requires a max subproblem
    size of at least 2.
''')
        sys.exit(1)

    delete_iteration_temps = not self.keep_iteration_temporaries
    delete_realignment_temps = delete_iteration_temps or (not self.keep_realignment_temporaries)
    configuration['delete_temps'] = delete_realignment_temps

    while self._keep_iterating():
        record_timestamp(os.path.join(tmp_dir_par, 'start_pastaiter_timestamp.txt'))

        # create a subdirectory for this iteration
        curr_iter_tmp_dir_par = os.path.join(tmp_dir_par, 'step' + str(self.current_iteration))
        curr_iter_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(curr_iter_tmp_dir_par)
        _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
        break_strategy_index = 0
        this_iter_score_improved = False

        while True:
            break_strategy = self._get_break_strategy(break_strategy_index)
            if not bool(break_strategy):
                break
            context_str = "iter%d-%s" % (self.current_iteration, break_strategy)
            # create a subdirectory for this iteration/break_strategy
            curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par, break_strategy)
            curr_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(curr_tmp_dir_par)
            record_timestamp(os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))

            # Align (with decomposition...)
            self.status('Step %d. Realigning with decomposition strategy set to %s'
                        % (self.current_iteration, break_strategy))
            if self.killed:
                raise RuntimeError("PASTA Job killed")
            tree_for_aligner = self.get_tree_copy()
            aligner = PASTAAlignerJob(multilocus_dataset=self.multilocus_dataset,
                                      pasta_team=self.pasta_team,
                                      tree=tree_for_aligner,
                                      tmp_base_dir=curr_tmp_dir_par,
                                      reset_recursion_index=True,
                                      skip_merge=self.pastamerge,
                                      **configuration)
            self.pasta_aligner_job = aligner
            aligner.launch_alignment(break_strategy=break_strategy,
                                     context_str=context_str)

            if self.pastamerge:
                _LOG.debug("Build PASTA merge jobs")
                subsets_tree = self.build_subsets_tree(curr_tmp_dir_par)
                if len(self.pasta_team.subsets) == 1:
                    # can happen if there are no decompositions
                    for job in self.pasta_team.alignmentjobs:
                        jobq.put(job)
                    new_multilocus_dataset = list(self.pasta_team.subsets.values())[0].get_results()
                else:
                    pariwise_tmp_dir_par = os.path.join(curr_tmp_dir_par, "pw")
                    pariwise_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(pariwise_tmp_dir_par)
                    pmj = PASTAMergerJob(multilocus_dataset=self.multilocus_dataset,
                                         pasta_team=self.pasta_team,
                                         tree=subsets_tree,
                                         tmp_base_dir=pariwise_tmp_dir_par,
                                         reset_recursion_index=True,
                                         #delete_temps2=False,
                                         **configuration)
                    pmj.launch_alignment(context_str=context_str)

                    # Start alignment jobs
                    for job in self.pasta_team.alignmentjobs:
                        jobq.put(job)

                    new_multilocus_dataset = pmj.get_results()
                    del pmj

                self.pasta_team.alignmentjobs = []
                self.pasta_team.subsets = {}
            else:
                new_multilocus_dataset = aligner.get_results()
            _LOG.debug("Alignment obtained. Preparing for tree.")
            self.pasta_aligner_job = None
            del aligner

            record_timestamp(os.path.join(curr_tmp_dir_par, 'start_treeinference_timestamp.txt'))
            # Tree inference
            if self.start_tree_search_from_current:
                start_from = self.tree
            else:
                start_from = None
            self.status('Step %d. Alignment obtained. Tree inference beginning...'
                        % (self.current_iteration))
            if self.killed:
                raise RuntimeError("PASTA Job killed")
            tbj = self.pasta_team.tree_estimator.create_job(new_multilocus_dataset,
                                                            starting_tree=start_from,
                                                            num_cpus=self.num_cpus,
                                                            context_str=context_str + " tree",
                                                            tmp_dir_par=curr_tmp_dir_par,
                                                            delete_temps=delete_iteration_temps,
                                                            pasta_products=pasta_products,
                                                            step_num=self.current_iteration,
                                                            mask_gappy_sites=self.mask_gappy_sites)
            prev_curr_align = self.curr_iter_align_tmp_filename
            prev_curr_tree = self.curr_iter_tree_tmp_filename
            self.curr_iter_align_tmp_filename = pasta_products.get_abs_path_for_iter_output(
                self.current_iteration, TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
            self.curr_iter_tree_tmp_filename = pasta_products.get_abs_path_for_iter_output(
                self.current_iteration, TEMP_TREE_TAG, allow_existing=True)
            self.tree_build_job = tbj
            jobq.put(tbj)
            new_score, new_tree_str = tbj.get_results()
            self.tree_build_job = None
            del tbj
            if self.killed:
                raise RuntimeError("PASTA Job killed")
            record_timestamp(os.path.join(curr_tmp_dir_par, 'end_treeinference_timestamp.txt'))

            curr_timestamp = time.time()
            accept_iteration = False

            if self.score is None:
                self.score = new_score

            if self.best_score is None or new_score > self.best_score:
                self.store_optimum_results(new_multilocus_dataset,
                                           new_tree_str,
                                           new_score,
                                           curr_timestamp)
                this_iter_score_improved = True
                accept_iteration = True

            if self._get_accept_mode(new_score=new_score,
                                     break_strategy_index=break_strategy_index) == AcceptMode.BLIND_MODE:
                if self.blind_mode_is_final:
                    self.is_stuck_in_blind = True
                    if self.switch_to_blind_timestamp is None:
                        if self._blindmode_trigger:
                            _LOG.debug("Blind runmode trigger = %s" % self._blindmode_trigger)
                        self.switch_to_blind_iter = self.current_iteration
                        self.switch_to_blind_timestamp = curr_timestamp
                accept_iteration = True

            if accept_iteration:
                self.score = new_score
                self.multilocus_dataset = new_multilocus_dataset
                self.tree_str = new_tree_str
                if this_iter_score_improved:
                    self.status('realignment accepted and score improved.')
                else:
                    self.status('realignment accepted despite the score not improving.')
                # we do not want to continue to try different breaking strategies
                # for this iteration, so we break
                self.status('current score: %s, best score: %s' % (self.score, self.best_score))
                break
            else:
                self.status('realignment NOT accepted.')
                self.curr_iter_align_tmp_filename = prev_curr_align
                self.curr_iter_tree_tmp_filename = prev_curr_tree

            break_strategy_index += 1

            # self.status('current score: %s, best score: %s' % (self.score, self.best_score))

        if not this_iter_score_improved:
            self.num_iter_since_imp += 1
        self.current_iteration += 1

    if self._termination_trigger:
        _LOG.debug("Termination trigger = %s" % self._termination_trigger)
    record_timestamp(os.path.join(tmp_dir_par, 'end_pastaiter_timestamp.txt'))

    ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
    if not self.return_final_tree_and_alignment:
        self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta()
        for locus_alignment in self.best_multilocus_dataset:
            self.multilocus_dataset.append(copy.copy(locus_alignment))
        self.tree_str = self.best_tree_str
        self.score = self.best_score
    else:
        assert self.multilocus_dataset is not None
        assert self.tree_str is not None
        assert self.score is not None
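# Illustrative only: a minimal sketch of the driver pattern implied by run()'s
# signature above.  The names `pasta_job`, `temporaries_dir`, and
# `pasta_products` are placeholders for objects built elsewhere (a configured
# job instance, an existing scratch directory, and the output-path helper whose
# get_abs_path_for_iter_output() is used above); this is not PASTA's actual
# start-up code.
def _example_drive_iterations(pasta_job, temporaries_dir, pasta_products):
    # run() iterates realignment/tree estimation until _keep_iterating() is
    # False, leaving the chosen alignment, tree, and score on the job object.
    pasta_job.run(tmp_dir_par=temporaries_dir, pasta_products=pasta_products)
    return pasta_job.tree_str, pasta_job.score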
def run(self, *args, **kwargs):
    # Synchronous convenience wrapper: start a worker, queue a single job, and
    # block until its results are available.
    start_worker(1)
    job = self.create_job(*args, **kwargs)
    jobq.put(job)
    return job.get_results()
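# Illustrative only: using the synchronous run() wrapper above to align one
# single-locus dataset outside the iterative pipeline.  `pasta_team`,
# `single_locus_sd`, and `scratch_dir` are placeholder names for a configured
# tool team, a sequence dataset, and an existing temp directory; the keyword
# arguments simply mirror the create_job() call made in launch_alignment()
# below, and it is assumed the aligner tool object exposes this run() wrapper.
def _example_standalone_alignment(pasta_team, single_locus_sd, scratch_dir):
    return pasta_team.aligner.run(single_locus_sd,
                                  tmp_dir_par=scratch_dir,
                                  delete_temps=True,
                                  context_str="standalone align")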
def launch_alignment(self, tree=None, break_strategy=None, context_str=None):
    '''Puts alignment job(s) in the queue and then returns None.

    get_results() must be called to get the alignment. Note that this call may
    not be trivial in terms of time (the tree will be decomposed, lots of
    temporary files may be written...), but the call does not block until
    completion of the alignments. Rather, it queues the alignment jobs so that
    multiple processors can be exploited if they are available.
    '''
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")

    if break_strategy is not None:
        self.break_strategy = break_strategy
    break_strategy = self.break_strategy
    if tree is not None:
        self.tree = tree
    self.expected_number_of_taxa = self.multilocus_dataset.get_num_taxa()  # for debugging purposes
    self._reset_jobs()

    prefix = "self.multilocus_dataset.get_num_taxa = %d" % self.expected_number_of_taxa
    self.context_str = context_str
    if self.context_str is None:
        self.context_str = ''
    _LOG.debug("Comparing expected_number_of_taxa=%d and max_subproblem_size=%d\n" %
               (self.expected_number_of_taxa, self.max_subproblem_size))
    if self.expected_number_of_taxa <= self.max_subproblem_size:
        # Base case: the subproblem is small enough to be aligned directly.
        _LOG.debug("%s...Calling Aligner" % prefix)
        aj_list = []
        for index, single_locus_sd in enumerate(self.multilocus_dataset):
            aj = self.pasta_team.aligner.create_job(single_locus_sd,
                                                    tmp_dir_par=self.tmp_dir_par,
                                                    delete_temps=self.delete_temps,
                                                    context_str=self.context_str + " align" + str(index))
            aj.add_parent_tickable_job(self)
            self.add_child(aj)
            aj_list.append(aj)
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            self.pasta_team.alignmentjobs.append(aj)
        self.align_job_list = aj_list
        if self.skip_merge:
            for taxa in self.tree.leaf_node_names():
                self.pasta_team.subsets[taxa] = self
        else:
            for aj in aj_list:
                jobq.put(aj)
    else:
        # added by uym2 on August 1st 2017
        subjob1, subjob2 = self.bipartition_by_tree(break_strategy)
        if subjob1 is None or subjob2 is None:
            return

        _LOG.debug("%s...Recursing" % prefix)
        # create the subjobs
        # the next line was modified by uym2 (August 1st 2017)
        self.subjob1 = subjob1
        self.subjob2 = subjob2

        # store this dir so we can use it in the merger
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")

        self.subjob1.add_parent(self)
        self.subjob2.add_parent(self)
        self.add_child(self.subjob1)
        self.add_child(self.subjob2)
        self.subjob1.launch_alignment(break_strategy=break_strategy)
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
        self.subjob2.launch_alignment(break_strategy=break_strategy)
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
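# Illustrative only: a minimal sketch of the non-blocking pattern that the
# docstring above describes, mirroring how the iterative run() method drives
# PASTAAlignerJob.  `multilocus_dataset`, `pasta_team`, `tree`, `scratch_dir`,
# and `configuration` are placeholders for objects prepared by the caller, and
# PASTAAlignerJob is assumed to be in scope; the break strategy value is just
# an example.
def _example_launch_then_collect(multilocus_dataset, pasta_team, tree,
                                 scratch_dir, configuration):
    aligner = PASTAAlignerJob(multilocus_dataset=multilocus_dataset,
                              pasta_team=pasta_team,
                              tree=tree,
                              tmp_base_dir=scratch_dir,
                              **configuration)
    # Queue the (possibly decomposed) alignment jobs; this call returns quickly.
    aligner.launch_alignment(break_strategy="centroid", context_str="example")
    # ... other work could happen here while workers process the queue ...
    # get_results() blocks until the full alignment has been assembled.
    return aligner.get_results()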
def run(self, tmp_dir_par, pasta_products=None):
    '''This is to be called from the main pasta method ONLY.'''
    if self.tmp_dir_par is None:
        self.tmp_dir_par = tmp_dir_par
    else:
        tmp_dir_par = self.tmp_dir_par
    configuration, delete_iteration_temps = self.run_start(tmp_dir_par, pasta_products)
    # self.resumable = False
    while self._keep_iterating():
        if not self.resumable:
            record_timestamp(os.path.join(tmp_dir_par, 'start_pastaiter_timestamp.txt'))

            # create a subdirectory for this iteration
            curr_iter_tmp_dir_par = os.path.join(tmp_dir_par, 'step' + str(self.current_iteration))
            curr_iter_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(curr_iter_tmp_dir_par)
            _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
            self.break_strategy_index = 0
            this_iter_score_improved = False

            break_strategy = self._get_break_strategy(self.break_strategy_index)
            if not bool(break_strategy):
                print("breaking from PastaInterruptibleJob...")
                break
            context_str = "iter%d-%s" % (self.current_iteration, break_strategy)
            self.context_str = context_str
            # create a subdirectory for this iteration/break_strategy
            curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par, break_strategy)
            curr_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(curr_tmp_dir_par)
            self.curr_tmp_dir_par = curr_tmp_dir_par
            record_timestamp(os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))

            # Align (with decomposition...)
            self.status('Step %d. Realigning with decomposition strategy set to %s'
                        % (self.current_iteration, break_strategy))
            if self.killed:
                raise RuntimeError("PASTA Job killed")
            tree_for_aligner = self.get_tree_copy()
            aligner = PASTAInterruptibleAlignerJob(multilocus_dataset=self.multilocus_dataset,
                                                   pasta_team=self.pasta_team,
                                                   tree=tree_for_aligner,
                                                   tmp_base_dir=curr_tmp_dir_par,
                                                   reset_recursion_index=True,
                                                   skip_merge=self.pastamerge,
                                                   **configuration)
            self.pasta_aligner_job = aligner
            aligner.launch_alignment(break_strategy=break_strategy, context_str=context_str)

            # write the alignment job list so the run can be resumed later
            aln_job_list = pasta_products.get_abs_path_for_iter_output(self.current_iteration,
                                                                       'alnjoblist.txt')
            aln_job_list_file = open(aln_job_list, 'w')
            self.aln_job_list_dict = []
            for aj in self.pasta_team.alignmentjobs:
                self.aln_job_list_dict.append({'file_read_job': True,
                                               'alignedfn': aj.alignedfn,
                                               'seqfn': aj.seqfn,
                                               'scratch_dir': aj.scratch_dir,
                                               'datatype': aj.datatype,
                                               'context_str': aj.context_str})
                aln_job_list_file.write('%s,%s,%s\n' % (aj.scratch_dir, aj.seqfn, aj.alignedfn))
            aln_job_list_file.close()
            self.resumable = True
            # self.pasta_team.alignmentjobs = []
            return aln_job_list, self.resumable
        else:
            # for ajd in self.aln_job_list_dict:
            #     self.pasta_team.alignmentjobs.append(self.pasta_team.aligner.create_file_read_job(**ajd))
            # for aj in self.pasta_team.alignmentjobs:
            #     jobq.put(aj)
            self.resumable = False

        if not self.resumable:
            # re-initialize some variables from the earlier (pre-interruption) phase
            aligner = self.pasta_aligner_job
            curr_tmp_dir_par = self.curr_tmp_dir_par
            context_str = self.context_str

            if self.pastamerge:
                # pdb.set_trace()
                _LOG.debug("Build PASTA merge jobs")
                subsets_tree = self.build_subsets_tree(curr_tmp_dir_par)
                if len(self.pasta_team.subsets) == 1:
                    # can happen if there are no decompositions
                    for job in self.pasta_team.alignmentjobs:
                        jobq.put(job)
                    new_multilocus_dataset = list(self.pasta_team.subsets.values())[0].get_results()
                else:
                    pariwise_tmp_dir_par = os.path.join(curr_tmp_dir_par, "pw")
                    pariwise_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(pariwise_tmp_dir_par)
                    pmj = PASTAMergerJob(multilocus_dataset=self.multilocus_dataset,
                                         pasta_team=self.pasta_team,
                                         tree=subsets_tree,
                                         tmp_base_dir=pariwise_tmp_dir_par,
                                         reset_recursion_index=True,
                                         #delete_temps2=False,
                                         **configuration)
                    pmj.launch_alignment(context_str=context_str)

                    # Start alignment jobs
                    for job in self.pasta_team.alignmentjobs:
                        jobq.put(job)

                    new_multilocus_dataset = pmj.get_results()
                    del pmj

                self.pasta_team.alignmentjobs = []
                self.pasta_team.subsets = {}
            else:
                new_multilocus_dataset = aligner.get_results()
            _LOG.debug("Alignment obtained. Preparing for tree.")
            self.pasta_aligner_job = None
            del aligner

            record_timestamp(os.path.join(curr_tmp_dir_par, 'start_treeinference_timestamp.txt'))
            # Tree inference
            if self.start_tree_search_from_current:
                start_from = self.tree
            else:
                start_from = None
            self.status('Step %d. Alignment obtained. Tree inference beginning...'
                        % (self.current_iteration))
            if self.killed:
                raise RuntimeError("PASTA Job killed")
            tbj = self.pasta_team.tree_estimator.create_job(new_multilocus_dataset,
                                                            starting_tree=start_from,
                                                            num_cpus=self.num_cpus,
                                                            context_str=context_str + " tree",
                                                            tmp_dir_par=curr_tmp_dir_par,
                                                            delete_temps=delete_iteration_temps,
                                                            pasta_products=pasta_products,
                                                            step_num=self.current_iteration,
                                                            mask_gappy_sites=self.mask_gappy_sites)
            prev_curr_align = self.curr_iter_align_tmp_filename
            prev_curr_tree = self.curr_iter_tree_tmp_filename
            self.curr_iter_align_tmp_filename = pasta_products.get_abs_path_for_iter_output(
                self.current_iteration, TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
            self.curr_iter_tree_tmp_filename = pasta_products.get_abs_path_for_iter_output(
                self.current_iteration, TEMP_TREE_TAG, allow_existing=True)
            self.tree_build_job = tbj
            jobq.put(tbj)
            new_score, new_tree_str = tbj.get_results()
            self.tree_build_job = None
            del tbj
            if self.killed:
                raise RuntimeError("PASTA Job killed")
            record_timestamp(os.path.join(curr_tmp_dir_par, 'end_treeinference_timestamp.txt'))

            curr_timestamp = time.time()
            accept_iteration = False

            if self.score is None:
                self.score = new_score

            if self.best_score is None or new_score > self.best_score:
                self.store_optimum_results(new_multilocus_dataset,
                                           new_tree_str,
                                           new_score,
                                           curr_timestamp)
                this_iter_score_improved = True
                accept_iteration = True

            if self._get_accept_mode(new_score=new_score,
                                     break_strategy_index=self.break_strategy_index) == AcceptMode.BLIND_MODE:
                if self.blind_mode_is_final:
                    self.is_stuck_in_blind = True
                    if self.switch_to_blind_timestamp is None:
                        if self._blindmode_trigger:
                            _LOG.debug("Blind runmode trigger = %s" % self._blindmode_trigger)
                        self.switch_to_blind_iter = self.current_iteration
                        self.switch_to_blind_timestamp = curr_timestamp
                accept_iteration = True

            if accept_iteration:
                self.score = new_score
                self.multilocus_dataset = new_multilocus_dataset
                self.tree_str = new_tree_str
                if this_iter_score_improved:
                    self.status('realignment accepted and score improved.')
                else:
                    self.status('realignment accepted despite the score not improving.')
                # we do not want to continue to try different breaking strategies
                # for this iteration so we break
                self.status('current score: %s, best score: %s' % (self.score, self.best_score))
                # break
            else:
                self.status('realignment NOT accepted.')
                self.curr_iter_align_tmp_filename = prev_curr_align
                self.curr_iter_tree_tmp_filename = prev_curr_tree

            # break_strategy_index += 1
            # self.status('current score: %s, best score: %s' % (self.score, self.best_score))

            if not this_iter_score_improved:
                self.num_iter_since_imp += 1
            self.current_iteration += 1

    if not self.resumable:
        if self._termination_trigger:
            _LOG.debug("Termination trigger = %s" % self._termination_trigger)
        record_timestamp(os.path.join(tmp_dir_par, 'end_pastaiter_timestamp.txt'))

        ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
        if not self.return_final_tree_and_alignment:
            self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta()
            for locus_alignment in self.best_multilocus_dataset:
                self.multilocus_dataset.append(copy.copy(locus_alignment))
            self.tree_str = self.best_tree_str
            self.score = self.best_score
        else:
            assert self.multilocus_dataset is not None
            assert self.tree_str is not None
            assert self.score is not None
    return (None, None)
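# Illustrative only: a minimal sketch of the two-phase protocol implied by the
# interruptible run() above.  A "resumable" return hands back the path of the
# alignment job list so the caller can execute those alignment jobs out of
# band before calling run() again; (None, None) signals that the iterations
# have finished.  `interruptible_job`, `temporaries_dir`, and `pasta_products`
# are placeholder names for caller-provided objects; this is not PASTA's
# actual driver code.
def _example_two_phase_run(interruptible_job, temporaries_dir, pasta_products):
    while True:
        aln_job_list_path, resumable = interruptible_job.run(
            temporaries_dir, pasta_products=pasta_products)
        if not resumable:
            # Finished: best alignment, tree, and score are on the job object.
            break
        # Here the caller would execute the alignment jobs recorded in
        # aln_job_list_path (for example by queuing
        # interruptible_job.pasta_team.alignmentjobs on jobq) before resuming.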
def launch_alignment(self, tree=None, break_strategy=None, context_str=None):
    '''Puts alignment job(s) in the queue and then returns None.

    get_results() must be called to get the alignment. Note that this call may
    not be trivial in terms of time (the tree will be decomposed, lots of
    temporary files may be written...), but the call does not block until
    completion of the alignments. Rather, it queues the alignment jobs so that
    multiple processors can be exploited if they are available.
    '''
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")

    if break_strategy is not None:
        self.break_strategy = break_strategy
    break_strategy = self.break_strategy
    if tree is not None:
        self.tree = tree
    self.expected_number_of_taxa = self.multilocus_dataset.get_num_taxa()  # for debugging purposes
    self._reset_jobs()

    prefix = "self.multilocus_dataset.get_num_taxa = %d" % self.expected_number_of_taxa
    self.context_str = context_str
    if self.context_str is None:
        self.context_str = ''
    _LOG.debug("Comparing expected_number_of_taxa=%d and max_subproblem_size=%d\n" %
               (self.expected_number_of_taxa, self.max_subproblem_size))
    if self.expected_number_of_taxa <= self.max_subproblem_size:
        _LOG.debug("%s...Calling Aligner" % prefix)
        aj_list = []
        for index, single_locus_sd in enumerate(self.multilocus_dataset):
            aj = self.pasta_team.aligner.create_job(single_locus_sd,
                                                    tmp_dir_par=self.tmp_dir_par,
                                                    delete_temps=self.delete_temps,
                                                    context_str=self.context_str + " align" + str(index))
            aj.add_parent_tickable_job(self)
            self.add_child(aj)
            aj_list.append(aj)
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            self.pasta_team.alignmentjobs.append(aj)
        self.align_job_list = aj_list
        if self.skip_merge:
            for taxa in self.tree.leaf_node_names():
                self.pasta_team.subsets[taxa] = self
        else:
            for aj in aj_list:
                jobq.put(aj)
    else:
        _LOG.debug("%s...Recursing" % prefix)
        # create the subjobs
        self.subjob1, self.subjob2 = self.bipartition_by_tree(break_strategy)
        # store this dir so we can use it in the merger
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")

        self.subjob1.add_parent(self)
        self.subjob2.add_parent(self)
        self.add_child(self.subjob1)
        self.add_child(self.subjob2)
        self.subjob1.launch_alignment(break_strategy=break_strategy)
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
        self.subjob2.launch_alignment(break_strategy=break_strategy)
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
    return