def run(self, tmp_dir_par, sate_products=None):
    """Iterate realignment and tree inference until `_keep_iterating()` is false.

    Each outer iteration requests a `step<N>` subdirectory of `tmp_dir_par`
    and then walks the break strategies returned by `_get_break_strategy`
    (index 0, 1, ...) until one is accepted or an empty strategy is
    returned.  For every strategy the current dataset is realigned with a
    `SateAlignerJob`, a tree-estimation job is put on `jobq`, and the
    resulting score is compared with `self.best_score`.

    A realignment is accepted when it sets a new best score (in which case
    `store_optimum_results` is called) or when `_get_accept_mode` reports
    `AcceptMode.BLIND_MODE`.  On acceptance `self.score`,
    `self.multilocus_dataset` and `self.tree_str` are replaced and the
    remaining strategies are skipped for this iteration.

    After the loop, unless `self.return_final_tree_and_alignment` is set,
    the best dataset (shallow copy of each locus), tree string and score
    are copied back onto `self`.

    Args:
        tmp_dir_par: Existing directory under which per-iteration
            temporary directories and timestamp files are created.
        sate_products: Object providing `get_abs_path_for_iter_output`;
            it is also forwarded to the tree estimator's `create_job`.
            When None, the per-iteration alignment/tree file names on
            `self` are left unchanged.

    Raises:
        AssertionError: If `tmp_dir_par` does not exist, or if the final
            state is incomplete when the final pair is being returned.
        RuntimeError: If `self.killed` is set at one of the checkpoints.
    """
    assert os.path.exists(tmp_dir_par)

    self._reset_current_run_settings()
    self._reset_jobs()

    self.start_time = time.time()
    self.last_improvement_time = self.start_time

    configuration = self.configuration()
    # The fraction-based cap replaces the configured absolute cap only when
    # it is the larger of the two.
    frac_max = int(math.ceil(self.max_subproblem_frac * self.tree.n_leaves))
    if frac_max > self.max_subproblem_size:
        configuration['max_subproblem_size'] = frac_max
    MESSENGER.send_info('Max subproblem set to {0}'.format(
        configuration['max_subproblem_size']))
    if configuration['max_subproblem_size'] >= self.tree.n_leaves:
        # {0} is the subproblem cap, {1} is the number of taxa.
        MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({1}). Thus, the SATe algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using SATe) please adjust your settings for the max subproblem and
    try running SATe again. If you intended to use SATe to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                       self.tree.n_leaves))

    # Realignment temporaries are removed whenever iteration temporaries are.
    delete_iteration_temps = not self.keep_iteration_temporaries
    delete_realignment_temps = delete_iteration_temps or (
        not self.keep_realignment_temporaries)
    configuration['delete_temps'] = delete_realignment_temps

    while self._keep_iterating():
        record_timestamp(
            os.path.join(tmp_dir_par, 'start_sateiter_timestamp.txt'))

        # create a subdirectory for this iteration
        curr_iter_tmp_dir_par = os.path.join(
            tmp_dir_par, 'step' + str(self.current_iteration))
        curr_iter_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
            curr_iter_tmp_dir_par)
        _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
        break_strategy_index = 0
        this_iter_score_improved = False

        while True:
            break_strategy = self._get_break_strategy(break_strategy_index)
            if not break_strategy:
                # No strategies left to try for this iteration.
                break
            context_str = "iter%d-%s" % (self.current_iteration,
                                         break_strategy)
            # create a subdirectory for this iteration/break_strategy
            curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par,
                                            break_strategy)
            curr_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
                curr_tmp_dir_par)
            record_timestamp(
                os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))

            # Align (with decomposition...)
            self.status(
                'Step %d. Realigning with decomposition strategy set to %s'
                % (self.current_iteration, break_strategy))
            if self.killed:
                raise RuntimeError("SATe Job killed")
            tree_for_aligner = self.get_tree_copy()
            aligner = SateAlignerJob(
                multilocus_dataset=self.multilocus_dataset,
                sate_team=self.sate_team,
                tree=tree_for_aligner,
                tmp_base_dir=curr_tmp_dir_par,
                reset_recursion_index=True,
                **configuration)
            # Expose the running job on self while it is in flight.
            self.sate_aligner_job = aligner
            aligner.launch_alignment(break_strategy=break_strategy,
                                     context_str=context_str)
            new_multilocus_dataset = aligner.get_results()
            self.sate_aligner_job = None
            del aligner

            record_timestamp(
                os.path.join(curr_tmp_dir_par,
                             'start_treeinference_timestamp.txt'))

            # Tree inference
            if self.start_tree_search_from_current:
                start_from = self.tree
            else:
                start_from = None
            self.status(
                'Step %d. Alignment obtained. Tree inference beginning...'
                % (self.current_iteration))
            if self.killed:
                raise RuntimeError("SATe Job killed")
            tbj = self.sate_team.tree_estimator.create_job(
                new_multilocus_dataset,
                starting_tree=start_from,
                num_cpus=self.num_cpus,
                context_str=context_str + " tree",
                tmp_dir_par=curr_tmp_dir_par,
                delete_temps=delete_iteration_temps,
                sate_products=sate_products,
                step_num=self.current_iteration)

            # Remember the previous per-iteration paths so that a rejected
            # realignment can roll them back.
            prev_curr_align = self.curr_iter_align_tmp_filename
            prev_curr_tree = self.curr_iter_tree_tmp_filename
            # sate_products defaults to None; without it no per-iteration
            # output path can be derived, so the paths are left as they are
            # instead of failing with AttributeError after the alignment.
            if sate_products is not None:
                self.curr_iter_align_tmp_filename = \
                    sate_products.get_abs_path_for_iter_output(
                        self.current_iteration,
                        TEMP_SEQ_ALIGNMENT_TAG,
                        allow_existing=True)
                self.curr_iter_tree_tmp_filename = \
                    sate_products.get_abs_path_for_iter_output(
                        self.current_iteration,
                        TEMP_TREE_TAG,
                        allow_existing=True)

            self.tree_build_job = tbj
            jobq.put(tbj)
            new_score, new_tree_str = tbj.get_results()
            self.tree_build_job = None
            del tbj
            if self.killed:
                raise RuntimeError("SATe Job killed")

            record_timestamp(
                os.path.join(curr_tmp_dir_par,
                             'end_treeinference_timestamp.txt'))
            curr_timestamp = time.time()
            accept_iteration = False

            if self.score is None:
                self.score = new_score

            if self.best_score is None or new_score > self.best_score:
                self.store_optimum_results(new_multilocus_dataset,
                                           new_tree_str,
                                           new_score,
                                           curr_timestamp)
                this_iter_score_improved = True
                accept_iteration = True

            accept_mode = self._get_accept_mode(
                new_score=new_score,
                break_strategy_index=break_strategy_index)
            if accept_mode == AcceptMode.BLIND_MODE:
                if self.blind_mode_is_final:
                    self.is_stuck_in_blind = True
                    # Record only the first switch into blind mode.
                    if self.switch_to_blind_timestamp is None:
                        if self._blindmode_trigger:
                            _LOG.debug("Blind runmode trigger = %s"
                                       % self._blindmode_trigger)
                        self.switch_to_blind_iter = self.current_iteration
                        self.switch_to_blind_timestamp = curr_timestamp
                accept_iteration = True

            if accept_iteration:
                self.score = new_score
                self.multilocus_dataset = new_multilocus_dataset
                self.tree_str = new_tree_str
                if this_iter_score_improved:
                    self.status('realignment accepted and score improved.')
                else:
                    self.status('realignment accepted and despite the score not improving.')
                # we do not want to continue to try different breaking
                # strategies for this iteration so we break
                self.status('current score: %s, best score: %s'
                            % (self.score, self.best_score))
                break

            self.status('realignment NOT accepted.')
            self.curr_iter_align_tmp_filename = prev_curr_align
            self.curr_iter_tree_tmp_filename = prev_curr_tree
            break_strategy_index += 1

        if not this_iter_score_improved:
            self.num_iter_since_imp += 1
        self.current_iteration += 1

    if self._termination_trigger:
        _LOG.debug("Termination trigger = %s" % self._termination_trigger)
    record_timestamp(os.path.join(tmp_dir_par, 'end_sateiter_timestamp.txt'))

    ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
    if not self.return_final_tree_and_alignment:
        # Hand back the best result seen, not the last accepted one.
        self.multilocus_dataset = \
            self.best_multilocus_dataset.new_with_shared_meta()
        for locus_alignment in self.best_multilocus_dataset:
            self.multilocus_dataset.append(copy.copy(locus_alignment))
        self.tree_str = self.best_tree_str
        self.score = self.best_score
    else:
        assert self.multilocus_dataset is not None
        assert self.tree_str is not None
        assert self.score is not None
def run(self, tmp_dir_par, sate_products=None):
    """Iterate realignment and tree inference until `_keep_iterating()` is false.

    Each outer iteration requests a `step<N>` subdirectory of `tmp_dir_par`
    and then walks the break strategies returned by `_get_break_strategy`
    (index 0, 1, ...) until one is accepted or an empty strategy is
    returned.  For every strategy the current dataset is realigned with a
    `SateAlignerJob`, a tree-estimation job is put on `jobq`, and the
    resulting score is compared with `self.best_score`.

    A realignment is accepted when it sets a new best score (in which case
    `store_optimum_results` is called) or when `_get_accept_mode` reports
    `AcceptMode.BLIND_MODE`.  On acceptance `self.score`,
    `self.multilocus_dataset` and `self.tree_str` are replaced and the
    remaining strategies are skipped for this iteration.

    After the loop, unless `self.return_final_tree_and_alignment` is set,
    the best dataset (shallow copy of each locus), tree string and score
    are copied back onto `self`.

    Args:
        tmp_dir_par: Existing directory under which per-iteration
            temporary directories and timestamp files are created.
        sate_products: Object providing `get_abs_path_for_iter_output`;
            it is also forwarded to the tree estimator's `create_job`.
            When None, the per-iteration alignment/tree file names on
            `self` are left unchanged.

    Raises:
        AssertionError: If `tmp_dir_par` does not exist, or if the final
            state is incomplete when the final pair is being returned.
        RuntimeError: If `self.killed` is set at one of the checkpoints.
    """
    assert os.path.exists(tmp_dir_par)

    self._reset_current_run_settings()
    self._reset_jobs()

    self.start_time = time.time()
    self.last_improvement_time = self.start_time

    configuration = self.configuration()
    # The fraction-based cap replaces the configured absolute cap only when
    # it is the larger of the two.
    frac_max = int(math.ceil(self.max_subproblem_frac * self.tree.n_leaves))
    if frac_max > self.max_subproblem_size:
        configuration['max_subproblem_size'] = frac_max
    MESSENGER.send_info('Max subproblem set to {0}'.format(
        configuration['max_subproblem_size']))
    if configuration['max_subproblem_size'] >= self.tree.n_leaves:
        # {0} is the subproblem cap, {1} is the number of taxa.
        MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({1}). Thus, the SATe algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using SATe) please adjust your settings for the max subproblem and
    try running SATe again. If you intended to use SATe to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                       self.tree.n_leaves))

    # Realignment temporaries are removed whenever iteration temporaries are.
    delete_iteration_temps = not self.keep_iteration_temporaries
    delete_realignment_temps = delete_iteration_temps or (
        not self.keep_realignment_temporaries)
    configuration['delete_temps'] = delete_realignment_temps

    while self._keep_iterating():
        record_timestamp(
            os.path.join(tmp_dir_par, 'start_sateiter_timestamp.txt'))

        # create a subdirectory for this iteration
        curr_iter_tmp_dir_par = os.path.join(
            tmp_dir_par, 'step' + str(self.current_iteration))
        curr_iter_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
            curr_iter_tmp_dir_par)
        _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
        break_strategy_index = 0
        this_iter_score_improved = False

        while True:
            break_strategy = self._get_break_strategy(break_strategy_index)
            if not break_strategy:
                # No strategies left to try for this iteration.
                break
            context_str = "iter%d-%s" % (self.current_iteration,
                                         break_strategy)
            # create a subdirectory for this iteration/break_strategy
            curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par,
                                            break_strategy)
            curr_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
                curr_tmp_dir_par)
            record_timestamp(
                os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))

            # Align (with decomposition...)
            self.status(
                'Step %d. Realigning with decomposition strategy set to %s'
                % (self.current_iteration, break_strategy))
            if self.killed:
                raise RuntimeError("SATe Job killed")
            tree_for_aligner = self.get_tree_copy()
            aligner = SateAlignerJob(
                multilocus_dataset=self.multilocus_dataset,
                sate_team=self.sate_team,
                tree=tree_for_aligner,
                tmp_base_dir=curr_tmp_dir_par,
                reset_recursion_index=True,
                **configuration)
            # Expose the running job on self while it is in flight.
            self.sate_aligner_job = aligner
            aligner.launch_alignment(break_strategy=break_strategy,
                                     context_str=context_str)
            new_multilocus_dataset = aligner.get_results()
            self.sate_aligner_job = None
            del aligner

            record_timestamp(
                os.path.join(curr_tmp_dir_par,
                             'start_treeinference_timestamp.txt'))

            # Tree inference
            if self.start_tree_search_from_current:
                start_from = self.tree
            else:
                start_from = None
            self.status(
                'Step %d. Alignment obtained. Tree inference beginning...'
                % (self.current_iteration))
            if self.killed:
                raise RuntimeError("SATe Job killed")
            tbj = self.sate_team.tree_estimator.create_job(
                new_multilocus_dataset,
                starting_tree=start_from,
                num_cpus=self.num_cpus,
                context_str=context_str + " tree",
                tmp_dir_par=curr_tmp_dir_par,
                delete_temps=delete_iteration_temps,
                sate_products=sate_products,
                step_num=self.current_iteration)

            # Remember the previous per-iteration paths so that a rejected
            # realignment can roll them back.
            prev_curr_align = self.curr_iter_align_tmp_filename
            prev_curr_tree = self.curr_iter_tree_tmp_filename
            # sate_products defaults to None; without it no per-iteration
            # output path can be derived, so the paths are left as they are
            # instead of failing with AttributeError after the alignment.
            if sate_products is not None:
                self.curr_iter_align_tmp_filename = \
                    sate_products.get_abs_path_for_iter_output(
                        self.current_iteration,
                        TEMP_SEQ_ALIGNMENT_TAG,
                        allow_existing=True)
                self.curr_iter_tree_tmp_filename = \
                    sate_products.get_abs_path_for_iter_output(
                        self.current_iteration,
                        TEMP_TREE_TAG,
                        allow_existing=True)

            self.tree_build_job = tbj
            jobq.put(tbj)
            new_score, new_tree_str = tbj.get_results()
            self.tree_build_job = None
            del tbj
            if self.killed:
                raise RuntimeError("SATe Job killed")

            record_timestamp(
                os.path.join(curr_tmp_dir_par,
                             'end_treeinference_timestamp.txt'))
            curr_timestamp = time.time()
            accept_iteration = False

            if self.score is None:
                self.score = new_score

            if self.best_score is None or new_score > self.best_score:
                self.store_optimum_results(new_multilocus_dataset,
                                           new_tree_str,
                                           new_score,
                                           curr_timestamp)
                this_iter_score_improved = True
                accept_iteration = True

            accept_mode = self._get_accept_mode(
                new_score=new_score,
                break_strategy_index=break_strategy_index)
            if accept_mode == AcceptMode.BLIND_MODE:
                if self.blind_mode_is_final:
                    self.is_stuck_in_blind = True
                    # Record only the first switch into blind mode.
                    if self.switch_to_blind_timestamp is None:
                        if self._blindmode_trigger:
                            _LOG.debug("Blind runmode trigger = %s"
                                       % self._blindmode_trigger)
                        self.switch_to_blind_iter = self.current_iteration
                        self.switch_to_blind_timestamp = curr_timestamp
                accept_iteration = True

            if accept_iteration:
                self.score = new_score
                self.multilocus_dataset = new_multilocus_dataset
                self.tree_str = new_tree_str
                if this_iter_score_improved:
                    self.status('realignment accepted and score improved.')
                else:
                    self.status('realignment accepted and despite the score not improving.')
                # we do not want to continue to try different breaking
                # strategies for this iteration so we break
                self.status('current score: %s, best score: %s'
                            % (self.score, self.best_score))
                break

            self.status('realignment NOT accepted.')
            self.curr_iter_align_tmp_filename = prev_curr_align
            self.curr_iter_tree_tmp_filename = prev_curr_tree
            break_strategy_index += 1

        if not this_iter_score_improved:
            self.num_iter_since_imp += 1
        self.current_iteration += 1

    if self._termination_trigger:
        _LOG.debug("Termination trigger = %s" % self._termination_trigger)
    record_timestamp(os.path.join(tmp_dir_par, 'end_sateiter_timestamp.txt'))

    ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
    if not self.return_final_tree_and_alignment:
        # Hand back the best result seen, not the last accepted one.
        self.multilocus_dataset = \
            self.best_multilocus_dataset.new_with_shared_meta()
        for locus_alignment in self.best_multilocus_dataset:
            self.multilocus_dataset.append(copy.copy(locus_alignment))
        self.tree_str = self.best_tree_str
        self.score = self.best_score
    else:
        assert self.multilocus_dataset is not None
        assert self.tree_str is not None
        assert self.score is not None
def run(self, tmp_dir_par):
    """Iterate realignment and tree inference until `_keep_iterating()` is false.

    Each outer iteration requests a `step<N>` subdirectory of `tmp_dir_par`
    and then walks the break strategies returned by `_get_break_strategy`
    (index 0, 1, ...) until one is accepted or an empty strategy is
    returned.  For every strategy the current dataset is realigned with a
    `SateAlignerJob`, a tree-estimation job is put on `jobq`, and the
    resulting score is compared with `self.best_score`.

    A realignment is accepted when it sets a new best score (in which case
    `store_optimum_results` is called) or when `_get_accept_mode` reports
    `AcceptMode.BLIND_MODE`.  On acceptance `self.score`,
    `self.multilocus_dataset` and `self.tree_str` are replaced and the
    remaining strategies are skipped for this iteration.

    After the loop, unless `self.return_final_tree_and_alignment` is set,
    the best dataset (shallow copy of each locus), tree string and score
    are copied back onto `self`.

    Args:
        tmp_dir_par: Existing directory under which per-iteration
            temporary directories and timestamp files are created.

    Raises:
        AssertionError: If `tmp_dir_par` does not exist, or if the final
            state is incomplete when the final pair is being returned.
        RuntimeError: If `self.killed` is set at one of the checkpoints.
    """
    assert os.path.exists(tmp_dir_par)

    self._reset_current_run_settings()
    self._reset_jobs()

    self.start_time = time.time()
    self.last_improvement_time = self.start_time

    configuration = self.configuration()
    # The fraction-based cap replaces the configured absolute cap only when
    # it is the larger of the two.
    frac_max = int(math.ceil(self.max_subproblem_frac * self.tree.n_leaves))
    if frac_max > self.max_subproblem_size:
        configuration['max_subproblem_size'] = frac_max

    # Realignment temporaries are removed whenever iteration temporaries are.
    delete_iteration_temps = not self.keep_iteration_temporaries
    delete_realignment_temps = delete_iteration_temps or (
        not self.keep_realignment_temporaries)
    configuration['delete_temps'] = delete_realignment_temps

    while self._keep_iterating():
        record_timestamp(
            os.path.join(tmp_dir_par, 'start_sateiter_timestamp.txt'))

        # create a subdirectory for this iteration
        curr_iter_tmp_dir_par = os.path.join(
            tmp_dir_par, 'step' + str(self.current_iteration))
        curr_iter_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
            curr_iter_tmp_dir_par)
        _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
        break_strategy_index = 0
        this_iter_score_improved = False

        while True:
            break_strategy = self._get_break_strategy(break_strategy_index)
            if not break_strategy:
                # No strategies left to try for this iteration.
                break
            context_str = "iter%d-%s" % (self.current_iteration,
                                         break_strategy)
            # create a subdirectory for this iteration/break_strategy
            curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par,
                                            break_strategy)
            curr_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
                curr_tmp_dir_par)
            record_timestamp(
                os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))

            # Align (with decomposition...)
            self.status(
                'Step %d. Realigning with decomposition strategy set to %s'
                % (self.current_iteration, break_strategy))
            if self.killed:
                raise RuntimeError("SATe Job killed")
            tree_for_aligner = self.get_tree_copy()
            aligner = SateAlignerJob(
                multilocus_dataset=self.multilocus_dataset,
                sate_team=self.sate_team,
                tree=tree_for_aligner,
                tmp_dir_par=curr_tmp_dir_par,
                **configuration)
            # Expose the running job on self while it is in flight.
            self.sate_aligner_job = aligner
            aligner.launch_alignment(break_strategy=break_strategy,
                                     context_str=context_str)
            new_multilocus_dataset = aligner.get_results()
            self.sate_aligner_job = None
            del aligner

            record_timestamp(
                os.path.join(curr_tmp_dir_par,
                             'start_treeinference_timestamp.txt'))

            # Tree inference
            if self.start_tree_search_from_current:
                start_from = self.tree
            else:
                start_from = None
            self.status(
                'Step %d. Alignment obtained. Tree inference beginning...'
                % (self.current_iteration))
            if self.killed:
                raise RuntimeError("SATe Job killed")
            tbj = self.sate_team.tree_estimator.create_job(
                new_multilocus_dataset,
                starting_tree=start_from,
                num_cpus=self.num_cpus,
                context_str=context_str + " tree",
                tmp_dir_par=curr_tmp_dir_par,
                delete_temps=delete_iteration_temps)
            self.tree_build_job = tbj
            jobq.put(tbj)
            new_score, new_tree_str = tbj.get_results()
            self.tree_build_job = None
            del tbj
            if self.killed:
                raise RuntimeError("SATe Job killed")

            record_timestamp(
                os.path.join(curr_tmp_dir_par,
                             'end_treeinference_timestamp.txt'))
            curr_timestamp = time.time()
            accept_iteration = False

            if self.score is None:
                self.score = new_score

            if self.best_score is None or new_score > self.best_score:
                self.store_optimum_results(new_multilocus_dataset,
                                           new_tree_str,
                                           new_score,
                                           curr_timestamp)
                this_iter_score_improved = True
                accept_iteration = True

            accept_mode = self._get_accept_mode(
                new_score=new_score,
                break_strategy_index=break_strategy_index)
            if accept_mode == AcceptMode.BLIND_MODE:
                if self.blind_mode_is_final:
                    self.is_stuck_in_blind = True
                    # Record only the first switch into blind mode.
                    if self.switch_to_blind_timestamp is None:
                        if self._blindmode_trigger:
                            _LOG.debug("Blind runmode trigger = %s"
                                       % self._blindmode_trigger)
                        self.switch_to_blind_iter = self.current_iteration
                        self.switch_to_blind_timestamp = curr_timestamp
                accept_iteration = True

            if accept_iteration:
                self.score = new_score
                self.multilocus_dataset = new_multilocus_dataset
                self.tree_str = new_tree_str
                self.status('realignment accepted.')
                # we do not want to continue to try different breaking
                # strategies for this iteration so we break
                self.status('current score: %s, best score: %s'
                            % (self.score, self.best_score))
                break

            self.status('realignment NOT accepted.')
            break_strategy_index += 1

        if not this_iter_score_improved:
            self.num_iter_since_imp += 1
        self.current_iteration += 1

    if self._termination_trigger:
        _LOG.debug("Termination trigger = %s" % self._termination_trigger)
    record_timestamp(os.path.join(tmp_dir_par, 'end_sateiter_timestamp.txt'))

    ### TODO: if configuration is 'return_final_iter_T&Apair', then skip the following three lines
    if not self.return_final_tree_and_alignment:
        # Hand back the best result seen, not the last accepted one.
        self.multilocus_dataset = \
            self.best_multilocus_dataset.new_with_shared_meta()
        for locus_alignment in self.best_multilocus_dataset:
            self.multilocus_dataset.append(copy.copy(locus_alignment))
        self.tree_str = self.best_tree_str
        self.score = self.best_score
    else:
        assert self.multilocus_dataset is not None
        assert self.tree_str is not None
        assert self.score is not None