Ejemplo n.º 1
0
    def run(self, tmp_dir_par, sate_products=None):
        assert(os.path.exists(tmp_dir_par))

        self._reset_current_run_settings()
        self._reset_jobs()

        self.start_time = time.time()
        self.last_improvement_time = self.start_time

        num_non_update_iter = 0

        configuration = self.configuration()
        # Here we check if the max_subproblem_frac is more stringent than max_subproblem_size
        frac_max = int(math.ceil(self.max_subproblem_frac*self.tree.n_leaves))
        if frac_max > self.max_subproblem_size:
            configuration['max_subproblem_size'] = frac_max
        MESSENGER.send_info('Max subproblem set to {0}'.format(
                configuration['max_subproblem_size']))
        if configuration['max_subproblem_size'] >= self.tree.n_leaves:
            MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({0}). Thus, the SATe algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using SATe) please adjust your settings for the max subproblem and
    try running SATe again. If you intended to use SATe to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                       self.tree.n_leaves))
        delete_iteration_temps = not self.keep_iteration_temporaries
        delete_realignment_temps = delete_iteration_temps or (not self.keep_realignment_temporaries)
        configuration['delete_temps'] = delete_realignment_temps

        while self._keep_iterating():
            record_timestamp(os.path.join(tmp_dir_par, 'start_sateiter_timestamp.txt'))

            # create a subdirectory for this iteration
            curr_iter_tmp_dir_par = os.path.join(tmp_dir_par, 'step' + str(self.current_iteration))
            curr_iter_tmp_dir_par = self.sate_team.temp_fs.create_subdir(curr_iter_tmp_dir_par)
            _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
            break_strategy_index = 0
            this_iter_score_improved = False

            while True:
                break_strategy =  self._get_break_strategy(break_strategy_index)
                if not bool(break_strategy):
                    break
                context_str = "iter%d-%s" % (self.current_iteration, break_strategy)
                # create a subdirectory for this iteration/break_strategy
                curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par, break_strategy)
                curr_tmp_dir_par = self.sate_team.temp_fs.create_subdir(curr_tmp_dir_par)
                record_timestamp(os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))
                # Align (with decomposition...)
                self.status('Step %d. Realigning with decomposition strategy set to %s' % (self.current_iteration, break_strategy))
                if self.killed:
                    raise RuntimeError("SATe Job killed")
                tree_for_aligner = self.get_tree_copy()
                tree_for_aligner = self.get_tree_copy()
                aligner = SateAlignerJob(multilocus_dataset=self.multilocus_dataset,
                                         sate_team=self.sate_team,
                                         tree=tree_for_aligner,
                                         tmp_base_dir=curr_tmp_dir_par,
                                         reset_recursion_index=True,
                                         **configuration)
                self.sate_aligner_job = aligner
                aligner.launch_alignment(break_strategy=break_strategy,
                                         context_str=context_str)

                new_multilocus_dataset = aligner.get_results()
                self.sate_aligner_job = None
                del aligner


                record_timestamp(os.path.join(curr_tmp_dir_par, 'start_treeinference_timestamp.txt'))
                # Tree inference
                if self.start_tree_search_from_current:
                    start_from = self.tree
                else:
                    start_from = None
                self.status('Step %d. Alignment obtained. Tree inference beginning...' % (self.current_iteration))
                if self.killed:
                    raise RuntimeError("SATe Job killed")
                tbj = self.sate_team.tree_estimator.create_job(new_multilocus_dataset,
                                                               starting_tree=start_from,
                                                               num_cpus=self.num_cpus,
                                                               context_str=context_str + " tree",
                                                               tmp_dir_par=curr_tmp_dir_par,
                                                               delete_temps=delete_iteration_temps,
                                                               sate_products=sate_products,
                                                               step_num=self.current_iteration)
                prev_curr_align = self.curr_iter_align_tmp_filename
                prev_curr_tree = self.curr_iter_tree_tmp_filename
                self.curr_iter_align_tmp_filename = sate_products.get_abs_path_for_iter_output(self.current_iteration, TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
                self.curr_iter_tree_tmp_filename = sate_products.get_abs_path_for_iter_output(self.current_iteration, TEMP_TREE_TAG, allow_existing=True)

                self.tree_build_job = tbj
                jobq.put(tbj)
                new_score, new_tree_str = tbj.get_results()
                self.tree_build_job = None
                del tbj
                if self.killed:
                    raise RuntimeError("SATe Job killed")

                record_timestamp(os.path.join(curr_tmp_dir_par, 'end_treeinference_timestamp.txt'))
                curr_timestamp = time.time()
                accept_iteration = False

                if self.score is None:
                    self.score = new_score

                if self.best_score is None or new_score > self.best_score:
                    self.store_optimum_results(new_multilocus_dataset,
                            new_tree_str,
                            new_score,
                            curr_timestamp)
                    this_iter_score_improved = True
                    accept_iteration = True

                if self._get_accept_mode(new_score=new_score, break_strategy_index=break_strategy_index) == AcceptMode.BLIND_MODE:
                    if self.blind_mode_is_final:
                        self.is_stuck_in_blind = True
                        if self.switch_to_blind_timestamp is None:
                            if self._blindmode_trigger:
                                _LOG.debug("Blind runmode trigger = %s" % self._blindmode_trigger)
                            self.switch_to_blind_iter = self.current_iteration
                            self.switch_to_blind_timestamp = curr_timestamp
                    accept_iteration = True

                if accept_iteration:
                    self.score = new_score
                    self.multilocus_dataset = new_multilocus_dataset
                    self.tree_str = new_tree_str
                    if this_iter_score_improved:
                        self.status('realignment accepted and score improved.')
                    else:
                        self.status('realignment accepted and despite the score not improving.')
                    # we do not want to continue to try different breaking strategies for this iteration so we break
                    self.status('current score: %s, best score: %s' % (self.score, self.best_score) )
                    break
                else:
                    self.status('realignment NOT accepted.')
                    self.curr_iter_align_tmp_filename = prev_curr_align
                    self.curr_iter_tree_tmp_filename = prev_curr_tree 

                break_strategy_index += 1

                # self.status('current score: %s, best score: %s' % (self.score, self.best_score) )
                
            if not this_iter_score_improved:
                self.num_iter_since_imp += 1
            self.current_iteration += 1

        if self._termination_trigger:
            _LOG.debug("Termination trigger = %s" % self._termination_trigger)
        record_timestamp(os.path.join(tmp_dir_par, 'end_sateiter_timestamp.txt'))

        ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
        if not self.return_final_tree_and_alignment:
            self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta()
            for locus_alignment in self.best_multilocus_dataset:
                self.multilocus_dataset.append(copy.copy(locus_alignment))
            self.tree_str = self.best_tree_str
            self.score = self.best_score
        else:
            assert self.multilocus_dataset is not None
            assert self.tree_str is not None
            assert self.score is not None
Ejemplo n.º 2
0
    def run(self, tmp_dir_par, sate_products=None):
        assert (os.path.exists(tmp_dir_par))

        self._reset_current_run_settings()
        self._reset_jobs()

        self.start_time = time.time()
        self.last_improvement_time = self.start_time

        num_non_update_iter = 0

        configuration = self.configuration()
        # Here we check if the max_subproblem_frac is more stringent than max_subproblem_size
        frac_max = int(math.ceil(self.max_subproblem_frac *
                                 self.tree.n_leaves))
        if frac_max > self.max_subproblem_size:
            configuration['max_subproblem_size'] = frac_max
        MESSENGER.send_info('Max subproblem set to {0}'.format(
            configuration['max_subproblem_size']))
        if configuration['max_subproblem_size'] >= self.tree.n_leaves:
            MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({0}). Thus, the SATe algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using SATe) please adjust your settings for the max subproblem and
    try running SATe again. If you intended to use SATe to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                         self.tree.n_leaves))
        delete_iteration_temps = not self.keep_iteration_temporaries
        delete_realignment_temps = delete_iteration_temps or (
            not self.keep_realignment_temporaries)
        configuration['delete_temps'] = delete_realignment_temps

        while self._keep_iterating():
            record_timestamp(
                os.path.join(tmp_dir_par, 'start_sateiter_timestamp.txt'))

            # create a subdirectory for this iteration
            curr_iter_tmp_dir_par = os.path.join(
                tmp_dir_par, 'step' + str(self.current_iteration))
            curr_iter_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
                curr_iter_tmp_dir_par)
            _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
            break_strategy_index = 0
            this_iter_score_improved = False

            while True:
                break_strategy = self._get_break_strategy(break_strategy_index)
                if not bool(break_strategy):
                    break
                context_str = "iter%d-%s" % (self.current_iteration,
                                             break_strategy)
                # create a subdirectory for this iteration/break_strategy
                curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par,
                                                break_strategy)
                curr_tmp_dir_par = self.sate_team.temp_fs.create_subdir(
                    curr_tmp_dir_par)
                record_timestamp(
                    os.path.join(curr_tmp_dir_par,
                                 'start_align_timestamp.txt'))
                # Align (with decomposition...)
                self.status(
                    'Step %d. Realigning with decomposition strategy set to %s'
                    % (self.current_iteration, break_strategy))
                if self.killed:
                    raise RuntimeError("SATe Job killed")
                tree_for_aligner = self.get_tree_copy()
                tree_for_aligner = self.get_tree_copy()
                aligner = SateAlignerJob(
                    multilocus_dataset=self.multilocus_dataset,
                    sate_team=self.sate_team,
                    tree=tree_for_aligner,
                    tmp_base_dir=curr_tmp_dir_par,
                    reset_recursion_index=True,
                    **configuration)
                self.sate_aligner_job = aligner
                aligner.launch_alignment(break_strategy=break_strategy,
                                         context_str=context_str)

                new_multilocus_dataset = aligner.get_results()
                self.sate_aligner_job = None
                del aligner

                record_timestamp(
                    os.path.join(curr_tmp_dir_par,
                                 'start_treeinference_timestamp.txt'))
                # Tree inference
                if self.start_tree_search_from_current:
                    start_from = self.tree
                else:
                    start_from = None
                self.status(
                    'Step %d. Alignment obtained. Tree inference beginning...'
                    % (self.current_iteration))
                if self.killed:
                    raise RuntimeError("SATe Job killed")
                tbj = self.sate_team.tree_estimator.create_job(
                    new_multilocus_dataset,
                    starting_tree=start_from,
                    num_cpus=self.num_cpus,
                    context_str=context_str + " tree",
                    tmp_dir_par=curr_tmp_dir_par,
                    delete_temps=delete_iteration_temps,
                    sate_products=sate_products,
                    step_num=self.current_iteration)
                prev_curr_align = self.curr_iter_align_tmp_filename
                prev_curr_tree = self.curr_iter_tree_tmp_filename
                self.curr_iter_align_tmp_filename = sate_products.get_abs_path_for_iter_output(
                    self.current_iteration,
                    TEMP_SEQ_ALIGNMENT_TAG,
                    allow_existing=True)
                self.curr_iter_tree_tmp_filename = sate_products.get_abs_path_for_iter_output(
                    self.current_iteration, TEMP_TREE_TAG, allow_existing=True)

                self.tree_build_job = tbj
                jobq.put(tbj)
                new_score, new_tree_str = tbj.get_results()
                self.tree_build_job = None
                del tbj
                if self.killed:
                    raise RuntimeError("SATe Job killed")

                record_timestamp(
                    os.path.join(curr_tmp_dir_par,
                                 'end_treeinference_timestamp.txt'))
                curr_timestamp = time.time()
                accept_iteration = False

                if self.score is None:
                    self.score = new_score

                if self.best_score is None or new_score > self.best_score:
                    self.store_optimum_results(new_multilocus_dataset,
                                               new_tree_str, new_score,
                                               curr_timestamp)
                    this_iter_score_improved = True
                    accept_iteration = True

                if self._get_accept_mode(
                        new_score=new_score,
                        break_strategy_index=break_strategy_index
                ) == AcceptMode.BLIND_MODE:
                    if self.blind_mode_is_final:
                        self.is_stuck_in_blind = True
                        if self.switch_to_blind_timestamp is None:
                            if self._blindmode_trigger:
                                _LOG.debug("Blind runmode trigger = %s" %
                                           self._blindmode_trigger)
                            self.switch_to_blind_iter = self.current_iteration
                            self.switch_to_blind_timestamp = curr_timestamp
                    accept_iteration = True

                if accept_iteration:
                    self.score = new_score
                    self.multilocus_dataset = new_multilocus_dataset
                    self.tree_str = new_tree_str
                    if this_iter_score_improved:
                        self.status('realignment accepted and score improved.')
                    else:
                        self.status(
                            'realignment accepted and despite the score not improving.'
                        )
                    # we do not want to continue to try different breaking strategies for this iteration so we break
                    self.status('current score: %s, best score: %s' %
                                (self.score, self.best_score))
                    break
                else:
                    self.status('realignment NOT accepted.')
                    self.curr_iter_align_tmp_filename = prev_curr_align
                    self.curr_iter_tree_tmp_filename = prev_curr_tree

                break_strategy_index += 1

                # self.status('current score: %s, best score: %s' % (self.score, self.best_score) )

            if not this_iter_score_improved:
                self.num_iter_since_imp += 1
            self.current_iteration += 1

        if self._termination_trigger:
            _LOG.debug("Termination trigger = %s" % self._termination_trigger)
        record_timestamp(
            os.path.join(tmp_dir_par, 'end_sateiter_timestamp.txt'))

        ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
        if not self.return_final_tree_and_alignment:
            self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta(
            )
            for locus_alignment in self.best_multilocus_dataset:
                self.multilocus_dataset.append(copy.copy(locus_alignment))
            self.tree_str = self.best_tree_str
            self.score = self.best_score
        else:
            assert self.multilocus_dataset is not None
            assert self.tree_str is not None
            assert self.score is not None
Ejemplo n.º 3
0
    def run(self, tmp_dir_par):
        """Run realign-then-infer-tree iterations until `_keep_iterating()` is false.

        Every iteration gets its own ``step<N>`` subdirectory of `tmp_dir_par`.
        Within an iteration the break strategies from `_get_break_strategy`
        are tried in index order; for each one the data are realigned with a
        `SateAlignerJob` and a tree is estimated from the new alignment.  A
        result is accepted when its score beats `self.best_score` or when the
        accept mode is `AcceptMode.BLIND_MODE`; the first accepted strategy
        ends the iteration.

        After the loop, unless `self.return_final_tree_and_alignment` is set,
        `self.multilocus_dataset`, `self.tree_str` and `self.score` are reset
        to the best-scoring results (the dataset is rebuilt from shallow
        copies of each best locus alignment).  Otherwise they are left as
        they are and only checked to be non-None.

        Args:
            tmp_dir_par: Existing directory under which the per-iteration
                subdirectories and timestamp paths are placed.

        Raises:
            AssertionError: If `tmp_dir_par` does not exist, or if the final
                results are None when `return_final_tree_and_alignment` is set.
            RuntimeError: If `self.killed` is set at one of the checkpoints.
        """
        assert(os.path.exists(tmp_dir_par))

        self._reset_current_run_settings()
        self._reset_jobs()

        self.start_time = time.time()
        self.last_improvement_time = self.start_time

        configuration = self.configuration()
        # When the fraction-derived limit exceeds self.max_subproblem_size it
        # replaces the configured value.
        # NOTE(review): the original comment called this the "more stringent"
        # limit, yet the larger value wins here -- confirm the intent.
        frac_max = int(math.ceil(self.max_subproblem_frac*self.tree.n_leaves))
        if frac_max > self.max_subproblem_size:
            configuration['max_subproblem_size'] = frac_max

        # Realignment temporaries are deleted whenever iteration temporaries
        # are, or when they are not explicitly kept.
        delete_iteration_temps = not self.keep_iteration_temporaries
        delete_realignment_temps = delete_iteration_temps or (not self.keep_realignment_temporaries)
        configuration['delete_temps'] = delete_realignment_temps

        while self._keep_iterating():
            record_timestamp(os.path.join(tmp_dir_par, 'start_sateiter_timestamp.txt'))

            # create a subdirectory for this iteration
            curr_iter_tmp_dir_par = os.path.join(tmp_dir_par, 'step' + str(self.current_iteration))
            curr_iter_tmp_dir_par = self.sate_team.temp_fs.create_subdir(curr_iter_tmp_dir_par)
            _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
            break_strategy_index = 0
            this_iter_score_improved = False

            while True:
                break_strategy = self._get_break_strategy(break_strategy_index)
                if not break_strategy:
                    # no more strategies to try in this iteration
                    break
                context_str = "iter%d-%s" % (self.current_iteration, break_strategy)
                # create a subdirectory for this iteration/break_strategy
                curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par, break_strategy)
                curr_tmp_dir_par = self.sate_team.temp_fs.create_subdir(curr_tmp_dir_par)
                record_timestamp(os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))
                # Align (with decomposition...)
                self.status('Step %d. Realigning with decomposition strategy set to %s' % (self.current_iteration, break_strategy))
                if self.killed:
                    raise RuntimeError("SATe Job killed")
                # The aligner receives its own copy of the current tree.
                tree_for_aligner = self.get_tree_copy()
                aligner = SateAlignerJob(multilocus_dataset=self.multilocus_dataset,
                                         sate_team=self.sate_team,
                                         tree=tree_for_aligner,
                                         tmp_dir_par=curr_tmp_dir_par,
                                         **configuration)
                self.sate_aligner_job = aligner
                aligner.launch_alignment(break_strategy=break_strategy,
                                         context_str=context_str)

                new_multilocus_dataset = aligner.get_results()
                self.sate_aligner_job = None
                del aligner

                record_timestamp(os.path.join(curr_tmp_dir_par, 'start_treeinference_timestamp.txt'))
                # Tree inference
                if self.start_tree_search_from_current:
                    start_from = self.tree
                else:
                    start_from = None
                self.status('Step %d. Alignment obtained. Tree inference beginning...' % (self.current_iteration))
                if self.killed:
                    raise RuntimeError("SATe Job killed")
                tbj = self.sate_team.tree_estimator.create_job(new_multilocus_dataset,
                                                               starting_tree=start_from,
                                                               num_cpus=self.num_cpus,
                                                               context_str=context_str + " tree",
                                                               tmp_dir_par=curr_tmp_dir_par,
                                                               delete_temps=delete_iteration_temps)
                self.tree_build_job = tbj
                jobq.put(tbj)
                new_score, new_tree_str = tbj.get_results()
                self.tree_build_job = None
                del tbj
                if self.killed:
                    raise RuntimeError("SATe Job killed")

                record_timestamp(os.path.join(curr_tmp_dir_par, 'end_treeinference_timestamp.txt'))
                curr_timestamp = time.time()
                accept_iteration = False

                if self.score is None:
                    self.score = new_score

                # A new best score is always stored and accepted.
                if self.best_score is None or new_score > self.best_score:
                    self.store_optimum_results(new_multilocus_dataset,
                            new_tree_str,
                            new_score,
                            curr_timestamp)
                    this_iter_score_improved = True
                    accept_iteration = True

                # In blind mode the result is accepted regardless of score.
                if self._get_accept_mode(new_score=new_score, break_strategy_index=break_strategy_index) == AcceptMode.BLIND_MODE:
                    if self.blind_mode_is_final:
                        self.is_stuck_in_blind = True
                        # record only the first switch to blind mode
                        if self.switch_to_blind_timestamp is None:
                            if self._blindmode_trigger:
                                _LOG.debug("Blind runmode trigger = %s" % self._blindmode_trigger)
                            self.switch_to_blind_iter = self.current_iteration
                            self.switch_to_blind_timestamp = curr_timestamp
                    accept_iteration = True

                if accept_iteration:
                    self.score = new_score
                    self.multilocus_dataset = new_multilocus_dataset
                    self.tree_str = new_tree_str
                    self.status('realignment accepted.')
                    # we do not want to continue to try different breaking strategies for this iteration so we break
                    self.status('current score: %s, best score: %s' % (self.score, self.best_score) )
                    break
                else:
                    self.status('realignment NOT accepted.')
                break_strategy_index += 1

            if not this_iter_score_improved:
                self.num_iter_since_imp += 1

            self.current_iteration += 1

        if self._termination_trigger:
            _LOG.debug("Termination trigger = %s" % self._termination_trigger)
        record_timestamp(os.path.join(tmp_dir_par, 'end_sateiter_timestamp.txt'))

        # Unless the final iteration's tree/alignment pair was requested,
        # replace the current results with the best-scoring ones.
        if not self.return_final_tree_and_alignment:
            self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta()
            for locus_alignment in self.best_multilocus_dataset:
                self.multilocus_dataset.append(copy.copy(locus_alignment))
            self.tree_str = self.best_tree_str
            self.score = self.best_score
        else:
            assert self.multilocus_dataset is not None
            assert self.tree_str is not None
            assert self.score is not None