コード例 #1
0
 def start(self):
     """Launch this job's subprocess, capturing stdout/stderr.

     The 'stdout'/'stderr' entries of ``self._kwargs`` may be either a
     file path (opened here, creating intermediate directories) or an
     already-open file object, which is passed straight to Popen.  When
     absent, output is captured to ``.Job.stdout.txt`` /
     ``.Job.stderr.txt`` in the job's working directory.
     ``launched_event`` is always set, even on failure, so waiting
     threads are released; failures are recorded in ``self.error`` and
     re-raised.
     """
     assert self.process is None, "Relaunching jobs is not allowed"
     try:
         _LOG.debug('launching %s.\n setting event' % " ".join(self._invocation))
         proc_cwd = self._kwargs.get('cwd', os.curdir)
         k = dict(self._kwargs)
         stdout_arg = self._kwargs.get('stdout', None)
         stderr_arg = self._kwargs.get('stderr', None)
         # Bug fix: callers (e.g. the tool wrappers' create_job methods)
         # pass stdout=<open file object>; the old code unconditionally
         # treated the value as a path and handed it to
         # open_with_intermediates.
         if not stdout_arg:
             self._stdout_fo = open_with_intermediates(
                     os.path.join(proc_cwd, '.Job.stdout.txt'), 'w')
         elif isinstance(stdout_arg, str):
             self._stdout_fo = open_with_intermediates(stdout_arg, 'w')
         else:
             self._stdout_fo = stdout_arg
         k['stdout'] = self._stdout_fo
         if not stderr_arg:
             self._stderr_fo = open_with_intermediates(
                     os.path.join(proc_cwd, '.Job.stderr.txt'), 'w')
         elif isinstance(stderr_arg, str):
             self._stderr_fo = open_with_intermediates(stderr_arg, 'w')
         else:
             self._stderr_fo = stderr_arg
         k['stderr'] = self._stderr_fo
         self.process = Popen(self._invocation, stdin=PIPE, **k)
         self.set_id(self.process.pid)
         _LOG.debug('setting launched_event')
     except:
         # Record the failure for observers before propagating it.
         self.error = RuntimeError('The invocation:\n"%s"\nfailed' % '" "'.join(self._invocation))
         raise
     finally:
         # Always wake threads waiting for the launch to complete.
         self.launched_event.set()
コード例 #2
0
ファイル: scheduler.py プロジェクト: sara62/sate-core
 def start(self):
     """Spawn the subprocess for this job and record its pid as the job id.

     Output streams named in ``self._kwargs`` are opened as files
     (intermediate directories created as needed); missing streams are
     captured to default files in the working directory.  The
     ``launched_event`` is set unconditionally so waiters never hang; any
     launch failure is stored in ``self.error`` and re-raised.
     """
     assert self.process is None, "Relaunching jobs is not allowed"
     try:
         _LOG.debug("launching %s.\n setting event" % " ".join(self._invocation))
         work_dir = self._kwargs.get("cwd", os.curdir)
         popen_kwargs = dict(self._kwargs)
         out_path = self._kwargs.get("stdout", None)
         err_path = self._kwargs.get("stderr", None)
         # Fall back to per-job capture files when no path was supplied.
         out_target = out_path if out_path else os.path.join(work_dir, ".Job.stdout.txt")
         err_target = err_path if err_path else os.path.join(work_dir, ".Job.stderr.txt")
         self._stdout_fo = open_with_intermediates(out_target, "w")
         popen_kwargs["stdout"] = self._stdout_fo
         self._stderr_fo = open_with_intermediates(err_target, "w")
         popen_kwargs["stderr"] = self._stderr_fo
         self.process = Popen(self._invocation, stdin=PIPE, **popen_kwargs)
         self.set_id(self.process.pid)
         _LOG.debug("setting launched_event")
     except:
         self.error = RuntimeError('The invocation:\n"%s"\nfailed' % '" "'.join(self._invocation))
         raise
     finally:
         self.launched_event.set()
コード例 #3
0
ファイル: tools.py プロジェクト: rohanmaddamsetti/lib
    def create_job(self, alignment, starting_tree=None, name='default', **kwargs):
        """Build a DispatchableJob that runs the random-tree script.

        The job writes its score to ``score_fn`` (also used as the job's
        stdout) and its tree to ``output.tre``; the attached result
        processor parses both and returns ``(score, tree_str)``.
        Honors ``delete_temps`` (kwarg or instance default) by removing
        the scratch directory after results are read.
        """
        scratch_dir, seqfn, dt, score_fn = self._prepare_input(alignment, **kwargs)
        tree_fn = os.path.join(scratch_dir, 'output.tre')
        invoc = [sys.executable,
                self.exe,
                seqfn,
                dt,
                tree_fn,
                ]
        score_fileobj = open_with_intermediates(score_fn, 'w')

        dirs_to_delete = []
        if kwargs.get('delete_temps', self.delete_temps):
            dirs_to_delete.append(scratch_dir)

        # Default arguments bind the current values so the processor is
        # self-contained when the scheduler eventually calls it.
        def randtree_result_processor(to_close=score_fileobj,
                                      score_fn=score_fn,
                                      fn=tree_fn,
                                      dirs_to_delete=dirs_to_delete,
                                      temp_fs=self.temp_fs):
            to_close.close()
            # Plain 'r' replaces the 'rU' mode (removed in Python 3.11);
            # text mode already uses universal newlines.  `with` guarantees
            # the handles are closed even if parsing fails.
            with open(score_fn, 'r') as score_f:
                score = float(score_f.read().strip())
            with open(fn, 'r') as tree_f:
                tree_str = tree_f.read().strip()
            for d in dirs_to_delete:
                temp_fs.remove_dir(d)
            return (score, tree_str)

        job_id = kwargs.get('context_str', '') + '_randtree'
        job = DispatchableJob(invoc,
                              result_processor=randtree_result_processor,
                              cwd=scratch_dir,
                              context_str=job_id,
                              stdout=score_fileobj)
        return job
コード例 #4
0
 def save_to_filepath(self, filepath):
     """Write the current configuration to ``filepath``.

     A ``None`` filepath defaults to ``~/.sate/sate.cfg``.  Each category
     is flushed into the ConfigParser before writing.
     """
     if filepath is None:
         filepath = os.path.expanduser(os.path.join( '~', '.sate', 'sate.cfg'))
     # ConfigParser.write() emits text, so the file must be opened in text
     # mode ('wb' breaks under Python 3).
     f = open_with_intermediates(filepath, 'w')
     try:
         for g in self.get_categories():
             g.set_config_parser_fields(self._config_parser)
         self._config_parser.write(f)
     finally:
         # Close even if serialization raises, so the handle never leaks.
         f.close()
コード例 #5
0
 def create_input_files(self,
         job_subdir,
         input_subdir=None):
     """Create five empty placeholder FASTA files under the job directory.

     Files are named data1.fasta .. data5.fasta inside
     ``top_dir/job_subdir[/input_subdir]`` (intermediate directories are
     created as needed).  Returns the list of created paths in order.
     """
     seq_dir = os.path.join(self.top_dir, job_subdir)
     if input_subdir is not None:
         seq_dir = os.path.join(seq_dir, input_subdir)
     src_paths = []
     for file_num in range(1, 6):
         path = os.path.join(seq_dir, "data%d.fasta" % file_num)
         src_paths.append(path)
         handle = filemgr.open_with_intermediates(path, "w")
         handle.close()
     return src_paths
コード例 #6
0
ファイル: scheduler.py プロジェクト: rohanmaddamsetti/lib
 def start(self):
     """Launch this job's subprocess and record its pid as the job id.

     Creates default stdout/stderr capture files (via
     open_with_intermediates) only when the caller did not supply its own
     'stdout'/'stderr' kwargs, spawns the process with a stdin PIPE, and
     stores the pid via set_id().  On any failure the error is recorded
     in self.error and re-raised; launched_event is always set so waiting
     threads wake up.
     """
     assert self.process is None, "Relaunching jobs is not allowed"
     try:
         _LOG.debug('launching %s.\n setting event' % " ".join(self._invocation))
         proc_cwd = self._kwargs.get('cwd', os.curdir)
         k = dict(self._kwargs)
         # Only create capture files for streams the caller did not supply;
         # a caller-provided value (e.g. an already-open file object from
         # create_job) is passed through to Popen untouched.
         if 'stdout' not in self._kwargs:
             self._stdout_fo = open_with_intermediates(os.path.join(proc_cwd, '.Job.stdout.txt'), 'w')
             k['stdout'] = self._stdout_fo
         if 'stderr' not in self._kwargs:
             self._stderr_fo = open_with_intermediates(os.path.join(proc_cwd, '.Job.stderr.txt'), 'w')
             k['stderr'] = self._stderr_fo
         self.process = Popen(self._invocation, stdin = PIPE, **k)
         self.set_id(self.process.pid)
         #f = open('.%s.pid' % self.get_id(), 'w')
         #f.close()
         _LOG.debug('setting launched_event')
     except:
         # Record the failure for readers of self.error, then propagate.
         self.error = RuntimeError('The invocation:\n"%s"\nfailed' % '" "'.join(self._invocation))
         raise
     finally:
         # Set unconditionally so threads blocked on launched_event never hang.
         self.launched_event.set()
コード例 #7
0
ファイル: tools.py プロジェクト: rohanmaddamsetti/lib
    def create_job(self, alignment, starting_tree=None, **kwargs):
        """Build a DispatchableJob that runs FastTree on *alignment*.

        Assembles the FastTree command line (datatype flag, model tokens,
        extra options, optional ``-intree`` starting tree), redirects the
        program's stdout to a ``results`` file in the scratch directory,
        and attaches a result processor that parses the tree/log output.
        With ``num_cpus > 1`` the MP binary is selected and
        OMP_NUM_THREADS is exported.
        """
        scratch_dir, seqfn, datatype, options = self._prepare_input(alignment, **kwargs)
        # Default to 1 so a missing/None 'num_cpus' kwarg cannot crash the
        # `num_cpus > 1` comparison (TypeError on Python 3).
        num_cpus = kwargs.get('num_cpus', 1) or 1
        log_file = os.path.join(scratch_dir, 'log')

        invoc = [self.exe, '-quiet']
        if datatype != '':
            invoc.append(datatype)

        model = self.model if self.model is not None else ''
        if model != '':
            invoc.extend(model.split(' '))
        if options is not None and len(options) >= 1:
            invoc.extend(options)

        fasttree_result = os.path.join(scratch_dir, 'results')
        results_fileobj = open_with_intermediates(fasttree_result, 'w')

        if starting_tree is not None:
            if isinstance(starting_tree, str):
                tree_str = starting_tree
            else:
                tree_str = starting_tree.compose_newick()
            tree_fn = os.path.join(os.path.abspath(scratch_dir), "start.tre")
            tree_file_obj = open(tree_fn, "w")
            try:
                tree_file_obj.write("%s;\n" % tree_str)
            finally:
                tree_file_obj.close()
            invoc.extend(['-intree', tree_fn])

        invoc.extend(['-log', log_file, seqfn])

        if num_cpus > 1:
            # os.environ (rather than os.putenv) keeps Python's view of the
            # environment consistent with what the child process inherits.
            os.environ["OMP_NUM_THREADS"] = str(num_cpus)
            invoc[0] += 'MP'

        def fasttree_result_processor():
            # Temp-dir cleanup is delegated to read_fasttree_results via
            # delete_dir (the old unused dirs_to_delete list was dropped).
            return read_fasttree_results(results_fileobj,
                                         scratch_dir,
                                         fasttree_result,
                                         log_file,
                                         delete_dir=kwargs.get('delete_temps', self.delete_temps))

        job_id = kwargs.get('context_str', '') + '_fasttree'
        job = DispatchableJob(invoc,
                              result_processor=fasttree_result_processor,
                              cwd=scratch_dir,
                              stdout=results_fileobj,
                              context_str=job_id)
        return job
コード例 #8
0
ファイル: tools.py プロジェクト: rohanmaddamsetti/lib
 def _write_partition_filepath(self, parfn, partitions, model):
     """Write a RAxML-style partition file for *partitions* to *parfn*.

     partitions -- list of (datatype, start, end) tuples, e.g.
         [("DNA", 1, 30), ("DNA", 31, 60), ("PROTEIN", 61, 100)]
     model -- RAxML model name; for PROTEIN partitions the matrix key is
         the suffix after the PROTGAMMA[I] / PROTCAT[I] prefix.
     """
     out = open_with_intermediates(parfn, 'w')
     for part_num, part in enumerate(partitions, start=1):
         datatype = part[0]
         key = ""
         if datatype == "DNA":
             key = "DNA"
         elif datatype == "PROTEIN":
             if model.startswith("PROTGAMMA"):
                 prefix = "PROTGAMMAI" if model.startswith("PROTGAMMAI") else "PROTGAMMA"
                 key = model[len(prefix):]
             if model.startswith("PROTCAT"):
                 prefix = "PROTCATI" if model.startswith("PROTCATI") else "PROTCAT"
                 key = model[len(prefix):]
         out.write("%s, p%s=%s-%s\n" % (key, part_num, part[1], part[2]))
     out.close()
コード例 #9
0
ファイル: tools.py プロジェクト: kibet-gilbert/sate-core
 def _write_partition_filepath(self, parfn, partitions, model):
     """Write a RAxML partition file describing *partitions* to *parfn*.

     partitions -- list of (datatype, start, end) tuples, e.g.
         [("DNA", 1, 30), ("DNA", 31, 60), ("PROTEIN", 61, 100)]
     model -- RAxML model name; for PROTEIN partitions the substitution
         matrix key is the suffix after the PROTGAMMA[I]/PROTCAT[I]
         prefix (an unrecognized protein model yields an empty key).
     """
     # partition --- list of tuples, [("DNA", 1, 30), ("DNA", 31, 60), ("PROTEIN", 61, 100)]
     file_obj = open_with_intermediates(parfn, 'w')
     count = 0
     for item in partitions:
         key = ""
         count += 1
         if item[0] == "DNA":
             key = "DNA"
         elif item[0] == "PROTEIN":
             # Strip the rate-heterogeneity prefix (with optional 'I' for
             # invariant sites) to recover the bare matrix name.
             if model.startswith("PROTGAMMA"):
                 key = model[len("PROTGAMMAI"):] if model.startswith(
                     "PROTGAMMAI") else model[len("PROTGAMMA"):]
             if model.startswith("PROTCAT"):
                 key = model[len("PROTCATI"):] if model.startswith(
                     "PROTCATI") else model[len("PROTCAT"):]
         file_obj.write("%s, p%s=%s-%s\n" % (key, count, item[1], item[2]))
     file_obj.close()
コード例 #10
0
ファイル: tools.py プロジェクト: rohanmaddamsetti/lib
    def create_job(self, alignment, guide_tree=None, **kwargs):
        """Build a DispatchableJob that runs MAFFT on *alignment*.

        MAFFT writes the alignment to stdout, so the job's stdout is
        redirected to the aligned-output file and the result processor
        reads it back via read_internal_alignment.  Single-taxon inputs
        short-circuit to a FakeJob.  Honors ``delete_temps`` (kwarg or
        instance default) for scratch-directory cleanup.
        """
        job_id = kwargs.get('context_str', '') + '_mafft'
        if alignment.get_num_taxa() == 1:
            return FakeJob(alignment, context_str=job_id)

        scratch_dir, seqfn, alignedfn = self._prepare_input(alignment, **kwargs)
        aligned_fileobj = open_with_intermediates(alignedfn, 'w')

        invoc = []
        if platform.system() == "Windows":
            invoc.append(self.exe)
        else:
            invoc.extend([sys.executable, self.exe])
        # Accurate (L-INS-i) settings only for small inputs; large ones use
        # MAFFT's defaults.
        if len(alignment) <= 200 and alignment.max_sequence_length() < 10000:
            invoc.extend(['--localpair', '--maxiterate', '1000'])
        if '--ep' not in self.user_opts:
            invoc.extend(['--ep', '0.123'])
        invoc.extend(['--quiet', seqfn])
        invoc.extend(self.user_opts)

        # The MAFFT job creation is slightly different from the other
        #   aligners because we redirect and read standard output.

        dirs_to_delete = []
        if kwargs.get('delete_temps', self.delete_temps):
            dirs_to_delete.append(scratch_dir)

        # Default arguments bind the current values so the processor is
        # self-contained when it eventually runs.  Fix: the body now uses
        # the bound `fn` instead of silently re-reading the closure's
        # `alignedfn` (matching the other aligners' processors).
        def mafft_result_processor(to_close=aligned_fileobj,
                                   fn=alignedfn,
                                   datatype=alignment.datatype,
                                   dirs_to_delete=dirs_to_delete,
                                   temp_fs=self.temp_fs):
            to_close.close()
            return read_internal_alignment(fn=fn,
                                           datatype=datatype,
                                           dirs_to_delete=dirs_to_delete,
                                           temp_fs=temp_fs)

        job = DispatchableJob(invoc,
                              result_processor=mafft_result_processor,
                              cwd=scratch_dir,
                              stdout=aligned_fileobj,
                              context_str=job_id)
        return job
コード例 #11
0
ファイル: mainsate.py プロジェクト: rohanmaddamsetti/lib
def finish_sate_execution(sate_team,
                          user_config,
                          temporaries_dir,
                          multilocus_dataset,
                          sate_products):
    global _RunningJobs
    # get the RAxML model #TODO: this should check for the tree_estimator.  Currently we only support raxml, so this works...
    model = user_config.raxml.model

    options = user_config.commandline

    user_config.save_to_filepath(os.path.join(temporaries_dir, 'last_used.cfg'))
    if options.timesfile:
        f = open_with_intermediates(options.timesfile, 'a')
        f.close()
        set_timing_log_filepath(options.timesfile)
    ############################################################################
    # We must read the incoming tree in before we call the get_sequences_for_sate
    #   function that relabels that taxa in the dataset
    ######
    tree_file = options.treefile
    if tree_file:
        if not os.path.exists(tree_file):
            raise Exception('The tree file "%s" does not exist' % tree_file)
        tree_f = open(tree_file, 'rU')
        MESSENGER.send_info('Reading starting trees from "%s"...' % tree_file)
        tree_list = read_and_encode_splits(multilocus_dataset.dataset, tree_f)
        tree_f.close()
        if len(tree_list) > 1:
            MESSENGER.send_warning('%d starting trees found in "%s". The first tree will be used.' % (len(tree_list), tree_file))
        starting_tree = tree_list[0]
        score = None

    ############################################################################
    # This will relabel the taxa if they have problematic names
    #####
    multilocus_dataset.relabel_for_sate()

    options.aligned = all( [i.is_aligned() for i in multilocus_dataset] )

    ############################################################################
    # Launch threads to do work
    #####
    sate_config = user_config.get("sate")
    start_worker(sate_config.num_cpus)

    ############################################################################
    # Be prepared to kill any long running jobs
    #####
    prev_signals = []
    for sig in [signal.SIGTERM, signal.SIGABRT, signal.SIGINT]: # signal.SIGABRT, signal.SIGBUS, signal.SIGINT, signal.SIGKILL, signal.SIGSTOP]:
        prev_handler = signal.signal(sig, killed_handler)
        prev_signals.append((sig, prev_handler))

    try:
        if tree_file:
            # getting the newick string here will allow us to get a string that is in terms of the correct taxon labels
            starting_tree_str = starting_tree.compose_newick()
        else:
            MESSENGER.send_info("Performing initial tree search to get starting tree...")
            if not options.aligned:
                MESSENGER.send_info("Performing initial alignment of the entire data matrix...")
                init_aln_dir = os.path.join(temporaries_dir, 'init_aln')
                init_aln_dir = sate_team.temp_fs.create_subdir(init_aln_dir)
                delete_aln_temps = not (options.keeptemp and options.keepalignmenttemps)
                new_alignment_list= []
                for unaligned_seqs in multilocus_dataset:
                    job = sate_team.aligner.create_job(unaligned_seqs,
                                                       tmp_dir_par=init_aln_dir,
                                                       context_str="initalign",
                                                       delete_temps=delete_aln_temps)
                    _RunningJobs = job
                    jobq.put(job)
                    new_alignment = job.get_results()
                    _RunningJobs = None
                    new_alignment_list.append(new_alignment)
                for locus_index, new_alignment in enumerate(new_alignment_list):
                    multilocus_dataset[locus_index] = new_alignment
                if delete_aln_temps:
                    sate_team.temp_fs.remove_dir(init_aln_dir)
            else:
                MESSENGER.send_info("Input sequences assumed to be aligned (based on sequence lengths).")

            MESSENGER.send_info("Performing initial tree search to get starting tree...")
            init_tree_dir = os.path.join(temporaries_dir, 'init_tree')
            init_tree_dir = sate_team.temp_fs.create_subdir(init_tree_dir)
            delete_tree_temps = not options.keeptemp
            job = sate_team.tree_estimator.create_job(multilocus_dataset,
                                                    tmp_dir_par=init_tree_dir,
                                                    num_cpus=sate_config.num_cpus,
                                                    context_str="inittree",
                                                    delete_temps=delete_tree_temps)
            _RunningJobs = job
            jobq.put(job)
            score, starting_tree_str = job.get_results()
            _RunningJobs = None
            if delete_tree_temps:
                sate_team.temp_fs.remove_dir(init_tree_dir)
        _LOG.debug('We have the tree and whole_alignment, partitions...')

        sate_config_dict = sate_config.dict()

        if options.keeptemp:
            sate_config_dict['keep_iteration_temporaries'] = True
            if options.keepalignmenttemps:
                sate_config_dict['keep_realignment_temporaries'] = True

        job = SateJob(multilocus_dataset=multilocus_dataset,
                        sate_team=sate_team,
                        name=options.job,
                        status_messages=MESSENGER.send_info,
                        **sate_config_dict)
        job.tree_str = starting_tree_str
        if score is not None:
            job.store_optimum_results(new_multilocus_dataset=multilocus_dataset,
                    new_tree_str=starting_tree_str,
                    new_score=score,
                    curr_timestamp=time.time())

        _RunningJobs = job
        MESSENGER.send_info("Starting SATe algorithm on initial tree...")
        job.run(tmp_dir_par=temporaries_dir)
        _RunningJobs = None
        job.multilocus_dataset.restore_taxon_names()
        assert len(sate_products.alignment_streams) == len(job.multilocus_dataset)
        for i, alignment in enumerate(job.multilocus_dataset):
            alignment_stream = sate_products.alignment_streams[i]
            MESSENGER.send_info("Writing final alignment to %s" % alignment_stream.name)
            alignment.write(alignment_stream, file_format="FASTA")
            alignment_stream.close()


        MESSENGER.send_info("Writing final tree to %s" % sate_products.tree_stream.name)
        tree_str = job.tree.compose_newick()
        sate_products.tree_stream.write("%s;\n" % tree_str)


        #outtree_fn = options.result
        #if outtree_fn is None:
        #    if options.multilocus:
        #        outtree_fn = os.path.join(seqdir, "combined_%s.tre" % options.job)
        #    else:
        #        outtree_fn = aln_filename + ".tre"
        #MESSENGER.send_info("Writing final tree to %s" % outtree_fn)
        #tree_str = job.tree.compose_newick()
        #sate_products.tree_stream.write("%s;\n" % tree_str)


        MESSENGER.send_info("Writing final likelihood score to %s" % sate_products.score_stream.name)
        sate_products.score_stream.write("%s\n" % job.score)
    finally:
        for el in prev_signals:
            sig, prev_handler = el
            if prev_handler is None:
                signal.signal(sig, signal.SIG_DFL)
            else:
                signal.signal(sig, prev_handler)
コード例 #12
0
ファイル: mainsate.py プロジェクト: faircloth-lab/sate-core
def finish_sate_execution(sate_team,
                          user_config,
                          temporaries_dir,
                          multilocus_dataset,
                          sate_products):
    """Drive a full SATe run and write its final products.

    Obtains a starting tree (read from --treefile, or computed after an
    optional initial alignment pass), runs the SATe iteration via SateJob
    unless --two-phase is set, optionally post-processes the result with a
    RAxML search, then writes the resulting alignment(s), tree, and
    likelihood score to the streams held by *sate_products*.

    Side effects: installs temporary signal handlers (restored in the
    ``finally`` block), starts worker threads, mutates
    *multilocus_dataset* (taxon relabelling and RNA<->DNA conversion),
    writes a name-translation file, and updates the module-level
    ``_RunningJobs`` so the signal handler can kill in-flight jobs.
    """
    global _RunningJobs
    # get the RAxML model #TODO: this should check for the tree_estimator.  Currently we only support raxml, so this works...
    model = user_config.raxml.model

    options = user_config.commandline

    user_config.save_to_filepath(os.path.join(temporaries_dir, 'last_used.cfg'))
    if options.timesfile:
        # Touch the timing log (creating intermediate directories) before
        # handing the path to the timing machinery.
        f = open_with_intermediates(options.timesfile, 'a')
        f.close()
        set_timing_log_filepath(options.timesfile)
    ############################################################################
    # We must read the incoming tree in before we call the get_sequences_for_sate
    #   function that relabels that taxa in the dataset
    ######
    # Paths of the temp files where the final alignment/tree were first
    # written; reported to the user at the end if set.
    alignment_as_tmp_filename_to_report = None
    tree_as_tmp_filename_to_report = None

    tree_file = options.treefile
    if tree_file:
        if not os.path.exists(tree_file):
            raise Exception('The tree file "%s" does not exist' % tree_file)
        # NOTE(review): the 'rU' open mode was removed in Python 3.11; this
        # code appears to target Python 2.
        tree_f = open(tree_file, 'rU')
        MESSENGER.send_info('Reading starting trees from "%s"...' % tree_file)
        try:
            tree_list = read_and_encode_splits(multilocus_dataset.dataset, tree_f,
                    starting_tree=True)
        except KeyError:
            MESSENGER.send_error("Error in reading the treefile, probably due to a name in the tree that does not match the names in the input sequence files.\n")
            raise
        except:
            MESSENGER.send_error("Error in reading the treefile.\n")
            raise
        tree_f.close()
        if len(tree_list) > 1:
            MESSENGER.send_warning('%d starting trees found in "%s". The first tree will be used.' % (len(tree_list), tree_file))
        starting_tree = tree_list[0]
        # score stays None for a user-supplied tree; it is computed below
        # only when the starting tree comes from a tree search.
        score = None
        tree_as_tmp_filename_to_report = tree_file

    ############################################################################
    # This will relabel the taxa if they have problematic names
    #####
    multilocus_dataset.relabel_for_sate()

    ############################################################################
    # This ensures all nucleotide data is DNA internally
    #####
    restore_to_rna = False
    if user_config.commandline.datatype.upper() == 'RNA':
        multilocus_dataset.convert_rna_to_dna()
        user_config.commandline.datatype = 'DNA'
        restore_to_rna = True

    # Best-effort export of the safe-name -> original-name mapping; a
    # failure here is reported but does not abort the run.
    export_names = True
    if export_names:
        try:
            name_filename = sate_products.get_abs_path_for_tag('name_translation.txt')
            name_output = open(name_filename, 'w')
            safe2real = multilocus_dataset.safe_to_real_names
            # NOTE(review): dict.keys() has no .sort() on Python 3 (use
            # sorted()); this block assumes Python 2 — TODO confirm.
            safe_list = safe2real.keys()
            safe_list.sort()
            for safe in safe_list:
                orig = safe2real[safe][0]
                name_output.write("%s\n%s\n\n" % (safe, orig))
            name_output.close()
            MESSENGER.send_info("Name translation information saved to %s as safe name, original name, blank line format." % name_filename)
        except:
            MESSENGER.send_info("Error exporting saving name translation to %s" % name_filename)


    if options.aligned:
        options.aligned = all( [i.is_aligned() for i in multilocus_dataset] )

    ############################################################################
    # Launch threads to do work
    #####
    sate_config = user_config.get("sate")
    start_worker(sate_config.num_cpus)

    ############################################################################
    # Be prepared to kill any long running jobs
    #####
    prev_signals = []
    for sig in [signal.SIGTERM, signal.SIGABRT, signal.SIGINT]: # signal.SIGABRT, signal.SIGBUS, signal.SIGINT, signal.SIGKILL, signal.SIGSTOP]:
        prev_handler = signal.signal(sig, killed_handler)
        prev_signals.append((sig, prev_handler))

    try:
        if (not options.two_phase) and tree_file:
            # getting the newick string here will allow us to get a string that is in terms of the correct taxon labels
            starting_tree_str = starting_tree.compose_newick()
        else:
            if not options.two_phase:
                MESSENGER.send_info("Creating a starting tree for the SATe algorithm...")
            if (options.two_phase) or (not options.aligned):
                MESSENGER.send_info("Performing initial alignment of the entire data matrix...")
                init_aln_dir = os.path.join(temporaries_dir, 'init_aln')
                init_aln_dir = sate_team.temp_fs.create_subdir(init_aln_dir)
                delete_aln_temps = not (options.keeptemp and options.keepalignmenttemps)
                new_alignment_list= []
                aln_job_list = []
                # All per-locus alignment jobs are created first, then run
                # concurrently through the job queue.
                for unaligned_seqs in multilocus_dataset:
                    job = sate_team.aligner.create_job(unaligned_seqs,
                                                       tmp_dir_par=init_aln_dir,
                                                       context_str="initalign",
                                                       delete_temps=delete_aln_temps)
                    aln_job_list.append(job)
                _RunningJobs = aln_job_list
                for job in aln_job_list:
                    jobq.put(job)
                for job in aln_job_list:
                    new_alignment = job.get_results()
                    new_alignment_list.append(new_alignment)
                _RunningJobs = None
                for locus_index, new_alignment in enumerate(new_alignment_list):
                    multilocus_dataset[locus_index] = new_alignment
                if delete_aln_temps:
                    sate_team.temp_fs.remove_dir(init_aln_dir)
            else:
                MESSENGER.send_info("Input sequences assumed to be aligned (based on sequence lengths).")

            MESSENGER.send_info("Performing initial tree search to get starting tree...")
            init_tree_dir = os.path.join(temporaries_dir, 'init_tree')
            init_tree_dir = sate_team.temp_fs.create_subdir(init_tree_dir)
            delete_tree_temps = not options.keeptemp
            job = sate_team.tree_estimator.create_job(multilocus_dataset,
                                                    tmp_dir_par=init_tree_dir,
                                                    num_cpus=sate_config.num_cpus,
                                                    context_str="inittree",
                                                    delete_temps=delete_tree_temps,
                                                    sate_products=sate_products,
                                                    step_num='initialsearch')
            _RunningJobs = job
            jobq.put(job)
            score, starting_tree_str = job.get_results()
            _RunningJobs = None
            alignment_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output("initialsearch", TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
            tree_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output("initialsearch", TEMP_TREE_TAG, allow_existing=True)
            if delete_tree_temps:
                sate_team.temp_fs.remove_dir(init_tree_dir)
        _LOG.debug('We have the tree and whole_alignment, partitions...')

        sate_config_dict = sate_config.dict()

        if options.keeptemp:
            sate_config_dict['keep_iteration_temporaries'] = True
            if options.keepalignmenttemps:
                sate_config_dict['keep_realignment_temporaries'] = True

        job = SateJob(multilocus_dataset=multilocus_dataset,
                        sate_team=sate_team,
                        name=options.job,
                        status_messages=MESSENGER.send_info,
                        score=score,
                        **sate_config_dict)
        job.tree_str = starting_tree_str
        job.curr_iter_align_tmp_filename = alignment_as_tmp_filename_to_report
        job.curr_iter_tree_tmp_filename = tree_as_tmp_filename_to_report
        # Seed the optimum only when the starting tree came with a score
        # (i.e. it was produced by the initial tree search above).
        if score is not None:
            job.store_optimum_results(new_multilocus_dataset=multilocus_dataset,
                    new_tree_str=starting_tree_str,
                    new_score=score,
                    curr_timestamp=time.time())

        if options.two_phase:
            MESSENGER.send_info("Exiting with the initial tree because the SATe algorithm is avoided when the --two-phase option is used.")
        else:
            _RunningJobs = job
            MESSENGER.send_info("Starting SATe algorithm on initial tree...")
            job.run(tmp_dir_par=temporaries_dir, sate_products=sate_products)
            _RunningJobs = None

            # Report either the last iteration's files or the best-scoring
            # iteration's, per the job's configuration.
            if job.return_final_tree_and_alignment:
                alignment_as_tmp_filename_to_report = job.curr_iter_align_tmp_filename
            else:
                alignment_as_tmp_filename_to_report = job.best_alignment_tmp_filename

            if user_config.commandline.raxml_search_after:
                raxml_model = user_config.raxml.model.strip()
                if not raxml_model:
                    # No explicit RAxML model: derive one from the FastTree
                    # model used by the tree estimator.
                    dt = user_config.commandline.datatype
                    mf = sate_team.tree_estimator.model
                    ms =  fasttree_to_raxml_model_str(dt, mf)
                    sate_team.raxml_tree_estimator.model = ms
                rte = sate_team.raxml_tree_estimator
                MESSENGER.send_info("Performing post-processing tree search in RAxML...")
                post_tree_dir = os.path.join(temporaries_dir, 'post_tree')
                post_tree_dir = sate_team.temp_fs.create_subdir(post_tree_dir)
                delete_tree_temps = not options.keeptemp
                starting_tree = None
                if user_config.sate.start_tree_search_from_current:
                    starting_tree = job.tree
                post_job = rte.create_job(job.multilocus_dataset,
                                    starting_tree=starting_tree,
                                    num_cpus=sate_config.num_cpus,
                                    context_str="postraxtree",
                                    tmp_dir_par=post_tree_dir,
                                    delete_temps=delete_tree_temps,
                                    sate_products=sate_products,
                                    step_num="postraxtree")
                _RunningJobs = post_job
                jobq.put(post_job)
                post_score, post_tree = post_job.get_results()
                _RunningJobs = None
                tree_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output("postraxtree", TEMP_TREE_TAG, allow_existing=True)
                if delete_tree_temps:
                    sate_team.temp_fs.remove_dir(post_tree_dir)
                # Adopt the RAxML result; promote it to "best" only if it
                # actually improves the score.
                job.tree_str = post_tree
                job.score = post_score
                if post_score > job.best_score:
                    job.best_tree_str = post_tree
                    job.best_score = post_score
            else:
                if job.return_final_tree_and_alignment:
                    tree_as_tmp_filename_to_report = job.curr_iter_tree_tmp_filename
                else:
                    tree_as_tmp_filename_to_report = job.best_tree_tmp_filename


        #######################################################################
        # Restore original taxon names and RNA characters
        #####
        job.multilocus_dataset.restore_taxon_names()
        if restore_to_rna:
            job.multilocus_dataset.convert_dna_to_rna()
            user_config.commandline.datatype = 'RNA'

        assert len(sate_products.alignment_streams) == len(job.multilocus_dataset)
        for i, alignment in enumerate(job.multilocus_dataset):
            alignment_stream = sate_products.alignment_streams[i]
            MESSENGER.send_info("Writing resulting alignment to %s" % alignment_stream.name)
            alignment.write(alignment_stream, file_format="FASTA")
            alignment_stream.close()


        MESSENGER.send_info("Writing resulting tree to %s" % sate_products.tree_stream.name)
        tree_str = job.tree.compose_newick()
        sate_products.tree_stream.write("%s;\n" % tree_str)


        #outtree_fn = options.result
        #if outtree_fn is None:
        #    if options.multilocus:
        #        outtree_fn = os.path.join(seqdir, "combined_%s.tre" % options.job)
        #    else:
        #        outtree_fn = aln_filename + ".tre"
        #MESSENGER.send_info("Writing resulting tree to %s" % outtree_fn)
        #tree_str = job.tree.compose_newick()
        #sate_products.tree_stream.write("%s;\n" % tree_str)


        MESSENGER.send_info("Writing resulting likelihood score to %s" % sate_products.score_stream.name)
        sate_products.score_stream.write("%s\n" % job.score)

        if alignment_as_tmp_filename_to_report is not None:
            MESSENGER.send_info('The resulting alignment (with the names in a "safe" form) was first written as the file "%s"' % alignment_as_tmp_filename_to_report)
        if tree_as_tmp_filename_to_report is not None:
            MESSENGER.send_info('The resulting tree (with the names in a "safe" form) was first written as the file "%s"' % tree_as_tmp_filename_to_report)

    finally:
        # Restore the signal handlers we replaced, whatever happened above.
        for el in prev_signals:
            sig, prev_handler = el
            if prev_handler is None:
                signal.signal(sig, signal.SIG_DFL)
            else:
                signal.signal(sig, prev_handler)
コード例 #13
0
def finish_sate_execution(sate_team, user_config, temporaries_dir,
                          multilocus_dataset, sate_products):
    """Run the SATe pipeline to completion and write the final outputs.

    Stages: (1) read a user-supplied starting tree, if any; (2) relabel
    taxa with "safe" names and normalize RNA to DNA; (3) optionally align
    the raw data and run an initial tree search; (4) run the iterative
    SATe algorithm (skipped under --two-phase); (5) optionally run a
    post-processing RAxML search; (6) restore the original names and
    characters and write the final alignment(s), tree, and likelihood
    score to the streams held by ``sate_products``.

    Side effects: sets the module-level ``_RunningJobs`` so the installed
    signal handler can kill in-flight jobs; temporarily installs
    ``killed_handler`` for SIGTERM/SIGABRT/SIGINT (always restored in the
    ``finally`` block); may toggle ``user_config.commandline.datatype``
    between 'RNA' and 'DNA'.
    """
    global _RunningJobs

    options = user_config.commandline

    # Record the exact configuration used for this run so it can be
    # reproduced later.
    user_config.save_to_filepath(os.path.join(temporaries_dir,
                                              'last_used.cfg'))
    if options.timesfile:
        # Touch the timing log (creating intermediate directories if
        # needed) before registering it as the timing-log destination.
        f = open_with_intermediates(options.timesfile, 'a')
        f.close()
        set_timing_log_filepath(options.timesfile)
    ############################################################################
    # We must read the incoming tree in before the taxa in the dataset are
    #   relabeled with "safe" names below.
    ######
    alignment_as_tmp_filename_to_report = None
    tree_as_tmp_filename_to_report = None

    tree_file = options.treefile
    if tree_file:
        if not os.path.exists(tree_file):
            raise Exception('The tree file "%s" does not exist' % tree_file)
        tree_f = open(tree_file, 'rU')
        MESSENGER.send_info('Reading starting trees from "%s"...' % tree_file)
        try:  # nested try/finally so the handle is closed on every path
            try:
                tree_list = read_and_encode_splits(multilocus_dataset.dataset,
                                                   tree_f,
                                                   starting_tree=True)
            except KeyError:
                # A KeyError almost always means a tree label with no
                # matching taxon in the sequence files.
                MESSENGER.send_error(
                    "Error in reading the treefile, probably due to a name in the tree that does not match the names in the input sequence files.\n"
                )
                raise
            except Exception:
                MESSENGER.send_error("Error in reading the treefile.\n")
                raise
        finally:
            tree_f.close()
        if len(tree_list) > 1:
            MESSENGER.send_warning(
                '%d starting trees found in "%s". The first tree will be used.'
                % (len(tree_list), tree_file))
        starting_tree = tree_list[0]
        score = None
        tree_as_tmp_filename_to_report = tree_file

    ############################################################################
    # This will relabel the taxa if they have problematic names
    #####
    multilocus_dataset.relabel_for_sate()

    ############################################################################
    # This ensures all nucleotide data is DNA internally
    #####
    restore_to_rna = False
    if user_config.commandline.datatype.upper() == 'RNA':
        multilocus_dataset.convert_rna_to_dna()
        user_config.commandline.datatype = 'DNA'
        restore_to_rna = True

    export_names = True
    if export_names:
        # Save the safe-name -> original-name mapping so the relabeled
        # outputs can be decoded by the user.  Best-effort: a failure here
        # should not abort the run.
        name_filename = None
        try:
            name_filename = sate_products.get_abs_path_for_tag(
                'name_translation.txt')
            safe2real = multilocus_dataset.safe_to_real_names
            name_output = open(name_filename, 'w')
            try:
                for safe in sorted(safe2real):
                    orig = safe2real[safe][0]
                    name_output.write("%s\n%s\n\n" % (safe, orig))
            finally:
                name_output.close()
            MESSENGER.send_info(
                "Name translation information saved to %s as safe name, original name, blank line format."
                % name_filename)
        except Exception:
            MESSENGER.send_info(
                "Error saving name translation to %s" % name_filename)

    if options.aligned:
        # Trust the --aligned flag only if the sequence lengths actually
        # agree within every locus.
        options.aligned = all(i.is_aligned() for i in multilocus_dataset)

    ############################################################################
    # Launch threads to do work
    #####
    sate_config = user_config.get("sate")
    start_worker(sate_config.num_cpus)

    ############################################################################
    # Be prepared to kill any long running jobs
    #####
    prev_signals = []
    for sig in [
            signal.SIGTERM, signal.SIGABRT, signal.SIGINT
    ]:  # signal.SIGABRT, signal.SIGBUS, signal.SIGINT, signal.SIGKILL, signal.SIGSTOP]:
        prev_handler = signal.signal(sig, killed_handler)
        prev_signals.append((sig, prev_handler))

    try:
        if (not options.two_phase) and tree_file:
            # getting the newick string here will allow us to get a string
            # that is in terms of the correct ("safe") taxon labels
            starting_tree_str = starting_tree.compose_newick()
        else:
            if not options.two_phase:
                MESSENGER.send_info(
                    "Creating a starting tree for the SATe algorithm...")
            if (options.two_phase) or (not options.aligned):
                MESSENGER.send_info(
                    "Performing initial alignment of the entire data matrix..."
                )
                init_aln_dir = os.path.join(temporaries_dir, 'init_aln')
                init_aln_dir = sate_team.temp_fs.create_subdir(init_aln_dir)
                delete_aln_temps = not (options.keeptemp
                                        and options.keepalignmenttemps)
                new_alignment_list = []
                aln_job_list = []
                # One alignment job per locus; all jobs are queued before
                # any results are collected so they can run concurrently.
                for unaligned_seqs in multilocus_dataset:
                    job = sate_team.aligner.create_job(
                        unaligned_seqs,
                        tmp_dir_par=init_aln_dir,
                        context_str="initalign",
                        delete_temps=delete_aln_temps)
                    aln_job_list.append(job)
                _RunningJobs = aln_job_list
                for job in aln_job_list:
                    jobq.put(job)
                for job in aln_job_list:
                    new_alignment = job.get_results()
                    new_alignment_list.append(new_alignment)
                _RunningJobs = None
                for locus_index, new_alignment in enumerate(
                        new_alignment_list):
                    multilocus_dataset[locus_index] = new_alignment
                if delete_aln_temps:
                    sate_team.temp_fs.remove_dir(init_aln_dir)
            else:
                MESSENGER.send_info(
                    "Input sequences assumed to be aligned (based on sequence lengths)."
                )

            MESSENGER.send_info(
                "Performing initial tree search to get starting tree...")
            init_tree_dir = os.path.join(temporaries_dir, 'init_tree')
            init_tree_dir = sate_team.temp_fs.create_subdir(init_tree_dir)
            delete_tree_temps = not options.keeptemp
            job = sate_team.tree_estimator.create_job(
                multilocus_dataset,
                tmp_dir_par=init_tree_dir,
                num_cpus=sate_config.num_cpus,
                context_str="inittree",
                delete_temps=delete_tree_temps,
                sate_products=sate_products,
                step_num='initialsearch')
            _RunningJobs = job
            jobq.put(job)
            score, starting_tree_str = job.get_results()
            _RunningJobs = None
            # Remember where the "safe"-named intermediates were written so
            # they can be reported to the user at the end of the run.
            alignment_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output(
                "initialsearch", TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
            tree_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output(
                "initialsearch", TEMP_TREE_TAG, allow_existing=True)
            if delete_tree_temps:
                sate_team.temp_fs.remove_dir(init_tree_dir)
        _LOG.debug('We have the tree and whole_alignment, partitions...')

        sate_config_dict = sate_config.dict()

        if options.keeptemp:
            sate_config_dict['keep_iteration_temporaries'] = True
            if options.keepalignmenttemps:
                sate_config_dict['keep_realignment_temporaries'] = True

        job = SateJob(multilocus_dataset=multilocus_dataset,
                      sate_team=sate_team,
                      name=options.job,
                      status_messages=MESSENGER.send_info,
                      score=score,
                      **sate_config_dict)
        job.tree_str = starting_tree_str
        job.curr_iter_align_tmp_filename = alignment_as_tmp_filename_to_report
        job.curr_iter_tree_tmp_filename = tree_as_tmp_filename_to_report
        # A starting score is only known when an initial tree search was
        # run; seed the optimum with it so SATe has a baseline to beat.
        if score is not None:
            job.store_optimum_results(
                new_multilocus_dataset=multilocus_dataset,
                new_tree_str=starting_tree_str,
                new_score=score,
                curr_timestamp=time.time())

        if options.two_phase:
            MESSENGER.send_info(
                "Exiting with the initial tree because the SATe algorithm is avoided when the --two-phase option is used."
            )
        else:
            _RunningJobs = job
            MESSENGER.send_info("Starting SATe algorithm on initial tree...")
            job.run(tmp_dir_par=temporaries_dir, sate_products=sate_products)
            _RunningJobs = None

            if job.return_final_tree_and_alignment:
                alignment_as_tmp_filename_to_report = job.curr_iter_align_tmp_filename
            else:
                alignment_as_tmp_filename_to_report = job.best_alignment_tmp_filename

            if user_config.commandline.raxml_search_after:
                # TODO: this should check for the tree_estimator.  Currently
                # only RAxML is supported for the post-processing search.
                raxml_model = user_config.raxml.model.strip()
                if not raxml_model:
                    # No explicit RAxML model configured: translate the
                    # fasttree model string into the RAxML equivalent.
                    dt = user_config.commandline.datatype
                    mf = sate_team.tree_estimator.model
                    ms = fasttree_to_raxml_model_str(dt, mf)
                    sate_team.raxml_tree_estimator.model = ms
                rte = sate_team.raxml_tree_estimator
                MESSENGER.send_info(
                    "Performing post-processing tree search in RAxML...")
                post_tree_dir = os.path.join(temporaries_dir, 'post_tree')
                post_tree_dir = sate_team.temp_fs.create_subdir(post_tree_dir)
                delete_tree_temps = not options.keeptemp
                starting_tree = None
                if user_config.sate.start_tree_search_from_current:
                    starting_tree = job.tree
                post_job = rte.create_job(job.multilocus_dataset,
                                          starting_tree=starting_tree,
                                          num_cpus=sate_config.num_cpus,
                                          context_str="postraxtree",
                                          tmp_dir_par=post_tree_dir,
                                          delete_temps=delete_tree_temps,
                                          sate_products=sate_products,
                                          step_num="postraxtree")
                _RunningJobs = post_job
                jobq.put(post_job)
                post_score, post_tree = post_job.get_results()
                _RunningJobs = None
                tree_as_tmp_filename_to_report = sate_products.get_abs_path_for_iter_output(
                    "postraxtree", TEMP_TREE_TAG, allow_existing=True)
                if delete_tree_temps:
                    sate_team.temp_fs.remove_dir(post_tree_dir)
                # The RAxML result always becomes the current tree/score,
                # and replaces the best only if it actually improves it.
                job.tree_str = post_tree
                job.score = post_score
                if post_score > job.best_score:
                    job.best_tree_str = post_tree
                    job.best_score = post_score
            else:
                if job.return_final_tree_and_alignment:
                    tree_as_tmp_filename_to_report = job.curr_iter_tree_tmp_filename
                else:
                    tree_as_tmp_filename_to_report = job.best_tree_tmp_filename

        #######################################################################
        # Restore original taxon names and RNA characters
        #####
        job.multilocus_dataset.restore_taxon_names()
        if restore_to_rna:
            job.multilocus_dataset.convert_dna_to_rna()
            user_config.commandline.datatype = 'RNA'

        # One output alignment stream per locus is expected.
        assert len(sate_products.alignment_streams) == len(
            job.multilocus_dataset)
        for i, alignment in enumerate(job.multilocus_dataset):
            alignment_stream = sate_products.alignment_streams[i]
            MESSENGER.send_info("Writing resulting alignment to %s" %
                                alignment_stream.name)
            alignment.write(alignment_stream, file_format="FASTA")
            alignment_stream.close()

        MESSENGER.send_info("Writing resulting tree to %s" %
                            sate_products.tree_stream.name)
        tree_str = job.tree.compose_newick()
        sate_products.tree_stream.write("%s;\n" % tree_str)

        MESSENGER.send_info("Writing resulting likelihood score to %s" %
                            sate_products.score_stream.name)
        sate_products.score_stream.write("%s\n" % job.score)

        # Tell the user where the "safe"-named intermediates live, since
        # those file names appear in log output and temporary directories.
        if alignment_as_tmp_filename_to_report is not None:
            MESSENGER.send_info(
                'The resulting alignment (with the names in a "safe" form) was first written as the file "%s"'
                % alignment_as_tmp_filename_to_report)
        if tree_as_tmp_filename_to_report is not None:
            MESSENGER.send_info(
                'The resulting tree (with the names in a "safe" form) was first written as the file "%s"'
                % tree_as_tmp_filename_to_report)

    finally:
        # Always restore the signal handlers we displaced above.
        for sig, prev_handler in prev_signals:
            if prev_handler is None:
                signal.signal(sig, signal.SIG_DFL)
            else:
                signal.signal(sig, prev_handler)