예제 #1
0
파일: prottest2.py 프로젝트: a1an77/ete
    def load_jobs(self):
        """Queue the per-model jobs of this task.

        For every model in ``self.models`` a PhyML job named "phyml-bionj"
        is appended to ``self.jobs``.  When ``self.lk_mode`` is "raxml", a
        RAxML job is queued right after it: it depends on the PhyML job and
        receives, through ``-t``, a tree path located in that job's folder
        under ``GLOBALS["tasks_dir"]``.
        """
        for model in self.models:
            phyml_args = self.args.copy()
            phyml_args["--model"] = model
            phyml_job = Job(
                self.conf["app"]["phyml"],
                phyml_args,
                parent_ids=[self.nodeid],
                jobname="phyml-bionj",
            )
            phyml_job.flag = "phyml"
            self.jobs.append(phyml_job)

            # Nothing else to queue unless likelihoods come from RAxML.
            if self.lk_mode != "raxml":
                continue

            phyml_tree = os.path.join(
                GLOBALS["tasks_dir"], phyml_job.jobid,
                self.alg_basename+"_phyml_tree.txt")
            optimize_args = {
                "-f": "e",
                "-s": self.alg_basename,
                "-m": "PROTGAMMA%s" % model,
                "-n": self.alg_basename+"."+model,
                "-t": phyml_tree,
            }
            optimize_job = Job(
                self.conf["app"]["raxml"],
                optimize_args,
                parent_ids=[phyml_job.jobid],
                jobname="raxml-tree-optimize",
            )
            # The RAxML job cannot start before its PhyML job.
            optimize_job.dependencies.add(phyml_job)
            optimize_job.flag = "raxml"
            optimize_job.model = model
            self.jobs.append(optimize_job)

        log.log(26, "Models to test %s", self.models)
예제 #2
0
파일: trimal.py 프로젝트: a1an77/ete
 def load_jobs(self):
     """Queue the single job of this task.

     The binary is resolved through the ``_app`` entry of this task's
     configuration section.  The job reads ``self.alg_fasta_file`` from the
     input directory (``-in``) and writes "clean.alg.fasta" (``-out``).
     """
     app_key = self.conf[self.confname]["_app"]

     job_args = self.args.copy()
     job_args["-in"] = pjoin(GLOBALS["input_dir"], self.alg_fasta_file)
     job_args["-out"] = "clean.alg.fasta"

     cleaning_job = Job(self.conf["app"][app_key], job_args,
                        parent_ids=[self.nodeid])
     # Register the alignment as an input file of the job.
     cleaning_job.add_input_file(self.alg_fasta_file)
     self.jobs.append(cleaning_job)
예제 #3
0
 def load_jobs(self):
     """Queue the single job of this task.

     The binary is resolved through the ``_app`` entry of this task's
     configuration section.  The job reads ``self.alg_fasta_file`` from the
     input directory (``-in``) and writes "clean.alg.fasta" (``-out``).
     """
     app_key = self.conf[self.confname]["_app"]

     job_args = self.args.copy()
     job_args["-in"] = pjoin(GLOBALS["input_dir"], self.alg_fasta_file)
     job_args["-out"] = "clean.alg.fasta"

     cleaning_job = Job(self.conf["app"][app_key], job_args,
                        parent_ids=[self.nodeid])
     # Register the alignment as an input file of the job.
     cleaning_job.add_input_file(self.alg_fasta_file)
     self.jobs.append(cleaning_job)
예제 #4
0
파일: muscle.py 프로젝트: F4L2/devoirs
 def load_jobs(self):
     """Queue the one Muscle job this task needs.

     The job reads ``self.multiseq_file`` from the input directory
     (``-in``) and writes the alignment to "alg.fasta" (``-out``).
     """
     app_key = self.conf[self.confname]["_app"]

     # Copy the configured arguments so the task's own mapping is untouched.
     muscle_args = OrderedDict(self.args)
     muscle_args["-in"] = pjoin(GLOBALS["input_dir"], self.multiseq_file)
     muscle_args["-out"] = "alg.fasta"

     muscle_job = Job(self.conf["app"][app_key], muscle_args,
                      parent_ids=[self.nodeid])
     muscle_job.add_input_file(self.multiseq_file)
     self.jobs.append(muscle_job)
예제 #5
0
파일: prottest.py 프로젝트: F4L2/devoirs
    def load_jobs(self):
        """Queue the per-model jobs of this task.

        For every model in ``self.models`` a PhyML job (category "bionj") is
        appended to ``self.jobs`` with the phylip alignment registered as an
        input file in the job's own directory.  When ``self.lk_mode`` is
        "raxml", a RAxML job (category "raxml") that depends on the PhyML job
        is queued as well; it reads the alignment and the tree path from the
        PhyML job directory.
        """
        for model in self.models:
            phyml_args = self.args.copy()
            phyml_args["--model"] = model
            phyml_job = Job(self.conf["app"]["phyml"], phyml_args,
                            parent_ids=[self.nodeid])
            phyml_job.jobname += "-bionj-" + model
            phyml_job.jobcat = "bionj"
            phyml_job.add_input_file(self.alg_phylip_file, phyml_job.jobdir)
            self.jobs.append(phyml_job)

            # Nothing else to queue unless likelihoods come from RAxML.
            if self.lk_mode != "raxml":
                continue

            alg_in_jobdir = pjoin(phyml_job.jobdir, self.alg_phylip_file)
            tree_in_jobdir = pjoin(phyml_job.jobdir,
                                   self.alg_phylip_file+"_phyml_tree.txt")
            optimize_args = {
                "-f": "e",
                "-s": alg_in_jobdir,
                "-m": "PROTGAMMA%s" % model,
                "-n": self.alg_phylip_file+"."+model,
                "-t": tree_in_jobdir,
            }
            optimize_job = Job(self.conf["app"]["raxml"], optimize_args,
                               parent_ids=[phyml_job.jobid])
            optimize_job.jobname += "-lk-optimize"
            # The RAxML job cannot start before its PhyML job.
            optimize_job.dependencies.add(phyml_job)
            optimize_job.model = model
            optimize_job.jobcat = "raxml"
            self.jobs.append(optimize_job)
예제 #6
0
    def load_jobs(self):
        """Queue the single T-Coffee job that combines the alignments.

        Every entry of ``self.all_alg_files`` is passed, as a path under the
        input directory, through ``-aln`` (space separated) and registered
        as an input file of the job.  Output goes to "mcoffee.fasta".
        """
        tcoffee_args = self.args.copy()
        tcoffee_args["-outfile"] = "mcoffee.fasta"
        tcoffee_args["-aln"] = ' '.join(
            pjoin(GLOBALS["input_dir"], alg_name)
            for alg_name in self.all_alg_files)

        merge_job = Job(self.conf["app"]["tcoffee"], tcoffee_args,
                        parent_ids=self.parent_ids)
        for alg_name in self.all_alg_files:
            merge_job.add_input_file(alg_name)
        self.jobs.append(merge_job)
예제 #7
0
파일: mafft.py 프로젝트: F4L2/devoirs
 def load_jobs(self):
     """Queue the single Mafft job of this task.

     The number of cores assigned to the job is read from the
     ``threading`` section of the configuration.
     """
     app_key = self.conf[self.confname]["_app"]

     mafft_args = OrderedDict(self.args)
     # Mafft sends the resulting alignment to standard output, and the
     # input file has to be the last argument: it is therefore added at
     # the end of the ordered mapping, under an empty option name.
     mafft_args[""] = pjoin(GLOBALS["input_dir"], self.multiseq_file)

     mafft_job = Job(self.conf["app"][app_key], mafft_args,
                     parent_ids=[self.nodeid])
     mafft_job.add_input_file(self.multiseq_file)
     mafft_job.cores = self.conf["threading"][app_key]
     self.jobs.append(mafft_job)
예제 #8
0
def seq_reverser_job(multiseq_file, outfile, parent_ids, trimal_bin):
     """Build the job that reverses every sequence of an MSF or MSA.

     The job is named "TrimalAlgReverser"; it runs ``trimal_bin`` on
     ``multiseq_file`` and writes fasta output to ``outfile``.  The job is
     returned, not queued.
     """
     reverser_options = {
         "-in": multiseq_file,
         "-out": outfile,
         "-reverse": "",
         "-fasta": "",
     }
     return Job(trimal_bin, reverser_options, "TrimalAlgReverser",
                parent_ids=parent_ids)
예제 #9
0
파일: phyml.py 프로젝트: a1an77/ete
 def load_jobs(self):
     """Queue the single PhyML job of this task.

     The configured arguments are copied and completed with the model
     (``--model``), the sequence type (``--datatype``) and the phylip
     alignment (``--input``).  When ``self.constrain_tree`` is set it is
     passed through both ``--constraint_tree`` and ``-u`` and registered as
     an input file; otherwise any preconfigured ``--constraint_tree``
     option is dropped.
     """
     appname = self.conf[self.confname]["_app"]
     args = OrderedDict(self.args)
     args["--model"] = self.model
     args["--datatype"] = self.seqtype
     args["--input"] = self.alg_phylip_file
     if self.constrain_tree:
         args["--constraint_tree"] = self.constrain_tree
         args["-u"] = self.constrain_tree
     else:
         # The option may be absent from the configured arguments; pop()
         # with a default removes it when present without raising KeyError.
         args.pop("--constraint_tree", None)
     job = Job(self.conf["app"][appname], args, parent_ids=[self.nodeid])
     # Input files are registered against the job's own directory.
     job.add_input_file(self.alg_phylip_file, job.jobdir)
     if self.constrain_tree:
         job.add_input_file(self.constrain_tree, job.jobdir)
     job.jobname += "-"+self.model
     self.jobs.append(job)
예제 #10
0
파일: uhire.py 프로젝트: F4L2/devoirs
    def load_jobs(self):
        """Queue the three chained jobs of this task.

        1. "usearch-uhire": splits the original sequences into clusters.
        2. "uhire_muscle_algs": a shell loop running Muscle on every
           cluster file produced by step 1.
        3. "usearch-umerge": merges the cluster alignments into
           "alg.fasta".

        Each job depends on the previous one.
        """
        # Step 1: split the original set of sequences into clusters.
        split_args = {
            "--clumpfasta": "./",
            "--maxclump": "%s" % self.conf["uhire"]["_maxclump"],
            "--usersort": "",
            "--uhire": self.multiseq_file,
        }
        split_job = Job(self.conf["app"]["usearch"], split_args,
                        "usearch-uhire", parent_ids=[self.nodeid])

        # Step 2: build a Muscle alignment for each cluster. This is a
        # special job: the whole shell command is used as the job binary,
        # so it is very important that the command has no trailing lines.
        clump_dir = os.path.join("../", split_job.jobid)
        shell_template = """
        (mkdir clumpalgs/;
        for fname in %s/clump.* %s/master;
           do %s -in $fname -out clumpalgs/`basename $fname` -maxiters %s;
        done;) """
        cmd = shell_template % (
            clump_dir,
            clump_dir,
            self.conf["app"]["muscle"],
            self.conf["uhire"]["_muscle_maxiters"],
        )
        clump_alg_job = Job(cmd, {}, "uhire_muscle_algs",
                            parent_ids=[split_job.jobid])
        clump_alg_job.dependencies.add(split_job)

        # Step 3: merge the cluster alignments into a single one.
        merge_args = {
            "--maxlen": self.conf["uhire"]["_max_seq_length"],
            "--mergeclumps": "../%s/clumpalgs/" % clump_alg_job.jobid,
            "--output": "alg.fasta",
        }
        merge_job = Job(self.conf["app"]["usearch"], merge_args,
                        "usearch-umerge", parent_ids=[clump_alg_job.jobid])
        merge_job.dependencies.add(clump_alg_job)

        # Add all jobs to the task queue, in execution order.
        self.jobs.extend([split_job, clump_alg_job, merge_job])
예제 #11
0
파일: prottest2.py 프로젝트: F4L2/devoirs
    def load_jobs(self):
        """Queue the per-model jobs of this task.

        For every model in ``self.models`` a PhyML job named "phyml-bionj"
        is appended to ``self.jobs``.  When ``self.lk_mode`` is "raxml", a
        RAxML job is queued right after it: it depends on the PhyML job and
        receives, through ``-t``, a tree path located in that job's folder
        under ``GLOBALS["tasks_dir"]``.
        """
        for model in self.models:
            phyml_args = self.args.copy()
            phyml_args["--model"] = model
            phyml_job = Job(self.conf["app"]["phyml"], phyml_args,
                            parent_ids=[self.nodeid], jobname="phyml-bionj")
            phyml_job.flag = "phyml"
            self.jobs.append(phyml_job)

            # Nothing else to queue unless likelihoods come from RAxML.
            if self.lk_mode != "raxml":
                continue

            phyml_tree = os.path.join(
                GLOBALS["tasks_dir"], phyml_job.jobid,
                self.alg_basename + "_phyml_tree.txt")
            optimize_args = {
                "-f": "e",
                "-s": self.alg_basename,
                "-m": "PROTGAMMA%s" % model,
                "-n": self.alg_basename + "." + model,
                "-t": phyml_tree,
            }
            optimize_job = Job(self.conf["app"]["raxml"], optimize_args,
                               parent_ids=[phyml_job.jobid],
                               jobname="raxml-tree-optimize")
            # The RAxML job cannot start before its PhyML job.
            optimize_job.dependencies.add(phyml_job)
            optimize_job.flag = "raxml"
            optimize_job.model = model
            self.jobs.append(optimize_job)

        log.log(26, "Models to test %s", self.models)
예제 #12
0
파일: phyml.py 프로젝트: F4L2/devoirs
 def load_jobs(self):
     """Queue the single PhyML job of this task.

     The configured arguments are copied and completed with the model
     (``--model``), the sequence type (``--datatype``) and the phylip
     alignment (``--input``).  When ``self.constrain_tree`` is set it is
     passed through both ``--constraint_tree`` and ``-u`` and registered as
     an input file; otherwise any preconfigured ``--constraint_tree``
     option is dropped.
     """
     appname = self.conf[self.confname]["_app"]
     args = OrderedDict(self.args)
     args["--model"] = self.model
     args["--datatype"] = self.seqtype
     args["--input"] = self.alg_phylip_file
     if self.constrain_tree:
         args["--constraint_tree"] = self.constrain_tree
         args["-u"] = self.constrain_tree
     else:
         # The option may be absent from the configured arguments; pop()
         # with a default removes it when present without raising KeyError.
         args.pop("--constraint_tree", None)
     job = Job(self.conf["app"][appname], args, parent_ids=[self.nodeid])
     # Input files are registered against the job's own directory.
     job.add_input_file(self.alg_phylip_file, job.jobdir)
     if self.constrain_tree:
         job.add_input_file(self.constrain_tree, job.jobdir)
     job.jobname += "-" + self.model
     self.jobs.append(job)
예제 #13
0
파일: fasttree.py 프로젝트: F4L2/devoirs
    def load_jobs(self):
        """Queue the single tree-building job of this task.

        Any configured ``-wag`` option is discarded.  An optional
        constraint tree is passed through ``-constraints``.  The alignment
        path is added last, as an option name with an empty value.  The
        number of cores comes from the ``threading`` configuration section.
        """
        tree_args = self.args.copy()
        # Drop "-wag" when configured; a missing key is not an error.
        tree_args.pop("-wag", None)

        if self.constrain_tree:
            tree_args["-constraints"] = pjoin(GLOBALS["input_dir"],
                                              self.constrain_tree)

        alg_path = pjoin(GLOBALS["input_dir"], self.alg_phylip_file)
        tree_args[alg_path] = ""

        app_key = self.conf[self.confname]["_app"]
        tree_job = Job(self.conf["app"][app_key], tree_args,
                       parent_ids=[self.nodeid])
        tree_job.cores = self.conf["threading"][app_key]

        # Register input files: constraint tree first, then the alignment.
        if self.constrain_tree:
            tree_job.add_input_file(self.constrain_tree)
        tree_job.add_input_file(self.alg_phylip_file)
        self.jobs.append(tree_job)
예제 #14
0
파일: fasttree.py 프로젝트: a1an77/ete
    def load_jobs(self):
        """Queue the single tree-building job of this task.

        Any configured ``-wag`` option is discarded.  An optional
        constraint tree is passed through ``-constraints``.  The alignment
        path is added last, as an option name with an empty value.  The
        number of cores comes from the ``threading`` configuration section.
        """
        tree_args = self.args.copy()
        # Drop "-wag" when configured; a missing key is not an error.
        tree_args.pop("-wag", None)

        if self.constrain_tree:
            tree_args["-constraints"] = pjoin(GLOBALS["input_dir"],
                                              self.constrain_tree)

        alg_path = pjoin(GLOBALS["input_dir"], self.alg_phylip_file)
        tree_args[alg_path] = ""

        app_key = self.conf[self.confname]["_app"]
        tree_job = Job(self.conf["app"][app_key], tree_args,
                       parent_ids=[self.nodeid])
        tree_job.cores = self.conf["threading"][app_key]

        # Register input files: constraint tree first, then the alignment.
        if self.constrain_tree:
            tree_job.add_input_file(self.constrain_tree)
        tree_job.add_input_file(self.alg_phylip_file)
        self.jobs.append(tree_job)
예제 #15
0
    def load_jobs(self):
        """Queue every job and task of the meta-alignment workflow.

        In order, the following items are appended to ``self.jobs``:

        1. one job writing the "_reversed" copy of the input sequences;
        2. for each entry of the ``_aligners`` configuration option: an
           aligner task on the original sequences, an aligner task on the
           reversed copy, and a job writing the "_restored" version of the
           reversed alignment;
        3. one MCoffee task combining all those alignments;
        4. when the ``_alg_trimming`` option is true, a trimming job
           ("McoffeeTrimming") run on the MCoffee output.
        """
        readal_bin = self.conf["app"]["readal"]
        trimal_bin = self.conf["app"]["trimal"]
        input_dir = GLOBALS["input_dir"]
        multiseq_file = pjoin(input_dir, self.multiseq_file)
        multiseq_file_r = pjoin(input_dir, self.multiseq_file+"_reversed")

        # Reversed copy of the input; aligners are later run on both
        # orientations.
        first = seq_reverser_job(multiseq_file, multiseq_file_r, 
                                 [self.nodeid], readal_bin)
        first.add_input_file(self.multiseq_file)
        self.jobs.append(first)

        all_alg_names = []
        mcoffee_parents = []
        for aligner_name in self.conf[self.confname]["_aligners"]:
            # Drop the first character of the configured name (presumably a
            # reference marker used in the config file -- TODO confirm).
            aligner_name = aligner_name[1:]
            _classname = APP2CLASS[self.conf[aligner_name]["_app"]]

            # NOTE(review): level=-1 is the Python 2 "relative, then
            # absolute" import mode; Python 3 rejects negative levels.
            _module = __import__(CLASS2MODULE[_classname], globals(), locals(), [], -1)
            _aligner = getattr(_module, _classname)

            # Normal alg
            task1 = _aligner(self.nodeid, self.multiseq_file, self.seqtype,
                             self.conf, aligner_name)
            task1.size = self.size
            self.jobs.append(task1)
            all_alg_names.append(task1.alg_fasta_file)


            # Alg of the reverse; it must wait for the reversed input file.
            task2 = _aligner(self.nodeid, self.multiseq_file+"_reversed",
                             self.seqtype, self.conf, aligner_name)
            task2.size = self.size
            task2.dependencies.add(first)
            self.jobs.append(task2)

            # Restore reverse alg
            reverse_out = pjoin(input_dir, task2.alg_fasta_file)
            task3 = seq_reverser_job(reverse_out,
                                     reverse_out+"_restored",
                                     [task2.taskid], readal_bin)
            task3.dependencies.add(task2)
            task3.add_input_file(task2.alg_fasta_file)
            # NOTE(review): this entry is a full path under input_dir while
            # the task1 entry above is a bare file name -- confirm that the
            # consumers of all_alg_names expect both forms.
            all_alg_names.append(reverse_out+"_restored")
            self.jobs.append(task3)
            # Only the two aligner tasks are recorded as MCoffee parents.
            mcoffee_parents.extend([task1.taskid, task2.taskid])

        # Combine signal from all algs using Mcoffee
        mcoffee_task = MCoffee(self.nodeid, self.seqtype, all_alg_names,
                               self.conf, self.confname, parent_ids=mcoffee_parents)
        # reversed algs are not actually saved into db, but it should
        # be present since the reverser job is always executed
        # The MCoffee task waits for everything queued so far.
        mcoffee_task.dependencies.update(list(self.jobs)) 
        self.jobs.append(mcoffee_task)

        if self.conf[self.confname]["_alg_trimming"]:
            # Cutoff is the inverse of the number of combined alignments
            # (two per aligner).
            # NOTE(review): raises ZeroDivisionError if "_aligners" is empty.
            trimming_cutoff = 1.0 / len(all_alg_names)
            targs = {}
            targs["-forceselect"] = pjoin(input_dir, mcoffee_task.alg_fasta_file)
            targs["-compareset"] = pjoin(input_dir, mcoffee_task.alg_list_file)
            targs["-out"] = "mcoffee.trimmed.fasta"
            targs["-fasta"] = ""
            targs["-ct"] = trimming_cutoff
            trim_job = Job(trimal_bin, targs, parent_ids=[mcoffee_task.taskid])
            trim_job.jobname = "McoffeeTrimming"
            trim_job.dependencies.add(mcoffee_task)
            # Expose the trimmed alignment name on the job itself.
            trim_job.alg_fasta_file = targs["-out"]
            for key in all_alg_names:
                trim_job.add_input_file(key)
            trim_job.add_input_file(mcoffee_task.alg_fasta_file)
            trim_job.add_input_file(mcoffee_task.alg_list_file)
            self.jobs.append(trim_job)      
예제 #16
0
파일: jmodeltest.py 프로젝트: F4L2/devoirs
 def load_jobs(self):
     """Queue the single jmodeltest job, run with the task's own args."""
     jmodeltest_bin = self.conf["app"]["jmodeltest"]
     model_job = Job(jmodeltest_bin, self.args, parent_ids=[self.nodeid])
     self.jobs.append(model_job)
예제 #17
0
    def load_jobs(self):
        """Queue the tree-search job and the branch-support job(s).

        A main job is always queued with the alignment (``-s``), the model
        string (``-m``) and the run name (``-n``), plus the optional
        constraint tree (``-g``) and partitions file (``-q``).  Its expected
        best-tree path is stored in ``self.out_tree_file``.

        What follows depends on ``self.bootstrap``:

        * "alrt": one more job using ``self.raxml_bin`` with ``-f J`` on the
          best tree; stored as ``self.alrt_job``.
        * "alrt_phyml": one PhyML job with ``--bootstrap -2`` taking the best
          tree through ``-u``; stored as ``self.alrt_job``.
        * anything else: the value is converted with ``int()`` and used as
          ``-N`` for a bootstrap job, followed by a ``-f b`` job that takes
          the bootstrap file through ``-z``; stored as ``self.boot_job``
          and ``self.bootd_job``.
        """
        args = OrderedDict(self.args)
        args["-s"] = pjoin(GLOBALS["input_dir"], self.alg_phylip_file)
        args["-m"] = self.model_string
        args["-n"] = self.alg_phylip_file
        if self.constrain_tree:
            log.log(24, "Using constrain tree %s" % self.constrain_tree)
            args["-g"] = pjoin(GLOBALS["input_dir"], self.constrain_tree)
        if self.partitions_file:
            log.log(24, "Using alg partitions %s" % self.partitions_file)
            args['-q'] = pjoin(GLOBALS["input_dir"], self.partitions_file)

        tree_job = Job(self.raxml_bin, args, parent_ids=[self.nodeid])
        tree_job.jobname += "-" + self.model_string
        tree_job.cores = self.threads
        # Register input files necessary to run the job
        tree_job.add_input_file(self.alg_phylip_file)
        if self.constrain_tree:
            tree_job.add_input_file(self.constrain_tree)
        if self.partitions_file:
            tree_job.add_input_file(self.partitions_file)

        self.jobs.append(tree_job)
        # Path where the main job's best tree is expected; the support
        # jobs below read it through -t / -u.
        self.out_tree_file = os.path.join(
            tree_job.jobdir, "RAxML_bestTree." + self.alg_phylip_file)

        if self.bootstrap == "alrt":
            # Start from the main job's arguments, without the constraint.
            alrt_args = tree_job.args.copy()
            if self.constrain_tree:
                del alrt_args["-g"]
            if self.partitions_file:
                # NOTE(review): looks redundant if tree_job.args already
                # holds the "-q" entry set above -- confirm.
                alrt_args["-q"] = args['-q']

            alrt_args["-f"] = "J"
            alrt_args["-t"] = self.out_tree_file
            alrt_job = Job(self.raxml_bin,
                           alrt_args,
                           parent_ids=[tree_job.jobid])
            alrt_job.jobname += "-alrt"
            alrt_job.dependencies.add(tree_job)
            alrt_job.cores = self.threads

            # Register necessary input files
            alrt_job.add_input_file(self.alg_phylip_file)
            if self.partitions_file:
                alrt_job.add_input_file(self.partitions_file)

            self.jobs.append(alrt_job)
            self.alrt_job = alrt_job

        elif self.bootstrap == "alrt_phyml":
            alrt_args = {
                "-o": "n",
                "-i": self.alg_phylip_file,
                "--bootstrap": "-2",
                "-d": self.seqtype,
                "-u": self.out_tree_file,
                "--model": self.model,
                "--quiet": "",
                "--no_memory_check": "",
            }
            # NOTE(review): disabled in the original code, so the
            # constraint tree is not passed to this job.
            #if self.constrain_tree:
            #    alrt_args["--constraint_tree"] = self.constrain_tree

            alrt_job = Job(self.conf["app"]["phyml"],
                           alrt_args,
                           parent_ids=[tree_job.jobid])
            alrt_job.add_input_file(self.alg_phylip_file, alrt_job.jobdir)
            alrt_job.jobname += "-alrt"
            alrt_job.dependencies.add(tree_job)
            # NOTE(review): the alignment is registered a second time here,
            # this time without a target directory -- confirm it is wanted.
            alrt_job.add_input_file(self.alg_phylip_file)
            self.jobs.append(alrt_job)
            self.alrt_job = alrt_job

        else:
            # Bootstrap calculation
            boot_args = tree_job.args.copy()
            boot_args["-n"] = "bootstraps." + boot_args["-n"]
            # Raises ValueError when self.bootstrap is not an integer string.
            boot_args["-N"] = int(self.bootstrap)
            # Fixed value (presumably the bootstrap random seed -- confirm
            # against the RAxML manual).
            boot_args["-b"] = 31416
            boot_job = Job(self.raxml_bin,
                           boot_args,
                           parent_ids=[tree_job.jobid])
            boot_job.jobname += "-%d-bootstraps" % (boot_args['-N'])
            boot_job.dependencies.add(tree_job)
            boot_job.cores = self.threads

            # Register necessary input files
            boot_job.add_input_file(self.alg_phylip_file)
            if self.constrain_tree:
                boot_job.add_input_file(self.constrain_tree)
            if self.partitions_file:
                boot_job.add_input_file(self.partitions_file)

            self.jobs.append(boot_job)

            # Bootstrap drawing on top of best tree
            bootd_args = tree_job.args.copy()
            if self.constrain_tree:
                del bootd_args["-g"]
            if self.partitions_file:
                del bootd_args["-q"]

            bootd_args["-n"] = "bootstrapped." + tree_job.args["-n"]
            bootd_args["-f"] = "b"
            bootd_args["-t"] = self.out_tree_file
            # Bootstrap file expected in the bootstrap job's directory.
            bootd_args["-z"] = pjoin(boot_job.jobdir,
                                     "RAxML_bootstrap." + boot_job.args["-n"])

            bootd_job = Job(self.raxml_bin,
                            bootd_args,
                            parent_ids=[tree_job.jobid])
            bootd_job.jobname += "-bootstrapped"
            # Only the bootstrap job is added as a direct dependency.
            bootd_job.dependencies.add(boot_job)
            bootd_job.cores = self.threads
            self.jobs.append(bootd_job)

            self.boot_job = boot_job
            self.bootd_job = bootd_job
예제 #18
0
파일: raxml.py 프로젝트: a1an77/ete
    def load_jobs(self):
        """Queue the tree-search job and the branch-support job(s).

        A main job is always queued with the alignment (``-s``), the model
        string (``-m``) and the run name (``-n``), plus the optional
        constraint tree (``-g``) and partitions file (``-q``).  Its expected
        best-tree path is stored in ``self.out_tree_file``.

        What follows depends on ``self.bootstrap``:

        * "alrt": one more job using ``self.raxml_bin`` with ``-f J`` on the
          best tree; stored as ``self.alrt_job``.
        * "alrt_phyml": one PhyML job with ``--bootstrap -2`` taking the best
          tree through ``-u``; stored as ``self.alrt_job``.
        * anything else: the value is converted with ``int()`` and used as
          ``-N`` for a bootstrap job, followed by a ``-f b`` job that takes
          the bootstrap file through ``-z``; stored as ``self.boot_job``
          and ``self.bootd_job``.
        """
        args = OrderedDict(self.args)
        args["-s"] = pjoin(GLOBALS["input_dir"], self.alg_phylip_file)
        args["-m"] = self.model_string
        args["-n"] = self.alg_phylip_file
        if self.constrain_tree:
            log.log(24, "Using constrain tree %s" %self.constrain_tree)
            args["-g"] = pjoin(GLOBALS["input_dir"], self.constrain_tree)
        if self.partitions_file:
            log.log(24, "Using alg partitions %s" %self.partitions_file)
            args['-q'] = pjoin(GLOBALS["input_dir"], self.partitions_file)

        tree_job = Job(self.raxml_bin, args, parent_ids=[self.nodeid])
        tree_job.jobname += "-"+self.model_string
        tree_job.cores = self.threads
        # Register input files necessary to run the job
        tree_job.add_input_file(self.alg_phylip_file)
        if self.constrain_tree:
            tree_job.add_input_file(self.constrain_tree)
        if self.partitions_file:
            tree_job.add_input_file(self.partitions_file)

        self.jobs.append(tree_job)
        # Path where the main job's best tree is expected; the support
        # jobs below read it through -t / -u.
        self.out_tree_file = os.path.join(tree_job.jobdir,
                                     "RAxML_bestTree." + self.alg_phylip_file)

        if self.bootstrap == "alrt":
            # Start from the main job's arguments, without the constraint.
            alrt_args = tree_job.args.copy()
            if self.constrain_tree:
                del alrt_args["-g"]
            if self.partitions_file:
                # NOTE(review): looks redundant if tree_job.args already
                # holds the "-q" entry set above -- confirm.
                alrt_args["-q"] = args['-q']

            alrt_args["-f"] =  "J"
            alrt_args["-t"] = self.out_tree_file
            alrt_job = Job(self.raxml_bin, alrt_args,
                           parent_ids=[tree_job.jobid])
            alrt_job.jobname += "-alrt"
            alrt_job.dependencies.add(tree_job)
            alrt_job.cores = self.threads

            # Register necessary input files
            alrt_job.add_input_file(self.alg_phylip_file)
            if self.partitions_file:
                alrt_job.add_input_file(self.partitions_file)

            self.jobs.append(alrt_job)
            self.alrt_job = alrt_job

        elif self.bootstrap == "alrt_phyml":
            alrt_args = {
                "-o": "n",
                "-i": self.alg_phylip_file,
                "--bootstrap": "-2",
                "-d": self.seqtype,
                "-u": self.out_tree_file,
                "--model": self.model,
                "--quiet": "",
                "--no_memory_check": "",
                }
            # NOTE(review): disabled in the original code, so the
            # constraint tree is not passed to this job.
            #if self.constrain_tree:
            #    alrt_args["--constraint_tree"] = self.constrain_tree

            alrt_job = Job(self.conf["app"]["phyml"],
                           alrt_args, parent_ids=[tree_job.jobid])
            alrt_job.add_input_file(self.alg_phylip_file, alrt_job.jobdir)
            alrt_job.jobname += "-alrt"
            alrt_job.dependencies.add(tree_job)
            # NOTE(review): the alignment is registered a second time here,
            # this time without a target directory -- confirm it is wanted.
            alrt_job.add_input_file(self.alg_phylip_file)
            self.jobs.append(alrt_job)
            self.alrt_job = alrt_job

        else:
            # Bootstrap calculation
            boot_args = tree_job.args.copy()
            boot_args["-n"] = "bootstraps."+boot_args["-n"]
            # Raises ValueError when self.bootstrap is not an integer string.
            boot_args["-N"] = int(self.bootstrap)
            # Fixed value (presumably the bootstrap random seed -- confirm
            # against the RAxML manual).
            boot_args["-b"] = 31416
            boot_job = Job(self.raxml_bin, boot_args,
                           parent_ids=[tree_job.jobid])
            boot_job.jobname += "-%d-bootstraps" %(boot_args['-N'])
            boot_job.dependencies.add(tree_job)
            boot_job.cores = self.threads

            # Register necessary input files
            boot_job.add_input_file(self.alg_phylip_file)
            if self.constrain_tree:
                boot_job.add_input_file(self.constrain_tree)
            if self.partitions_file:
                boot_job.add_input_file(self.partitions_file)

            self.jobs.append(boot_job)

            # Bootstrap drawing on top of best tree
            bootd_args = tree_job.args.copy()
            if self.constrain_tree:
                del bootd_args["-g"]
            if self.partitions_file:
                del bootd_args["-q"] 

            bootd_args["-n"] = "bootstrapped."+ tree_job.args["-n"]
            bootd_args["-f"] = "b"
            bootd_args["-t"] = self.out_tree_file
            # Bootstrap file expected in the bootstrap job's directory.
            bootd_args["-z"] = pjoin(boot_job.jobdir, "RAxML_bootstrap." + boot_job.args["-n"])

            bootd_job = Job(self.raxml_bin, bootd_args,
                            parent_ids=[tree_job.jobid])
            bootd_job.jobname += "-bootstrapped"
            # Only the bootstrap job is added as a direct dependency.
            bootd_job.dependencies.add(boot_job)
            bootd_job.cores = self.threads
            self.jobs.append(bootd_job)

            self.boot_job = boot_job
            self.bootd_job = bootd_job