Ejemplo n.º 1
0
 def load_jobs(self):
     """Queue the single Muscle job that this task needs.

     The task arguments are copied, the ``-in`` and ``-out`` options are
     filled in, and the resulting job is appended to ``self.jobs`` with
     the multi-sequence file registered as one of its inputs.
     """
     app_key = self.conf[self.confname]["_app"]

     muscle_args = OrderedDict(self.args)
     muscle_args.update([
         ("-in", pjoin(GLOBALS["input_dir"], self.multiseq_file)),
         ("-out", "alg.fasta"),
     ])

     # One Muscle job is all that is required for this task
     muscle_job = Job(self.conf["app"][app_key],
                      muscle_args,
                      parent_ids=[self.nodeid])
     muscle_job.add_input_file(self.multiseq_file)
     self.jobs.append(muscle_job)
Ejemplo n.º 2
0
 def load_jobs(self):
     """Queue the Mafft job for this task.

     The task arguments are copied and the input file path is stored
     under the empty option name.  The job is given the core count
     configured for the application and appended to ``self.jobs``.
     """
     app_key = self.conf[self.confname]["_app"]
     mafft_args = OrderedDict(self.args)

     # Mafft prints the resulting alignment to standard output.  Argument
     # order matters: the input file has to be the last one.
     input_path = pjoin(GLOBALS["input_dir"], self.multiseq_file)
     mafft_args[""] = input_path

     mafft_job = Job(self.conf["app"][app_key],
                     mafft_args,
                     parent_ids=[self.nodeid])
     mafft_job.add_input_file(self.multiseq_file)
     mafft_job.cores = self.conf["threading"][app_key]
     self.jobs.append(mafft_job)
Ejemplo n.º 3
0
    def __init__(self, nodeid, multiseq_file, seqtype, conf, confname):
        """Create a Mafft alignment task.

        Registers the Mafft citation, stores the configuration, runs the
        ``AlgTask`` initialiser with an empty argument set and the
        ``confname`` section of ``conf``, then records the sequence type
        and input file before calling ``self.init()``.
        """
        GLOBALS["citator"].add(MAFFT_CITE)

        self.conf = conf
        self.confname = confname

        # Base-class set-up: no fixed arguments for this aligner
        no_fixed_args = OrderedDict()
        AlgTask.__init__(self, nodeid, "alg", "Mafft",
                         no_fixed_args, self.conf[confname])

        self.multiseq_file = multiseq_file
        self.seqtype = seqtype
        self.init()
Ejemplo n.º 4
0
    def __init__(self, nodeid, multiseq_file, seqtype, conf, confname):
        """Create a Meta-Alg task.

        Stores the configuration, runs the ``AlgTask`` initialiser with an
        empty argument set, then records the sequence type, the input
        file and the node size (``0`` when the node info carries no
        ``"size"`` entry) before calling ``self.init()``.
        """
        self.conf = conf
        self.confname = confname

        # Base-class set-up
        no_fixed_args = OrderedDict()
        AlgTask.__init__(self, nodeid, "alg", "Meta-Alg",
                         no_fixed_args, self.conf[self.confname])

        self.multiseq_file = multiseq_file
        self.seqtype = seqtype

        node_info = conf["_nodeinfo"][nodeid]
        self.size = node_info.get("size", 0)
        self.all_alg_files = None
        self.init()
Ejemplo n.º 5
0
    def __init__(self, nodeid, multiseq_file, seqtype, conf):
        """Create a Usearch-Uhire alignment task.

        Runs the ``AlgTask`` initialiser with an empty argument set and the
        ``"uhire"`` section of ``conf``, stores the inputs, calls
        ``self.init()`` and finally sets the paths of the FASTA and
        iphylip alignment files inside ``self.taskdir``.
        """
        # Base-class set-up
        uhire_conf = conf["uhire"]
        AlgTask.__init__(self, nodeid, "alg", "Usearch-Uhire",
                         OrderedDict(), uhire_conf)

        self.multiseq_file = multiseq_file
        self.seqtype = seqtype
        self.conf = conf

        self.init()

        # Output locations are derived from the task directory, which is
        # read only after init() has run
        task_dir = self.taskdir
        self.alg_fasta_file = os.path.join(task_dir, "final_alg.fasta")
        self.alg_phylip_file = os.path.join(task_dir, "final_alg.iphylip")
Ejemplo n.º 6
0
 def __init__(self, nodeid, seqtype, all_alg_files, conf, confname, parent_ids):
     """Create an Mcoffee task that works on a set of alignment files.

     Registers the Mcoffee citation, stores the configuration, runs the
     ``AlgTask`` initialiser with the fixed ``-output fasta`` option,
     then records the alignment files, parent ids and sequence type
     before calling ``self.init()``.
     """
     GLOBALS["citator"].add(MCOFFEE_CITE)

     # Fixed option for this task
     fixed_args = OrderedDict([("-output", "fasta")])

     self.conf = conf
     self.confname = confname

     # Base-class set-up
     AlgTask.__init__(self, nodeid, "alg", "Mcoffee",
                      fixed_args, self.conf[confname])

     self.seqtype = seqtype
     self.parent_ids = parent_ids
     self.all_alg_files = all_alg_files
     self.init()
Ejemplo n.º 7
0
    def __init__(self, nodeid, multiseq_file, seqtype, conf, confname):
        """Create a DialignTX alignment task.

        Registers the DialignTX citation, stores the configuration, runs
        the ``AlgTask`` initialiser with a single unnamed placeholder
        argument, then records the sequence type and input file before
        calling ``self.init()``.
        """
        GLOBALS["citator"].add(DIALIGN_CITE)

        # Fixed options for running this task: one unnamed slot, no value yet
        fixed_args = OrderedDict([("", None)])

        self.conf = conf
        self.confname = confname

        # Base-class set-up
        AlgTask.__init__(self, nodeid, "alg", "DialignTX",
                         fixed_args, self.conf[self.confname])

        self.multiseq_file = multiseq_file
        self.seqtype = seqtype
        self.init()
Ejemplo n.º 8
0
    def __init__(self, nodeid, multiseq_file, seqtype, conf, confname):
        """Create a Muscle alignment task.

        Registers the Muscle citation, stores the configuration, runs the
        ``AlgTask`` initialiser with ``-in``/``-out`` placeholders, then
        records the sequence type and input file before calling
        ``self.init()``.
        """
        GLOBALS["citator"].add(MUSCLE_CITE)

        # Fixed Muscle options; the values are left empty here
        fixed_args = OrderedDict([
            ("-in", None),
            ("-out", None),
        ])

        self.conf = conf
        self.confname = confname

        # Base-class set-up
        AlgTask.__init__(self, nodeid, "alg", "Muscle",
                         fixed_args, self.conf[confname])

        self.multiseq_file = multiseq_file
        self.seqtype = seqtype
        self.init()
Ejemplo n.º 9
0
 def load_jobs(self):
     """Queue the Phyml job for this task.

     The task arguments are copied and completed with the model, data
     type and input alignment.  When a constraint tree is set it is
     passed through both ``--constraint_tree`` and ``-u`` and registered
     as a job input; otherwise the ``--constraint_tree`` entry is
     removed.  The job name is suffixed with the model name.
     """
     app_key = self.conf[self.confname]["_app"]

     phyml_args = OrderedDict(self.args)
     phyml_args.update([
         ("--model", self.model),
         ("--datatype", self.seqtype),
         ("--input", self.alg_phylip_file),
     ])

     if not self.constrain_tree:
         del phyml_args["--constraint_tree"]
     else:
         phyml_args["--constraint_tree"] = self.constrain_tree
         phyml_args["-u"] = self.constrain_tree

     phyml_job = Job(self.conf["app"][app_key],
                     phyml_args,
                     parent_ids=[self.nodeid])

     # Input files are registered with the job directory as second argument
     phyml_job.add_input_file(self.alg_phylip_file, phyml_job.jobdir)
     if self.constrain_tree:
         phyml_job.add_input_file(self.constrain_tree, phyml_job.jobdir)

     phyml_job.jobname += "-" + self.model
     self.jobs.append(phyml_job)
Ejemplo n.º 10
0
    def __init__(self, nodeid, multiseq_file, seqtype, conf, confname):
        """Create a Clustal-Omega alignment task.

        Registers the Clustal-Omega citation, stores the configuration,
        runs the ``AlgTask`` initialiser with ``-i``/``-o`` placeholders
        and ``--outfmt fa``, then records the sequence type and input
        file before calling ``self.init()``.
        """
        GLOBALS["citator"].add(CLUSTALO_CITE)

        # Fixed options; input and output are left empty here
        fixed_args = OrderedDict([
            ("-i", None),
            ("-o", None),
            ("--outfmt", "fa"),
        ])

        self.conf = conf
        self.confname = confname

        # Base-class set-up
        AlgTask.__init__(self, nodeid, "alg", "Clustal-Omega",
                         fixed_args, self.conf[self.confname])

        self.multiseq_file = multiseq_file
        self.seqtype = seqtype
        self.init()
Ejemplo n.º 11
0
    def __init__(self,
                 nodeid,
                 alg_file,
                 constrain_id,
                 model,
                 seqtype,
                 conf,
                 confname,
                 parts_id=None):
        """Create a FastTree tree-inference task.

        Registers the FastTree citation, resolves the optional constraint
        through ``db.get_dataid``, and builds the fixed arguments
        (``-nopr``, plus ``-gtr -nt`` for nucleotide data) before running
        the ``TreeTask`` initialiser and ``self.init()``.  A requested
        ``model`` only triggers a warning; ``self.model`` is always
        ``None``.  ``parts_id`` is accepted but not used here.

        Raises:
            ValueError: if ``seqtype`` is neither ``"nt"`` nor ``"aa"``.
        """
        GLOBALS["citator"].add(FASTTREE_CITE)

        self.confname = confname
        self.conf = conf
        self.alg_phylip_file = alg_file

        # A falsy constrain_id means no constraint at all
        self.constrain_tree = (
            db.get_dataid(constrain_id, DATATYPES.constrain_alg)
            if constrain_id else None
        )

        self.alg_basename = basename(self.alg_phylip_file)
        self.seqtype = seqtype
        self.tree_file = ""

        if model:
            log.warning("FastTree does not support model selection")
        self.model = None
        self.lk = None

        # Fixed options; nucleotide data needs the extra GTR switches
        fasttree_args = OrderedDict([("-nopr", "")])
        if self.seqtype == "nt":
            fasttree_args["-gtr -nt"] = ""
        elif self.seqtype != "aa":
            raise ValueError("Unknown seqtype %s" % self.seqtype)

        TreeTask.__init__(self, nodeid, "tree", "FastTree", fasttree_args,
                          self.conf[confname])

        self.init()
Ejemplo n.º 12
0
    def __init__(self,
                 nodeid,
                 alg_phylip_file,
                 constrain_id,
                 model,
                 seqtype,
                 conf,
                 confname,
                 parts_id=None):
        """Create a Phyml tree-inference task.

        Args:
            nodeid: forwarded to ``TreeTask.__init__``.
            alg_phylip_file: alignment file used as Phyml input.
            constrain_id: when truthy, resolved through ``db.get_dataid``
                to obtain the constraint tree; otherwise no constraint.
            model: substitution model; when empty, the ``"_aa_model"`` or
                ``"_nt_model"`` entry of ``conf[confname]`` is used,
                depending on ``seqtype``.
            seqtype: ``"aa"`` or ``"nt"``.
            conf: full configuration mapping.
            confname: name of the section of ``conf`` for this task.
            parts_id: accepted for signature compatibility; not used here.

        Raises:
            ValueError: if ``seqtype`` is neither ``"aa"`` nor ``"nt"``.
        """
        GLOBALS["citator"].add(PHYML_CITE)

        # Built from pairs so that the option order never depends on the
        # ordering guarantees of a plain dict literal
        base_args = OrderedDict([
            ("--model", ""),
            ("--no_memory_check", ""),
            ("--quiet", ""),
            ("--constraint_tree", ""),
        ])

        self.confname = confname
        self.conf = conf
        self.constrain_tree = None
        if constrain_id:
            self.constrain_tree = db.get_dataid(constrain_id,
                                                DATATYPES.constrain_tree)
        self.alg_phylip_file = alg_phylip_file

        TreeTask.__init__(self, nodeid, "tree", "Phyml", base_args,
                          conf[confname])

        if seqtype == "aa":
            self.model = model or conf[confname]["_aa_model"]
        elif seqtype == "nt":
            self.model = model or conf[confname]["_nt_model"]
        else:
            # Fail here with a clear message instead of leaving self.model
            # unset for later code to trip over
            raise ValueError("Unknown seqtype %s" % seqtype)
        self.seqtype = seqtype
        self.lk = None

        self.init()
Ejemplo n.º 13
0
    def __init__(self,
                 nodeid,
                 alg_file,
                 constrain_id,
                 model,
                 seqtype,
                 conf,
                 confname,
                 parts_id=None):
        """Create a RAxML tree-inference task.

        Args:
            nodeid: forwarded to ``TreeTask.__init__``.
            alg_file: alignment file; stored as ``self.alg_phylip_file``.
            constrain_id: resolved through ``db.get_dataid`` to obtain the
                constraint tree; a ``ValueError`` from that lookup means
                no constraint is used.
            model: amino-acid model; when empty, ``conf[confname]
                ["_aa_model"]`` is used.  Nucleotide data always uses GTR.
            seqtype: ``"aa"`` or ``"nt"`` (case-insensitive).
            conf: full configuration mapping.
            confname: name of the section of ``conf`` for this task.
            parts_id: optional partitions file.

        Raises:
            ValueError: if ``seqtype`` is neither ``"aa"`` nor ``"nt"``.
        """
        GLOBALS["citator"].add(RAXML_CITE)

        base_args = OrderedDict()
        self.bootstrap = conf[confname].get("_bootstrap", None)

        model = model or conf[confname]["_aa_model"]

        self.confname = confname
        self.conf = conf
        self.alg_phylip_file = alg_file

        try:
            self.constrain_tree = db.get_dataid(constrain_id,
                                                DATATYPES.constrain_tree)
        except ValueError:
            self.constrain_tree = None

        self.partitions_file = parts_id

        TreeTask.__init__(self, nodeid, "tree", "RaxML", base_args,
                          conf[confname])

        # Choose between the serial and the pthreads binary.  Threads are
        # only considered when more than one core is available; a missing
        # or empty threading entry means single-threaded.
        threads = 1
        if GLOBALS["_max_cores"] > 1:
            threads = conf["threading"].get("raxml-pthreads") or 1

        # Normalise to the serial name first, so that an "_app" that
        # already names the pthreads binary does not get the suffix twice
        appname = conf[confname]["_app"].replace("raxml-pthreads", "raxml")
        if threads > 1:
            appname = appname.replace("raxml", "raxml-pthreads")
        else:
            threads = 1
        # Always bound, whichever branch was taken above
        raxml_bin = conf["app"][appname]

        self.raxml_bin = raxml_bin
        self.threads = threads
        self.seqtype = seqtype

        # Process raxml options
        method = conf[confname].get("_method", "GAMMA").upper()
        if seqtype.lower() == "aa":
            self.model_string = 'PROT%s%s' % (method, model.upper())
            self.model = model
        elif seqtype.lower() == "nt":
            self.model_string = 'GTR%s' % method
            self.model = "GTR"
        else:
            raise ValueError("Unknown seqtype %s" % seqtype)

        self.init()
Ejemplo n.º 14
0
    def load_jobs(self):
        """Create the RAxML tree job plus the branch-support jobs.

        A main tree job is always queued.  Depending on ``self.bootstrap``
        it is followed by either a RAxML job stored as ``self.alrt_job``
        (``"alrt"``), a Phyml job stored as ``self.alrt_job``
        (``"alrt_phyml"``), or, for any other value, a bootstrap job and
        a job that draws the bootstraps on the best tree (stored as
        ``self.boot_job`` and ``self.bootd_job``).  All jobs are appended
        to ``self.jobs`` and ``self.out_tree_file`` is set to the expected
        best-tree path inside the main job directory.
        """
        # Arguments of the main tree search
        args = OrderedDict(self.args)
        args["-s"] = pjoin(GLOBALS["input_dir"], self.alg_phylip_file)
        args["-m"] = self.model_string
        args["-n"] = self.alg_phylip_file
        if self.constrain_tree:
            log.log(24, "Using constrain tree %s" % self.constrain_tree)
            args["-g"] = pjoin(GLOBALS["input_dir"], self.constrain_tree)
        if self.partitions_file:
            log.log(24, "Using alg partitions %s" % self.partitions_file)
            args['-q'] = pjoin(GLOBALS["input_dir"], self.partitions_file)

        tree_job = Job(self.raxml_bin, args, parent_ids=[self.nodeid])
        tree_job.jobname += "-" + self.model_string
        tree_job.cores = self.threads
        # Register input files necessary to run the job
        tree_job.add_input_file(self.alg_phylip_file)
        if self.constrain_tree:
            tree_job.add_input_file(self.constrain_tree)
        if self.partitions_file:
            tree_job.add_input_file(self.partitions_file)

        self.jobs.append(tree_job)
        # Path where the best tree is expected, inside the main job dir
        self.out_tree_file = os.path.join(
            tree_job.jobdir, "RAxML_bestTree." + self.alg_phylip_file)

        if self.bootstrap == "alrt":
            # Support values computed by RAxML itself, reusing the tree
            # job arguments without the constraint tree
            alrt_args = tree_job.args.copy()
            if self.constrain_tree:
                del alrt_args["-g"]
            # NOTE(review): looks redundant if the copy above already
            # carries "-q" -- confirm against Job.args
            if self.partitions_file:
                alrt_args["-q"] = args['-q']

            alrt_args["-f"] = "J"
            alrt_args["-t"] = self.out_tree_file
            alrt_job = Job(self.raxml_bin,
                           alrt_args,
                           parent_ids=[tree_job.jobid])
            alrt_job.jobname += "-alrt"
            # The best tree must exist before this job can run
            alrt_job.dependencies.add(tree_job)
            alrt_job.cores = self.threads

            # Register necessary input files
            alrt_job.add_input_file(self.alg_phylip_file)
            if self.partitions_file:
                alrt_job.add_input_file(self.partitions_file)

            self.jobs.append(alrt_job)
            self.alrt_job = alrt_job

        elif self.bootstrap == "alrt_phyml":
            # Support values computed by Phyml on top of the RAxML tree
            alrt_args = {
                "-o": "n",
                "-i": self.alg_phylip_file,
                "--bootstrap": "-2",
                "-d": self.seqtype,
                "-u": self.out_tree_file,
                "--model": self.model,
                "--quiet": "",
                "--no_memory_check": "",
            }
            #if self.constrain_tree:
            #    alrt_args["--constraint_tree"] = self.constrain_tree

            alrt_job = Job(self.conf["app"]["phyml"],
                           alrt_args,
                           parent_ids=[tree_job.jobid])
            alrt_job.add_input_file(self.alg_phylip_file, alrt_job.jobdir)
            alrt_job.jobname += "-alrt"
            alrt_job.dependencies.add(tree_job)
            # NOTE(review): the alignment is registered a second time here,
            # now without the job directory -- confirm this does not undo
            # the registration made a few lines above
            alrt_job.add_input_file(self.alg_phylip_file)
            self.jobs.append(alrt_job)
            self.alrt_job = alrt_job

        else:
            # Bootstrap calculation
            # NOTE(review): self.bootstrap is passed to int() below; a
            # value that int() cannot convert raises here -- confirm the
            # configuration always provides a usable one
            boot_args = tree_job.args.copy()
            boot_args["-n"] = "bootstraps." + boot_args["-n"]
            boot_args["-N"] = int(self.bootstrap)
            boot_args["-b"] = 31416
            boot_job = Job(self.raxml_bin,
                           boot_args,
                           parent_ids=[tree_job.jobid])
            boot_job.jobname += "-%d-bootstraps" % (boot_args['-N'])
            boot_job.dependencies.add(tree_job)
            boot_job.cores = self.threads

            # Register necessary input files
            boot_job.add_input_file(self.alg_phylip_file)
            if self.constrain_tree:
                boot_job.add_input_file(self.constrain_tree)
            if self.partitions_file:
                boot_job.add_input_file(self.partitions_file)

            self.jobs.append(boot_job)

            # Bootstrap drawing on top of best tree
            bootd_args = tree_job.args.copy()
            # Constraint and partition options are dropped for this step
            if self.constrain_tree:
                del bootd_args["-g"]
            if self.partitions_file:
                del bootd_args["-q"]

            bootd_args["-n"] = "bootstrapped." + tree_job.args["-n"]
            bootd_args["-f"] = "b"
            bootd_args["-t"] = self.out_tree_file
            # Bootstrap replicates are read from the bootstrap job dir
            bootd_args["-z"] = pjoin(boot_job.jobdir,
                                     "RAxML_bootstrap." + boot_job.args["-n"])

            bootd_job = Job(self.raxml_bin,
                            bootd_args,
                            parent_ids=[tree_job.jobid])
            bootd_job.jobname += "-bootstrapped"
            # Needs the replicates, so it waits for the bootstrap job
            bootd_job.dependencies.add(boot_job)
            bootd_job.cores = self.threads
            self.jobs.append(bootd_job)

            self.boot_job = boot_job
            self.bootd_job = bootd_job