Beispiel #1
0
    def pygmes(self, fasta, db):
        """
        predict proteins using pygmes

        pygmes is only launched when the protein FASTA or the BED file is
        missing, or when file.isnewer(fasta, faafile) is true. The path
        "gmesclean_<basename of fasta>" inside the pygmes work directory is
        recorded in self._clean_fasta.

        fasta: path of the input FASTA handed to pygmes
        db:    forwarded to pygmes as its ``db`` argument

        Returns the tuple (faafile, bedfile). If either file is missing or
        empty, self.write_outfile() is called and SystemExit(1) is raised.
        """
        outdir = os.path.join(self.cfg["outdir"], "workfiles", "pygmes")
        faafile = os.path.join(outdir, "predicted_proteins.faa")
        bedfile = os.path.join(outdir, "predicted_proteins.bed")
        self._clean_fasta = os.path.join(
            outdir, "gmesclean_{}".format(os.path.basename(fasta)))
        from pygmes import pygmes

        # check if we need to launch; the `file` helpers are called on the
        # class itself, matching the other file.* calls in this code
        need_run = (not file.exists(faafile) or not file.exists(bedfile)
                    or file.isnewer(fasta, faafile))
        if need_run:
            pygmes(fasta, outdir, db=db, clean=True, ncores=self.cfg["ncores"])

        # check if pygmes produced both output files
        if not (os.path.exists(faafile) and os.path.exists(bedfile)):
            # NOTE(review): the message names pyfaidx although the step run
            # here is pygmes -- confirm the intended wording
            logging.warning("No predicted proteins, pyfaidx failed")
            self.write_outfile()
            # raise SystemExit directly: the exit() builtin is injected by
            # the site module and is not guaranteed to exist in programs
            raise SystemExit(1)

        # files exist but one of them is empty: nothing was predicted
        if os.stat(faafile).st_size == 0 or os.stat(bedfile).st_size == 0:
            logging.warning("No predicted proteins")
            self.write_outfile()
            raise SystemExit(1)

        return (faafile, bedfile)
Beispiel #2
0
    def runPlacedHMM(self, hmmfile, proteinfaa, bedfile):
        """
        run hmmsearch for the chosen placement HMMs and reduce the results

        All outputs live in <outdir>/workfiles/hmmer/estimations. The search
        is only executed when the hmmer wrapper reports it has to run, when
        cfg["fplace"] is set, or when file.isnewer(hmmfile, placement.tsv)
        is true.

        Returns the value of h.clean(...) when the search ran, otherwise the
        default path of "hits.tsv" in the output directory.
        """
        workdir = os.path.join(
            self.cfg["outdir"], "workfiles", "hmmer", "estimations")
        # presumably makes sure the directory exists -- confirm in `file`
        file.isdir(workdir)

        placement_tsv = os.path.join(workdir, "placement.tsv")
        placement_out = os.path.join(workdir, "placement.out")
        hits_tsv = os.path.join(workdir, "hits.tsv")

        searcher = hmmer("hmmsearch",
                         proteinfaa,
                         placement_tsv,
                         self.cfg["debug"],
                         touch=self.cfg["touch"])

        # evaluated left to right with short-circuiting, so later checks are
        # skipped as soon as one reason to run is found
        must_run = (searcher.doIneedTorun(self.cfg["force"])
                    or self.cfg["fplace"]
                    or file.isnewer(hmmfile, placement_tsv))
        if not must_run:
            return hits_tsv

        logging.info("Running hmmer for chosen locations")
        run_options = {
            "hmmfiles": hmmfile,
            "modus": self.cfg["dbinfo"]["modus"],
            "evalue": self.cfg["evalue"],
            "cores": self.cfg["ncores"],
            "training": self.cfg["training"],
        }
        searcher.run(placement_out, **run_options)

        # clean hmmer output
        logging.info("Processing Hmmer results")
        return searcher.clean(placement_tsv, bedfile, hits_tsv,
                              self.cfg["mindist"])
Beispiel #3
0
    def gmes(self, fasta):
        """
        predict proteins using gmes

        Works inside <outdir>/workfiles/gmes. Runs GeneMark-ES through the
        gmes wrapper when the wrapper reports that it has to run, derives a
        BED file from the resulting GTF and records the cleaned input FASTA
        path in self._clean_fasta.

        fasta: path of the input FASTA

        Returns the tuple (gmesOut, bedf): the protein FASTA path and the
        BED path. If the GTF file does not exist after the run,
        self.write_outfile() is called and SystemExit(1) is raised.
        """
        logging.debug("Starting gmes function")

        gmesDir = os.path.join(self.cfg["outdir"], "workfiles", "gmes")
        file.isdir(gmesDir)
        gmesOut = os.path.join(gmesDir, "prot_seq.faa")
        gtffile = os.path.join(gmesDir, "genemark.gtf")
        inputfasta = os.path.abspath(os.path.join(gmesDir, "input.fna"))

        # GeneMark-ES
        g = gmes("runGMES", fasta, [gtffile, gmesOut], touch=self.cfg["touch"])
        logging.debug("Defined gmes run")
        if g.doIneedTorun(self.cfg["force"]):
            # rename fasta entries, so we don't have white spaces in them;
            # in touch mode the renaming is skipped and only the expected
            # input path is set
            if not self.cfg["touch"]:
                g.input = base.clearFastaNames(fasta, inputfasta)
            else:
                g.input = inputfasta

            logging.info("Running GeneMark-ES")
            g.run(cores=self.cfg["ncores"])
        else:
            logging.debug("I do not need to run gmes, output exists:")
            logging.debug(gtffile)

        # always check if gtffile exists, if not Genemark-ES failed and
        # we can stop here
        if not file.exists(gtffile):
            # log and document failing
            # then stop pipeline
            logging.error("GeneMark-ES failed on this bin")
            self.write_outfile()
            # raise SystemExit directly: the exit() builtin is injected by
            # the site module and is not guaranteed to exist in programs
            raise SystemExit(1)
        elif self.cfg["clean"]:
            # clean temp dirs
            _tmpdirs = ["data", "run", "info", "output/data", "output/gmhmm"]
            tempdirs = [os.path.join(gmesDir, x) for x in _tmpdirs]
            g.cleanup(tempdirs)

        # make a bed file from GTF
        bedf = os.path.join(gmesDir, "proteins.bed")
        # `and` binds tighter than `or`: force always triggers extraction,
        # the isnewer check only counts outside touch mode.
        # NOTE(review): confirm force should override touch mode here
        if self.cfg["force"] or (file.isnewer(gtffile, bedf)
                                 and not self.cfg["touch"]):
            logging.info("Extracting protein locations")
            bedf = base.gmesBED(gtffile, bedf)

        # touch files expected for next step
        if self.cfg["touch"]:
            g.touch([bedf, gmesOut])
        self._clean_fasta = inputfasta
        return (gmesOut, bedf)
Beispiel #4
0
 def doIneedTorun(self, force=False):
     """
     Decide whether this step has to be run.

     force: when true, the step always has to run

     Returns True when ``force`` or ``self.touchonly`` is set. Otherwise
     each path in ``self.output_test`` is checked with
     ``file.isnewer(self.input, path)`` and the first truthy result is
     returned. Returns False when no output triggers a run, including the
     case where ``self.output_test`` is empty.
     """
     logging.debug("Testing if I need to run this step")
     if force or self.touchonly:
         return True

     for p in self.output_test:
         outdated = file.isnewer(self.input, p)
         if outdated:
             logging.debug(f"Need to run because of file: {p}")
             return outdated

     # explicit result instead of reusing the loop variable, which is
     # unbound when there are no outputs to test
     return False