コード例 #1
0
ファイル: workflow.py プロジェクト: alienzj/EukCC
    def gmes(self, fasta):
        """
        Predict proteins using GeneMark-ES and build a BED file from its GTF.

        All files live in ``<outdir>/workfiles/gmes``. The GeneMark-ES step
        is only run when ``g.doIneedTorun`` says so; afterwards the GTF file
        must exist, otherwise the pipeline is stopped.

        Args:
            fasta: path of the FASTA file to predict proteins for.

        Returns:
            tuple: ``(gmesOut, bedf)`` -- the path of ``prot_seq.faa`` and
            the BED file (the ``proteins.bed`` path, or whatever
            ``base.gmesBED`` returned when the BED file was rebuilt).

        Raises:
            SystemExit: with status 1 if ``genemark.gtf`` does not exist
                after the GeneMark-ES step.

        Side effects:
            Sets ``self._clean_fasta`` to the path of ``input.fna`` inside
            the gmes work directory. Calls ``self.write_outfile()`` before
            exiting on failure.
        """
        logging.debug("Starting gmes function")

        gmesDir = os.path.join(self.cfg["outdir"], "workfiles", "gmes")
        # presumably ensures the work directory exists -- confirm in `file`
        file.isdir(gmesDir)
        gmesOut = os.path.join(gmesDir, "prot_seq.faa")
        gtffile = os.path.join(gmesDir, "genemark.gtf")
        inputfasta = os.path.abspath(os.path.join(gmesDir, "input.fna"))

        # GeneMark-ES
        g = gmes("runGMES", fasta, [gtffile, gmesOut], touch=self.cfg["touch"])
        logging.debug("Defined gmes run")
        if g.doIneedTorun(self.cfg["force"]):
            # Rename fasta entries so they contain no white space (skipped
            # in touch mode, where only the target path is recorded).
            # NOTE(review): the original comment mentioned a `cleanfasta`
            # config option, but only `touch` is checked here -- confirm.
            if not self.cfg["touch"]:
                g.input = base.clearFastaNames(fasta, inputfasta)
            else:
                g.input = inputfasta

            logging.info("Running GeneMark-ES")
            g.run(cores=self.cfg["ncores"])
        else:
            logging.debug("I do not need to run gmes, output exists:")
            logging.debug(gtffile)

        # Always check that the GTF exists: if it does not, GeneMark-ES
        # failed and there is nothing left to do for this bin.
        if not file.exists(gtffile):
            # log and document the failure, then stop the pipeline
            logging.error("GeneMark-ES failed on this bin")
            self.write_outfile()
            # SystemExit directly instead of the site-provided `exit()`
            # helper, which is not guaranteed to exist outside the REPL.
            raise SystemExit(1)
        elif self.cfg["clean"]:
            # clean temp dirs
            _tmpdirs = ["data", "run", "info", "output/data", "output/gmhmm"]
            tempdirs = [os.path.join(gmesDir, x) for x in _tmpdirs]
            g.cleanup(tempdirs)

        # make a bed file from GTF
        bedf = os.path.join(gmesDir, "proteins.bed")
        # Parentheses spell out the precedence the expression always had:
        # `force` alone triggers a rebuild, even in touch mode.
        # NOTE(review): `(force or isnewer) and not touch` may have been the
        # intent -- confirm before changing.
        if self.cfg["force"] or (
            file.isnewer(gtffile, bedf) and not self.cfg["touch"]
        ):
            logging.info("Extracting protein locations")
            bedf = base.gmesBED(gtffile, bedf)

        # touch files expected for next step
        if self.cfg["touch"]:
            g.touch([bedf, gmesOut])
        self._clean_fasta = inputfasta
        return (gmesOut, bedf)
コード例 #2
0
ファイル: workflow.py プロジェクト: shulp2211/EukCC
    def concatHMM(self):
        """
        Concatenate and press all HMM profiles needed for the placements.

        Profile names are collected from the ``<db>/sets/<node>.set`` file of
        every placement in ``self.placements[self.cfg["placementMethod"]]``.
        A SHA-256 hash of the sorted profile names is stored in ``all.hash``
        next to the concatenated file; when the stored hash matches, the
        existing ``all.hmm`` is reused instead of being rebuilt.

        Returns:
            The concatenated HMM file: the ``all.hmm`` path inside
            ``<outdir>/workfiles/hmmer/estimations`` (or whatever
            ``base.concatenate`` returned when the file was rebuilt).

        Raises:
            SystemExit: with status 1 if no profile names were found.
        """
        # create a dir for this
        hmmdir = os.path.join(self.cfg["outdir"], "workfiles", "hmmer", "estimations")
        # presumably ensures the directory exists -- confirm in `file`
        file.isdir(hmmdir)
        hmmconcat = os.path.join(hmmdir, "all.hmm")

        # touch mode: only create the expected output file
        if self.cfg["touch"]:
            file.touch(hmmconcat)
            return hmmconcat

        profiles = set()
        for p in self.placements[self.cfg["placementMethod"]]:
            localpath = os.path.join(self.cfg["db"], "sets", "{}.set".format(p["node"]))
            with open(localpath) as f:
                for line in f:
                    name = line.strip()
                    # skip blank lines: an empty name would yield a bogus
                    # ".hmm" path further down
                    if name:
                        profiles.add(name)
        # sorted list, so the hash below does not depend on set ordering
        profiles = sorted(profiles)
        # create all paths for all hmms
        hmmerpaths = [
            os.path.join(self.cfg["db"], "hmms", "panther", "{}.hmm".format(profile))
            for profile in profiles
        ]

        # check if we already built the hmm file for exactly these profiles
        profilehash = hashlib.sha256("_".join(profiles).encode()).hexdigest()
        hashpath = os.path.join(hmmdir, "all.hash")
        # Initialised up front: an existing but empty hash file previously
        # left `prevhash` unbound and crashed at the comparison.
        prevhash = None
        if file.exists(hashpath):
            with open(hashpath) as f:
                # only the first line is relevant; "" for an empty file
                prevhash = f.readline().strip()

        if prevhash == profilehash:
            # we can use the existing file, so no need to continue
            log("Using pressed hmms from last run")
            return hmmconcat

        # concatenate
        if not profiles:
            logging.error("We have no profiles to evaluate")
            # SystemExit directly instead of the site-provided `exit()`
            # helper, which is not guaranteed to exist outside the REPL.
            raise SystemExit(1)

        log("{} hmm profiles need to be used for estimations".format(len(profiles)))
        log("Concatenating hmms, this might take a while (IO limited)")
        hmmconcat = base.concatenate(hmmconcat, hmmerpaths)
        # press
        log("Pressing hmms")
        hp = hmmpress("hmmpress", hmmconcat, None, touch=self.cfg["touch"])
        hp.run()

        # save the profile hash only after a successful build, so a failed
        # run is never mistaken for a reusable one
        with open(hashpath, "w") as f:
            f.write(profilehash)

        return hmmconcat