Ejemplo n.º 1
0
 def checkForFiles(self, dirname):
     """
     check that a configuration folder contains all required entries

     Every required relative path is looked up below ``dirname``; the
     search stops at the first entry that is missing.

     :param dirname: path of the configuration folder to inspect
     :return: True if every required path exists, False as soon as one
         is missing (the missing entry is reported on stdout)
     """
     required = ["profile.list", "refpkg", "hmms/concat.hmm", "sets/setinfo.csv"]
     for f in required:
         p = os.path.join(dirname, f)
         if not base.exists(p):
             print("Configuration folder does not contain: {}".format(f))
             return False
     return True
Ejemplo n.º 2
0
 def readInfo(self, name):
     """
     load the CONTENTS.json of a refpkg package

     :param name: name of the package folder below ``<db>/refpkg``
     :return: the parsed JSON content of the package's CONTENTS.json
     :raises SystemExit: with exit code 13 if the file does not exist
     """
     p = os.path.join(self.cfg["db"], "refpkg", name, "CONTENTS.json")
     # abort if we cannot find the file
     if not base.exists(p):
         log("Could not find {}".format(p))
         # raise SystemExit directly: the exit() helper is injected by the
         # site module for interactive use and may be absent in programs
         raise SystemExit(13)
     # JSON is UTF-8 by specification, so do not depend on the locale
     # default encoding when reading it
     with open(p, encoding="utf-8") as json_file:
         return json.load(json_file)
Ejemplo n.º 3
0
 def pkgfile(self, name, t):
     """
     get a file path for a refpkg package

     The file name is looked up under ``info["files"][t]`` in the package
     info returned by ``readInfo`` and joined onto the package folder.

     :param name: name of the package folder below ``<db>/refpkg``
     :param t: key into the ``files`` mapping of the package info
     :return: path to the requested file
     :raises SystemExit: with exit code 12 if the resolved path does not
         exist
     """
     info = self.readInfo(name)
     p = os.path.join(self.cfg["db"], "refpkg", name, info["files"][t])
     if not base.exists(p):
         log("Could not find: {}".format(p))
         # raise SystemExit directly: the exit() helper is injected by the
         # site module for interactive use and may be absent in programs
         raise SystemExit(12)
     return p
Ejemplo n.º 4
0
    def place(self, fasta, bedfile):
        """
        main function to place a bin in the tree.

        Steps, in order: search the proteins with hmmer, prepare an
        alignment and run pplacer, reduce the resulting placements, build
        a tree with guppy (tog) and finally derive the LCA and HPA
        placements from that tree.

        On success the following state is updated:
        ``self.cfg["togtreepath"]``, ``self.cfg["togjson"]`` and
        ``self.placements`` (a dict keyed by placement method; in touch
        mode every value is the string ``"touch"``).

        :param fasta: path to the input fasta file
        :param bedfile: path to the input bed file
        :raises SystemExit: with exit code 1, after ``write_outfile()`` was
            called, if an input file is missing, no marker genes were
            found, or pplacer did not finish
        """
        # placement methods computed below; the touch-mode placeholder must
        # expose exactly the same keys as a real run
        methods = ("LCA", "HPA")

        # test if we can open the input files first
        if not base.exists(fasta):
            logging.error("Could not open fasta file")
            self.write_outfile()
            raise SystemExit(1)
        if not base.exists(bedfile):
            logging.error("Could not open bed file")
            self.write_outfile()
            raise SystemExit(1)

        # define output files
        hmmDir = os.path.join(self.cfg["outdir"], "workfiles", "hmmer")
        # return value is ignored; presumably this makes sure the folder
        # exists -- TODO confirm against file.isdir
        file.isdir(hmmDir)
        hmmOut = os.path.join(hmmDir, "placement.tsv")
        hmmOus = os.path.join(hmmDir, "placement.out")
        hitOut = os.path.join(hmmDir, "hits.tsv")

        # run hmmer if forced or input newer than output
        h = hmmer("hmmsearch", fasta, hmmOut, touch=self.cfg["touch"])
        if h.doIneedTorun(self.cfg["force"]) or self.cfg["fplace"]:
            logging.info("Searching for proteins to place in the tree")
            h.run(
                hmmOus,
                hmmfiles=self.config.placementHMMs,
                modus=self.cfg["dbinfo"]["modus"],
                evalue=self.cfg["evalue"],
                cores=self.cfg["ncores"],
            )
            # clean hmmer output; hitOut is replaced by whatever clean()
            # returns and is used for the alignment below
            logging.info("Processing Hmmer results")
            hitOut = h.clean(hmmOut, bedfile, hitOut, self.cfg["mindist"])
            self.updateStep("findprots", "looked for proteins")

        # pplacer paths
        placerDir = os.path.join(self.cfg["outdir"], "workfiles", "pplacer")
        placerDirTmp = os.path.join(placerDir, "tmp")
        pplaceAlinment = os.path.join(placerDir, "horizontalAlignment.fasta")
        pplaceOut = os.path.join(placerDir, "placement.jplace")
        pplaceLog = os.path.join(placerDir, "placement.log")
        pplaceOutReduced = os.path.join(placerDir, "placementReduced.jplace")
        file.isdir(placerDirTmp)

        # pplacer
        logging.debug("Preparing pplacer")
        pp = pplacer("pplacer", fasta, pplaceOut, touch=self.cfg["touch"])
        if pp.doIneedTorun(self.cfg["force"]) or self.cfg["fplace"]:
            logging.debug("Preparing alignments")
            pp.prepareAlignment(
                pplaceAlinment,
                hitOut,
                os.path.join(self.cfg["db"], "profile.list"),
                fasta,
                self.config,
                self.cfg,
                placerDirTmp,
            )
            # without marker genes there is nothing to place; in touch
            # mode we carry on regardless
            if pp.lenscmgs == 0 and not self.cfg["touch"]:
                logging.error("Could not find any marker genes")
                self.write_outfile()
                raise SystemExit(1)
            else:
                logging.info("Placing proteins in tree")
                self.updateStep("pplacer", "starting")
                pplacer_success = pp.run(
                    os.path.join(self.cfg["db"], "refpkg", "concat.refpkg"),
                    logfile=pplaceLog,
                    cores=self.cfg["ncorespplacer"],
                )
                if pplacer_success is False:
                    logging.warning("Pplacer could not finish. Exiting now")
                    self.write_outfile()
                    raise SystemExit(1)

        # reduce placements to the placements with at least posterior of p
        logging.debug("Reducing placements")
        if not self.cfg["touch"]:
            pplaceOutReduced = pp.reduceJplace(
                pplaceOut, pplaceOutReduced,
                self.cfg["minPlacementLikelyhood"])
        else:
            pp.touch([pplaceOutReduced])
        logging.debug("Reducing placements done")
        # run TOG to get a tree
        togTree = os.path.join(placerDir, "placement.tree")
        tg = tog("guppy", pplaceOutReduced, togTree, touch=self.cfg["touch"])
        if tg.doIneedTorun(self.cfg["force"]):
            logging.debug("Fetching tree")
            r = tg.run()
            if r is False:
                logging.debug("No placement found")
                # NOTE(review): unlike the other failure paths, execution
                # continues after write_outfile() here -- confirm that
                # this is intended
                self.write_outfile()

        logging.debug("Getting best placements")
        # save path to togtree for plotting later
        self.cfg["togtreepath"] = togTree
        self.cfg["togjson"] = pplaceOutReduced
        # now we can place the bin using the tree
        if not self.cfg["touch"]:
            t = treelineage.treeHandler(togTree, annotate=False)
            t2 = treelineage.treeHandler(self.config.tree, annotate=False)
            sets = self.getSets()
            # get LCA and HPA placements
            self.placements = {}
            for method in methods:
                self.placements[method] = t.getPlacement(
                    method,
                    sets,
                    t2,
                    self.cfg["nPlacements"],
                    self.cfg["minSupport"],
                    maximum=self.cfg["nEvals"],
                    debug=self.cfg["debug"],
                )
        else:
            # placeholder with the same keys a real run produces
            self.placements = {method: "touch" for method in methods}

        logging.info("MAG succesfully placed in tree")