def checkForFiles(self, dirname):
    """
    check that a configuration folder holds all required entries

    Every required entry is joined onto dirname and tested with
    base.exists, in a fixed order. The first missing entry is printed
    and False is returned; True is returned when nothing is missing.
    """
    expected = ("profile.list", "refpkg", "hmms/concat.hmm", "sets/setinfo.csv")
    # lazily look for the first entry that is absent, stop right there
    missing = next(
        (
            entry
            for entry in expected
            if not base.exists(os.path.join(dirname, entry))
        ),
        None,
    )
    if missing is None:
        return True
    print("Configuartion folder does not contain: {}".format(missing))
    return False
def readInfo(self, name):
    """
    load the CONTENTS.json of the refpkg package called name

    The file is expected at <db>/refpkg/<name>/CONTENTS.json, where <db>
    is self.cfg["db"]. If it is missing, the path is logged and exit(13)
    is called. Otherwise the parsed json is returned.
    """
    pkg_dir = os.path.join(self.cfg["db"], "refpkg", name)
    contents_path = os.path.join(pkg_dir, "CONTENTS.json")

    # without the contents file there is nothing to work with
    if not base.exists(contents_path):
        log("Could not find {}".format(contents_path))
        exit(13)

    # parse and hand back the json
    with open(contents_path) as handle:
        return json.load(handle)
def pkgfile(self, name, t):
    """
    resolve the path of one file belonging to a refpkg package

    name selects the package, t is the key looked up in the "files"
    section of the package's CONTENTS.json (read via self.readInfo).
    Returns the path if it exists; otherwise the path is logged and
    exit(12) is called.
    """
    contents = self.readInfo(name)
    candidate = os.path.join(
        self.cfg["db"], "refpkg", name, contents["files"][t]
    )
    if base.exists(candidate):
        return candidate
    log("Could not find: {}".format(candidate))
    exit(12)
def place(self, fasta, bedfile):
    """
    main function to place a bin in the tree.
    will subsequently run hmmer, pplacer and tog (guppy)

    fasta:   path of the fasta file searched by hmmer and aligned for pplacer
    bedfile: path of the bed file handed to the hmmer clean step

    Results are stored on the instance rather than returned:
      - self.cfg["togtreepath"]: path of the tree written by tog
      - self.cfg["togjson"]:     path of the reduced jplace file
      - self.placements:         dict keyed by placement method
                                 ("LCA", "HPA"); every value is the string
                                 "touch" when self.cfg["touch"] is set

    If an input file is missing, no marker gene is found or pplacer
    fails, self.write_outfile() is called followed by exit(1).
    """
    # placement methods evaluated on the tree; shared by the real run and
    # the touch run so both always produce the same keys
    placement_methods = ("LCA", "HPA")

    # test if we can open the input files first
    for path, message in (
        (fasta, "Could not open fasta file"),
        (bedfile, "Could not open bed file"),
    ):
        if not base.exists(path):
            logging.error(message)
            self.write_outfile()
            exit(1)

    # define output files
    hmmDir = os.path.join(self.cfg["outdir"], "workfiles", "hmmer")
    # presumably makes sure the folder exists -- TODO confirm in file.isdir
    file.isdir(hmmDir)
    hmmOut = os.path.join(hmmDir, "placement.tsv")
    hmmOus = os.path.join(hmmDir, "placement.out")
    hitOut = os.path.join(hmmDir, "hits.tsv")

    # run hmmer if forced or input newer than output
    h = hmmer("hmmsearch", fasta, hmmOut, touch=self.cfg["touch"])
    if h.doIneedTorun(self.cfg["force"]) or self.cfg["fplace"]:
        logging.info("Searching for proteins to place in the tree")
        h.run(
            hmmOus,
            hmmfiles=self.config.placementHMMs,
            modus=self.cfg["dbinfo"]["modus"],
            evalue=self.cfg["evalue"],
            cores=self.cfg["ncores"],
        )
        # clean hmmer output
        # NOTE(review): the clean step is kept inside the re-run branch, so
        # a skipped search reuses the default hits.tsv path; confirm it
        # should not also run when hmmer is skipped.
        logging.info("Processing Hmmer results")
        hitOut = h.clean(hmmOut, bedfile, hitOut, self.cfg["mindist"])
    self.updateStep("findprots", "looked for proteins")

    # pplacer paths
    placerDir = os.path.join(self.cfg["outdir"], "workfiles", "pplacer")
    placerDirTmp = os.path.join(placerDir, "tmp")
    pplaceAlinment = os.path.join(placerDir, "horizontalAlignment.fasta")
    pplaceOut = os.path.join(placerDir, "placement.jplace")
    pplaceLog = os.path.join(placerDir, "placement.log")
    pplaceOutReduced = os.path.join(placerDir, "placementReduced.jplace")
    file.isdir(placerDirTmp)

    # pplacer
    logging.debug("Preparing pplacer")
    pp = pplacer("pplacer", fasta, pplaceOut, touch=self.cfg["touch"])
    if pp.doIneedTorun(self.cfg["force"]) or self.cfg["fplace"]:
        logging.debug("Preparing alignments")
        pp.prepareAlignment(
            pplaceAlinment,
            hitOut,
            os.path.join(self.cfg["db"], "profile.list"),
            fasta,
            self.config,
            self.cfg,
            placerDirTmp,
        )
        # an empty marker gene set is only tolerated in touch mode
        if pp.lenscmgs == 0 and not self.cfg["touch"]:
            logging.error("Could not find any marker genes")
            self.write_outfile()
            exit(1)
        else:
            logging.info("Placing proteins in tree")
            self.updateStep("pplacer", "starting")
            pplacer_success = pp.run(
                os.path.join(self.cfg["db"], "refpkg", "concat.refpkg"),
                logfile=pplaceLog,
                cores=self.cfg["ncorespplacer"],
            )
            if pplacer_success is False:
                logging.warning("Pplacer could not finish. Exiting now")
                self.write_outfile()
                exit(1)

    # reduce placements to the placements with at least posterior of p
    logging.debug("Reducing placements")
    if not self.cfg["touch"]:
        pplaceOutReduced = pp.reduceJplace(
            pplaceOut, pplaceOutReduced, self.cfg["minPlacementLikelyhood"]
        )
    else:
        pp.touch([pplaceOutReduced])
    logging.debug("Reducing placements done")

    # run TOG to get a tree
    togTree = os.path.join(placerDir, "placement.tree")
    tg = tog("guppy", pplaceOutReduced, togTree, touch=self.cfg["touch"])
    if tg.doIneedTorun(self.cfg["force"]):
        logging.debug("Fetching tree")
        r = tg.run()
        if r is False:
            # NOTE(review): unlike the other failure paths there is no
            # exit(1) here, so the tree is still loaded below -- confirm
            # that continuing is intended.
            logging.debug("No placement found")
            self.write_outfile()

    logging.debug("Getting best placements")
    # save path to togtree for plotting later
    self.cfg["togtreepath"] = togTree
    self.cfg["togjson"] = pplaceOutReduced

    # now we can place the bin using the tree
    if not self.cfg["touch"]:
        t = treelineage.treeHandler(togTree, annotate=False)
        t2 = treelineage.treeHandler(self.config.tree, annotate=False)
        sets = self.getSets()
        # get one placement per method
        self.placements = {}
        for method in placement_methods:
            self.placements[method] = t.getPlacement(
                method,
                sets,
                t2,
                self.cfg["nPlacements"],
                self.cfg["minSupport"],
                maximum=self.cfg["nEvals"],
                debug=self.cfg["debug"],
            )
    else:
        # touch mode has to expose the same keys as a real run, otherwise
        # a lookup of "HPA" raises a KeyError
        self.placements = {method: "touch" for method in placement_methods}
        # "HCA" was the key written here before; kept as an alias so that
        # any existing lookup keeps working
        self.placements["HCA"] = "touch"
    logging.info("MAG succesfully placed in tree")