def treeQA(self, options):
    """Run the tree_qa command and summarise phylogenetic marker results.

    Checks that ``options.tree_dir`` exists, loads the per-bin HMM model
    information stored under ``<tree_dir>/storage``, analyses the
    phylogenetic bin statistics and HMMER tables found in that directory,
    and hands the outcome to ``TreeParser.printSummary``. The output
    location is logged only when ``options.file`` is not the empty string,
    and a time stamp is printed at the end.

    Args:
        options: Parsed command options. This method reads ``tree_dir``,
            ``out_format``, ``bTabTable`` and ``file``.
    """
    self.logger.info(
        '[CheckM - tree_qa] Assessing phylogenetic markers found in each bin.')

    tree_dir = options.tree_dir
    checkDirExists(tree_dir)

    # load the HMM model information recorded for each bin
    model_info_path = os.path.join(
        tree_dir, 'storage', DefaultValues.PHYLO_HMM_MODEL_INFO)
    bin_models = MarkerSetParser().loadBinModels(model_info_path)

    # calculate marker gene statistics
    results_parser = ResultsParser(bin_models)
    bin_stats = results_parser.analyseResults(
        tree_dir,
        DefaultValues.BIN_STATS_PHYLO_OUT,
        DefaultValues.HMMER_TABLE_PHYLO_OUT)

    # determine taxonomy of each bin
    TreeParser().printSummary(
        options.out_format,
        tree_dir,
        results_parser,
        options.bTabTable,
        options.file,
        bin_stats)

    # an empty file name means nothing was written to disk, so stay quiet
    report_path = options.file
    if report_path != '':
        self.logger.info('QA information written to: ' + report_path)

    self.timeKeeper.printTimeStamp()
def qa(self, options):
    """Run the qa command and tabulate genome statistics for each bin.

    Checks that ``options.analyze_dir`` exists (and, when given, that the
    ``options.exclude_markers`` file exists), runs the amino acid identity
    step, loads the per-bin HMM models and marker sets, analyses the
    results, then prints and caches the summary. The output location is
    logged only when ``options.file`` is not the empty string, and a time
    stamp is printed at the end.

    Args:
        options: Parsed command options. This method reads
            ``analyze_dir``, ``exclude_markers``, ``aai_strain``,
            ``alignment_file``, ``threads``, ``marker_file``,
            ``bIgnoreThresholds``, ``e_value``, ``length``,
            ``bSkipPseudoGeneCorrection``, ``bSkipAdjCorrection``,
            ``out_format``, ``bIndividualMarkers``, ``coverage_file``,
            ``bTabTable`` and ``file``.
    """
    self.logger.info('[CheckM - qa] Tabulating genome statistics.')

    analyze_dir = options.analyze_dir
    checkDirExists(analyze_dir)

    # the exclusion list is optional; validate it only when one was supplied
    if options.exclude_markers:
        checkFileExists(options.exclude_markers)

    # calculate AAI between marks with multiple hits in a single bin
    aai = AminoAcidIdentity()
    aai.run(options.aai_strain, analyze_dir, options.alignment_file)

    # get HMM file for each bin
    marker_parser = MarkerSetParser(options.threads)
    model_info_path = os.path.join(
        analyze_dir, 'storage', DefaultValues.CHECKM_HMM_MODEL_INFO)
    bin_models = marker_parser.loadBinModels(model_info_path)

    bin_ids = getBinIdsFromOutDir(analyze_dir)
    bin_marker_sets = marker_parser.getMarkerSets(
        analyze_dir,
        bin_ids,
        options.marker_file,
        options.exclude_markers)

    # get results for each bin
    analysis_settings = {
        'bIgnoreThresholds': options.bIgnoreThresholds,
        'evalueThreshold': options.e_value,
        'lengthThreshold': options.length,
        'bSkipPseudoGeneCorrection': options.bSkipPseudoGeneCorrection,
        'bSkipAdjCorrection': options.bSkipAdjCorrection,
    }
    results_parser = ResultsParser(bin_models)
    results_parser.analyseResults(
        analyze_dir,
        DefaultValues.BIN_STATS_OUT,
        DefaultValues.HMMER_TABLE_OUT,
        **analysis_settings)

    results_parser.printSummary(
        options.out_format,
        aai,
        bin_marker_sets,
        options.bIndividualMarkers,
        options.coverage_file,
        options.bTabTable,
        options.file,
        anaFolder=analyze_dir)

    results_parser.cacheResults(
        analyze_dir, bin_marker_sets, options.bIndividualMarkers)

    # an empty file name means nothing was written to disk, so stay quiet
    report_path = options.file
    if report_path != '':
        self.logger.info('QA information written to: ' + report_path)

    self.timeKeeper.printTimeStamp()
def lineageSet(self, options, db=None):
    """Run the lineage_set command and write lineage-specific marker sets.

    Checks that ``options.tree_dir`` exists, loads the per-bin HMM model
    information stored under ``<tree_dir>/storage``, analyses the
    phylogenetic bin statistics and HMMER tables, and then calls
    ``TreeParser.getBinMarkerSets`` with ``options.marker_file`` as one of
    its arguments. The marker file path is logged and a time stamp is
    printed at the end.

    Note that ``options`` is modified in place: ``num_genomes_markers``,
    ``bootstrap`` and ``bRequireTaxonomy`` are overwritten with fixed values
    before the marker sets are determined.

    Args:
        options: Parsed command options. This method reads ``tree_dir``,
            ``marker_file``, ``bNoLineageSpecificRefinement``,
            ``bForceDomain``, ``unique`` and ``multi``.
        db: Not referenced anywhere in this method body.
    """
    self.logger.info(
        '[CheckM - lineage_set] Inferring lineage-specific marker sets.')

    tree_dir = options.tree_dir
    checkDirExists(tree_dir)

    # load the HMM model information recorded for each bin
    model_info_path = os.path.join(
        tree_dir, 'storage', DefaultValues.PHYLO_HMM_MODEL_INFO)
    bin_models = MarkerSetParser().loadBinModels(model_info_path)

    # calculate marker gene statistics
    phylo_results = ResultsParser(bin_models)
    phylo_results.analyseResults(
        tree_dir,
        DefaultValues.BIN_STATS_PHYLO_OUT,
        DefaultValues.HMMER_TABLE_PHYLO_OUT)

    # These options are incompatible with how the lineage-specific marker
    # set is selected, so the default values are currently hard-coded
    options.num_genomes_markers = 2
    options.bootstrap = 0
    options.bRequireTaxonomy = False

    TreeParser().getBinMarkerSets(
        tree_dir,
        options.marker_file,
        options.num_genomes_markers,
        options.bootstrap,
        options.bNoLineageSpecificRefinement,
        options.bForceDomain,
        options.bRequireTaxonomy,
        phylo_results,
        options.unique,
        options.multi)

    self.logger.info('Marker set written to: ' + options.marker_file)
    self.timeKeeper.printTimeStamp()