def removeOutliers(self, binFile, outlierFile, outputFile):
    """Remove sequences specified as outliers in the provided file.

    The bin's sequences are read from binFile, every sequence that
    outlierFile lists for this bin is removed, and the remaining
    sequences are written to outputFile.

    binFile: sequence file of the bin to modify (read with readFasta).
    outlierFile: tab-separated file with one header row; the first
        column is a bin id and the second column a sequence id.
    outputFile: path the modified bin is written to (via writeFasta).
    """
    binSeqs = readFasta(binFile)
    binIdToModify = binIdFromFilename(binFile)

    # collect the ids of the sequences to remove from this bin
    checkFileExists(outlierFile)
    seqsToRemove = []
    # the context manager closes the file even if parsing raises
    with open(outlierFile) as fin:
        next(fin, None)  # skip the header row
        for line in fin:
            lineSplit = line.split('\t')
            if lineSplit[0] == binIdToModify:
                seqsToRemove.append(lineSplit[1])

    # remove sequences from bin
    if seqsToRemove:
        self.__removeSeqs(binSeqs, seqsToRemove)

    # save modified bin
    writeFasta(binSeqs, outputFile)
def coveragePcaPlot(self, options):
    """PCA plot of coverage profiles

    Builds one coverage profile per sequence from options.coverage_file,
    computes a PCA over all profiles, and saves one plot per bin file
    found in options.bin_dir into options.output_dir.
    Exits with status 1 when the profiles have fewer than 2 dimensions.
    """
    self.logger.info(
        '[CheckM - cov_pca] Creating PCA plot of coverage profiles.')

    checkDirExists(options.bin_dir)
    checkFileExists(options.coverage_file)
    makeSurePathExists(options.output_dir)

    binFiles = self.binFiles(options.bin_dir, options.extension)

    coverageParser = Coverage(threads=1)
    coverageStats = coverageParser.parseCoverage(options.coverage_file)

    # one row per sequence, one column per entry of its coverage dictionary
    seqIds = []
    profileRows = []
    for seqDict in coverageStats.values():
        for seqId, bamDict in seqDict.items():
            seqIds.append(seqId)
            profileRows.append(list(bamDict.values()))

    coverageProfiles = np.array(profileRows)
    if coverageProfiles.shape[1] < 2:
        self.logger.error(
            'Coverage profile is 1 dimensional. PCA requires at least 2 dimensions.'
        )
        sys.exit(1)

    self.logger.info('Computing PCA of coverage profiles.\n')
    pc, variance = PCA().pcaMatrix(coverageProfiles,
                                   fraction=1.0,
                                   bCenter=True,
                                   bScale=False)

    plots = PcaPlot(options)
    numBins = len(binFiles)
    for fileNumber, f in enumerate(binFiles, 1):
        self.logger.info(
            'Plotting PCA of coverage profiles for %s (%d of %d)' %
            (f, fileNumber, numBins))

        plots.plot(f, seqIds, pc, variance)

        plotPrefix = os.path.join(options.output_dir, binIdFromFilename(f))
        outputFile = plotPrefix + '.cov_pca_plots.' + options.image_type
        plots.savePlot(outputFile, dpi=options.dpi)
        self.logger.info('Plot written to: ' + outputFile)

    self.timeKeeper.printTimeStamp()
def parseBinStatsExt(self, resultsFolder):
    """Read bin statistics from file.

    The file <resultsFolder>/storage/<DefaultValues.BIN_STATS_EXT_OUT> is
    read in full and evaluated as a single Python literal, which is
    returned.
    """
    statsPath = os.path.join(resultsFolder, 'storage',
                             DefaultValues.BIN_STATS_EXT_OUT)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        contents = handle.read()

    return ast.literal_eval(contents)
def __init__(self, filename):
    """Load gene information from filename and build per-sequence masks.

    After the file has been parsed, one coding-base mask is built for
    every sequence id present in self.genes.
    """
    checkFileExists(filename)

    # containers that exist before parsing starts
    # (presumably filled by __parseGFF; confirm against that method)
    self.genes = {}
    self.lastCodingBase = {}

    self.__parseGFF(filename)

    masks = self.codingBaseMasks = {}
    for seqId in self.genes:
        masks[seqId] = self.__buildCodingBaseMask(seqId)
def parseSeqStats(self, resultsFolder, seqStatsFile):
    """Read sequence statistics from file.

    seqStatsFile is a file name relative to <resultsFolder>/storage. The
    whole file is evaluated as a single Python literal and returned.
    """
    statsPath = os.path.join(resultsFolder, 'storage', seqStatsFile)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        contents = handle.read()

    return ast.literal_eval(contents)
def run(self, binFiles, seqFile, outSeqFile, outStatsFile, minSeqLen):
    """Write all sequences of seqFile that are not present in any bin.

    binFiles: iterable of bin sequence files (read with readFasta).
    seqFile: sequence file holding all sequences.
    outSeqFile: output file receiving the unbinned sequences as
        '>id' / sequence line pairs.
    outStatsFile: tab-separated output file with sequence id, length
        and GC percentage of each unbinned sequence.
    minSeqLen: unbinned sequences shorter than this are ignored.
    """
    checkFileExists(seqFile)

    # get list of sequences in bins
    self.logger.info(' Reading binned sequences.')

    binnedSeqs = {}
    totalBinnedBases = 0
    for binFile in binFiles:
        seqs = readFasta(binFile)
        binnedSeqs.update(seqs)
        totalBinnedBases += sum(len(seq) for seq in seqs.values())

    self.logger.info(' Read %d (%.2f Mbp) binned sequences.' %
                     (len(binnedSeqs), float(totalBinnedBases) / 1e6))

    # get list of all sequences
    self.logger.info(' Reading all sequences.')
    allSeqs = readFasta(seqFile)
    totalBases = sum(len(seq) for seq in allSeqs.values())

    self.logger.info(' Read %d (%.2f Mbp) sequences.' %
                     (len(allSeqs), float(totalBases) / 1e6))

    # write all unbinned sequences
    self.logger.info(' Identifying unbinned sequences >= %d bp.' % minSeqLen)

    unbinnedCount = 0
    unbinnedBases = 0
    # context managers close both outputs even if writing fails part-way
    with open(outSeqFile, 'w') as seqOut:
        with open(outStatsFile, 'w') as statsOut:
            statsOut.write('Sequence Id\tLength\tGC\n')

            # items() rather than iteritems() so this runs on Python 2 and 3
            for seqId, seq in allSeqs.items():
                if seqId in binnedSeqs or len(seq) < minSeqLen:
                    continue

                unbinnedCount += 1
                seqOut.write('>' + seqId + '\n')
                seqOut.write(seq + '\n')
                unbinnedBases += len(seq)

                a, c, g, t = baseCount(seq)
                acgt = a + c + g + t
                # a sequence without any counted base gets a GC of 0
                # instead of raising ZeroDivisionError
                gcPercent = float(g + c) * 100 / acgt if acgt else 0.0
                statsOut.write('%s\t%d\t%.2f\n' % (seqId, len(seq), gcPercent))

    self.logger.info(' Identified %d (%.2f Mbp) unbinned sequences.' %
                     (unbinnedCount, float(unbinnedBases) / 1e6))

    # report 0% rather than dividing by zero when seqFile holds no data
    perUnbinnedSeqs = unbinnedCount * 100.0 / len(allSeqs) if allSeqs else 0.0
    perUnbinnedBases = unbinnedBases * 100.0 / totalBases if totalBases else 0.0

    self.logger.info('')
    self.logger.info(' Percentage of unbinned sequences: %.2f%%' %
                     perUnbinnedSeqs)
    self.logger.info(' Percentage of unbinned bases: %.2f%%' %
                     perUnbinnedBases)
def parseBinStats(self, resultsFolder, binStatsFile):
    """Read bin statistics from file.

    binStatsFile is a file name relative to <resultsFolder>/storage. The
    whole file is evaluated as a single Python literal and returned.
    """
    statsPath = os.path.join(resultsFolder, 'storage', binStatsFile)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        contents = handle.read()

    return ast.literal_eval(contents)
def parseMarkerGeneStats(self, resultsFolder):
    """Read marker gene statistics from file.

    The file <resultsFolder>/storage/<DefaultValues.MARKER_GENE_STATS> is
    read in full and evaluated as a single Python literal, which is
    returned.
    """
    statsPath = os.path.join(resultsFolder, 'storage',
                             DefaultValues.MARKER_GENE_STATS)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        contents = handle.read()

    return ast.literal_eval(contents)
def qa(self, options):
    """QA command

    Tabulates statistics for the bins found under options.analyze_dir:
    runs the amino acid identity step, loads the per-bin HMM models and
    marker sets, analyses the stored results, then prints and caches the
    summary.
    """
    self.logger.info('[CheckM - qa] Tabulating genome statistics.')

    checkDirExists(options.analyze_dir)
    excludeMarkers = options.exclude_markers
    if excludeMarkers:
        checkFileExists(excludeMarkers)

    # calculate AAI between marks with multiple hits in a single bin
    aaiCalculator = AminoAcidIdentity()
    aaiCalculator.run(options.aai_strain,
                      options.analyze_dir,
                      options.alignment_file)

    # get HMM file for each bin
    markerSetParser = MarkerSetParser(options.threads)

    modelInfoPath = os.path.join(options.analyze_dir,
                                 'storage',
                                 DefaultValues.CHECKM_HMM_MODEL_INFO)
    binIdToModels = markerSetParser.loadBinModels(modelInfoPath)

    binIdToBinMarkerSets = markerSetParser.getMarkerSets(
        options.analyze_dir,
        getBinIdsFromOutDir(options.analyze_dir),
        options.marker_file,
        options.exclude_markers)

    # get results for each bin
    resultsParser = ResultsParser(binIdToModels)
    resultsParser.analyseResults(
        options.analyze_dir,
        DefaultValues.BIN_STATS_OUT,
        DefaultValues.HMMER_TABLE_OUT,
        bIgnoreThresholds=options.bIgnoreThresholds,
        evalueThreshold=options.e_value,
        lengthThreshold=options.length,
        bSkipPseudoGeneCorrection=options.bSkipPseudoGeneCorrection,
        bSkipAdjCorrection=options.bSkipAdjCorrection)

    resultsParser.printSummary(options.out_format,
                               aaiCalculator,
                               binIdToBinMarkerSets,
                               options.bIndividualMarkers,
                               options.coverage_file,
                               options.bTabTable,
                               options.file,
                               anaFolder=options.analyze_dir)
    resultsParser.cacheResults(options.analyze_dir,
                               binIdToBinMarkerSets,
                               options.bIndividualMarkers)

    # an empty options.file means no output file is announced
    if options.file != '':
        self.logger.info('QA information written to: ' + options.file)

    self.timeKeeper.printTimeStamp()
def run(self, binFiles, seqFile, outSeqFile, outStatsFile, minSeqLen):
    """Write all sequences of seqFile that are not present in any bin.

    binFiles: iterable of bin sequence files (read with readFasta).
    seqFile: sequence file holding all sequences.
    outSeqFile: output file receiving the unbinned sequences as
        '>id' / sequence line pairs.
    outStatsFile: tab-separated output file with sequence id, length
        and GC percentage of each unbinned sequence.
    minSeqLen: unbinned sequences shorter than this are ignored.
    """
    checkFileExists(seqFile)

    # get list of sequences in bins
    self.logger.info('Reading binned sequences.')

    binnedSeqs = {}
    totalBinnedBases = 0
    for binFile in binFiles:
        seqs = readFasta(binFile)
        binnedSeqs.update(seqs)
        totalBinnedBases += sum(len(seq) for seq in seqs.values())

    self.logger.info(' Read %d (%.2f Mbp) binned sequences.' %
                     (len(binnedSeqs), float(totalBinnedBases) / 1e6))

    # get list of all sequences
    self.logger.info('Reading all sequences.')
    allSeqs = readFasta(seqFile)
    totalBases = sum(len(seq) for seq in allSeqs.values())

    self.logger.info(' Read %d (%.2f Mbp) sequences.' %
                     (len(allSeqs), float(totalBases) / 1e6))

    # write all unbinned sequences
    self.logger.info('Identifying unbinned sequences >= %d bp.' % minSeqLen)

    unbinnedCount = 0
    unbinnedBases = 0
    # context managers close both outputs even if writing fails part-way
    with open(outSeqFile, 'w') as seqOut:
        with open(outStatsFile, 'w') as statsOut:
            statsOut.write('Sequence Id\tLength\tGC\n')

            # items() rather than iteritems() so this runs on Python 2 and 3
            for seqId, seq in allSeqs.items():
                if seqId in binnedSeqs or len(seq) < minSeqLen:
                    continue

                unbinnedCount += 1
                seqOut.write('>' + seqId + '\n')
                seqOut.write(seq + '\n')
                unbinnedBases += len(seq)

                a, c, g, t = baseCount(seq)
                acgt = a + c + g + t
                # a sequence without any counted base gets a GC of 0
                # instead of raising ZeroDivisionError
                gcPercent = float(g + c) * 100 / acgt if acgt else 0.0
                statsOut.write('%s\t%d\t%.2f\n' % (seqId, len(seq), gcPercent))

    self.logger.info(' Identified %d (%.2f Mbp) unbinned sequences.' %
                     (unbinnedCount, float(unbinnedBases) / 1e6))

    # report 0% rather than dividing by zero when seqFile holds no data
    perUnbinnedSeqs = unbinnedCount * 100.0 / len(allSeqs) if allSeqs else 0.0
    perUnbinnedBases = unbinnedBases * 100.0 / totalBases if totalBases else 0.0

    self.logger.info('Percentage of unbinned sequences: %.2f%%' %
                     perUnbinnedSeqs)
    self.logger.info('Percentage of unbinned bases: %.2f%%' %
                     perUnbinnedBases)
def genesInClan(self):
    """Determine all genes within each clan.

    Scans self.pfamClanFile line by line. A '#=GF AC' line sets the
    current Pfam accession (third whitespace-separated field) and a
    '#=GF CL' line assigns that accession to the clan named in its third
    field.

    Returns a defaultdict mapping clan id -> set of Pfam accessions.
    """
    checkFileExists(self.pfamClanFile)

    d = defaultdict(set)
    # the context manager closes the file even if parsing raises
    with open(self.pfamClanFile) as fin:
        for line in fin:
            if '#=GF AC' in line:
                pfamAcc = line.split()[2].strip()
            elif '#=GF CL' in line:
                clanId = line.split()[2].strip()
                d[clanId].add(pfamAcc)

    return d
def parseMarkerGeneStats(self, resultsFolder):
    """Read marker gene statistics from file.

    Each line of <resultsFolder>/storage/<DefaultValues.MARKER_GENE_STATS>
    is split on tabs: the first field is used as the key and the second
    field is evaluated as a Python literal to give the value.

    Returns the resulting dictionary.
    """
    statsPath = os.path.join(resultsFolder, 'storage',
                             DefaultValues.MARKER_GENE_STATS)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        records = (row.split('\t') for row in handle)
        return {fields[0]: ast.literal_eval(fields[1]) for fields in records}
def parseBinStats(self, resultsFolder, binStatsFile):
    """Read bin statistics from file.

    binStatsFile is a file name relative to <resultsFolder>/storage. Each
    line is split on tabs: the first field is used as the key and the
    second field is evaluated as a Python literal to give the value.

    Returns the resulting dictionary.
    """
    statsPath = os.path.join(resultsFolder, 'storage', binStatsFile)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        records = (row.split('\t') for row in handle)
        return {fields[0]: ast.literal_eval(fields[1]) for fields in records}
def parseBinStatsExt(self, resultsFolder):
    """Read bin statistics from file.

    Each line of <resultsFolder>/storage/<DefaultValues.BIN_STATS_EXT_OUT>
    is split on tabs: the first field is used as the key and the second
    field is evaluated as a Python literal to give the value.

    Returns the resulting dictionary.
    """
    statsPath = os.path.join(resultsFolder, 'storage',
                             DefaultValues.BIN_STATS_EXT_OUT)
    checkFileExists(statsPath)

    with open(statsPath, 'r') as handle:
        records = (row.split('\t') for row in handle)
        return {fields[0]: ast.literal_eval(fields[1]) for fields in records}
def pfamIdToClanId(self):
    """Determine clan of each pfam.

    Scans self.pfamClanFile line by line. A '#=GF AC' line sets the
    current Pfam accession (third whitespace-separated field) and a
    '#=GF CL' line records the clan id in its third field for that
    accession.

    Returns a dictionary mapping Pfam accession -> clan id.
    """
    checkFileExists(self.pfamClanFile)

    d = {}
    # the context manager closes the file even if parsing raises
    with open(self.pfamClanFile) as fin:
        for line in fin:
            if '#=GF AC' in line:
                pfamAcc = line.split()[2].strip()
            elif '#=GF CL' in line:
                clanId = line.split()[2].strip()
                d[pfamAcc] = clanId

    return d
def genePositions(self, filename):
    """Read the start and end position of each gene from header lines.

    Only lines starting with '>' are used. After the '>' the line is
    split on whitespace: field 0 is the gene id, field 2 the start
    position and field 4 the end position
    (presumably headers look like '>id # start # end ...' - confirm).

    Returns a dictionary mapping gene id -> [startPos, endPos].
    """
    checkFileExists(filename)

    gp = {}
    # the context manager closes the file even if parsing raises
    with open(filename) as fin:
        for line in fin:
            if line.startswith('>'):
                lineSplit = line[1:].split()
                geneId = lineSplit[0]
                startPos = int(lineSplit[2])
                endPos = int(lineSplit[4])
                gp[geneId] = [startPos, endPos]

    return gp
def profile(self, options):
    """Profile command

    Runs Profile on options.coverage_file, passing options.file and
    options.bTabTable through unchanged.
    """
    self.logger.info(
        '[CheckM - profile] Calculating percentage of reads mapped to each bin.'
    )

    coverageFile = options.coverage_file
    checkFileExists(coverageFile)

    profiler = Profile()
    profiler.run(coverageFile, options.file, options.bTabTable)

    # an empty options.file means no output file is announced
    if options.file != '':
        self.logger.info('Profile information written to: ' + options.file)

    self.timeKeeper.printTimeStamp()
def tetraSignatures(self, options):
    """Tetranucleotide signature command

    Calculates signatures for options.seq_file with
    GenomicSignatures(4, options.threads) and writes them to
    options.output_file.
    """
    self.logger.info(
        '[CheckM - tetra] Calculating tetranucleotide signature of sequences.'
    )

    checkFileExists(options.seq_file)

    # make sure the directory that will hold the output file exists
    outputDir = os.path.dirname(options.output_file)
    makeSurePathExists(outputDir)

    signatures = GenomicSignatures(4, options.threads)
    signatures.calculate(options.seq_file, options.output_file)

    self.logger.info('Tetranucletoide signatures written to: ' +
                     options.output_file)

    self.timeKeeper.printTimeStamp()
def ssuFinder(self, options):
    """SSU finder command

    Runs SSU_Finder on options.seq_file and the bin files found in
    options.bin_dir, with output going to options.output_dir.
    """
    self.logger.info(
        '[CheckM - ssu_finder] Identifying SSU (16S/18S) rRNAs in sequences.'
    )

    binFiles = self.binFiles(options.bin_dir, options.extension)

    checkFileExists(options.seq_file)
    makeSurePathExists(options.output_dir)

    finder = SSU_Finder(options.threads)
    finder.run(options.seq_file,
               binFiles,
               options.output_dir,
               options.evalue,
               options.concatenate)

    self.timeKeeper.printTimeStamp()
def binUnion(self, options):
    """Bin union command

    options.bin_or_checkm_qa_table alternates bin folders (even
    positions) and QA table files (odd positions). At least two bin
    folders and an equal number of tables are required, otherwise the
    process exits with status 1. The results of BinUnion.report are
    written to 'union.txt' and 'contigConflicts.csv' in
    options.output_dir.
    """
    self.logger.info(
        '[CheckM - bin_union] Redundancy reduce multiple sets of bins into a single set.'
    )

    output_dir = options.output_dir
    makeSurePathExists(output_dir)

    # split the alternating argument list into folders and tables
    bin_dirs = []
    checkmQaTsvs = []
    for position, path in enumerate(options.bin_or_checkm_qa_table):
        isBinDir = position % 2 == 0
        if isBinDir:
            checkDirExists(path)
            bin_dirs.append(path)
        else:
            checkFileExists(path)
            checkmQaTsvs.append(path)

    numBinDirs = len(bin_dirs)
    numQaTsvs = len(checkmQaTsvs)

    if numBinDirs < 2:
        self.logger.error(
            "Need to specify at least two bin folders, found %i: " %
            numBinDirs)
        sys.exit(1)

    if numBinDirs != numQaTsvs:
        self.logger.error(
            "Need to specify the same number of bin folders as checkm_qa_tsv files, found %i and %i, respectively: "
            % (numBinDirs, numQaTsvs))
        sys.exit(1)

    binFileSets = []
    for bin_dir in bin_dirs:
        self.logger.info(
            "Reading fasta files with extension %s from bin folder %s" %
            (options.extension, bin_dir))
        binFileSets.append(self.binFiles(bin_dir, options.extension))

    unionBinOutputFile = os.path.join(output_dir, 'union.txt')
    contigConflictsOutputFile = os.path.join(output_dir,
                                             'contigConflicts.csv')

    BinUnion().report(bin_dirs,
                      binFileSets,
                      checkmQaTsvs,
                      unionBinOutputFile,
                      contigConflictsOutputFile,
                      options.min_completeness,
                      options.max_contamination)
def parallelCoordPlot(self, options):
    """Parallel coordinate plot command

    Parses options.coverage_file, computes sequence statistics for each
    bin file in options.bin_dir, and saves one parallel coordinate plot
    per bin into options.output_dir.
    """
    self.logger.info(
        '[CheckM - par_plot] Creating parallel coordinate plot of GC and coverage.'
    )

    checkDirExists(options.bin_dir)
    makeSurePathExists(options.output_dir)
    checkFileExists(options.coverage_file)

    binFiles = self.binFiles(options.bin_dir, options.extension)

    # read coverage stats file
    coverageParser = Coverage(threads=1)
    coverageStats = coverageParser.parseCoverage(options.coverage_file)

    # calculate sequence stats for all bins
    self.logger.info('Calculating sequence statistics for each bin.')
    binStats = BinStatistics()
    seqStats = {}
    for binFile in binFiles:
        seqStats[binIdFromFilename(binFile)] = binStats.sequenceStats(
            options.results_dir, binFile)

    # create plot for each bin
    plot = ParallelCoordPlot(options)
    numBins = len(binFiles)
    for fileNumber, binFile in enumerate(binFiles, 1):
        binId = binIdFromFilename(binFile)
        self.logger.info('Plotting parallel coordinates for %s (%d of %d)' %
                         (binId, fileNumber, numBins))

        plot.plot(binId, seqStats, coverageStats)

        plotPrefix = os.path.join(options.output_dir, binId)
        outputFile = plotPrefix + '.paralel_coord_plot.' + options.image_type
        plot.savePlot(outputFile, dpi=options.dpi)
        self.logger.info('Plot written to: ' + outputFile)

    self.timeKeeper.printTimeStamp()
def outliers(self, options):
    """Outlier command

    Runs BinTools.identifyOutliers over the bin files found in
    options.bin_dir, with the outcome going to options.output_file.
    """
    self.logger.info('[CheckM - outlier] Identifying outliers in bins.')

    checkDirExists(options.bin_dir)
    checkFileExists(options.tetra_profile)

    # make sure the directory that will hold the output file exists
    outputDir = os.path.dirname(options.output_file)
    makeSurePathExists(outputDir)

    binFiles = self.binFiles(options.bin_dir, options.extension)

    BinTools().identifyOutliers(options.results_dir,
                                binFiles,
                                options.tetra_profile,
                                options.distributions,
                                options.report_type,
                                options.output_file)

    self.logger.info('Outlier information written to: ' +
                     options.output_file)

    self.timeKeeper.printTimeStamp()
def __readClansAndNesting(self):
    """Fill self.idToAcc, self.clan and self.nested from self.pfamClanFile.

    The file is scanned line by line and the third whitespace-separated
    field of each recognised line is used:
      '#=GF ID' sets the current Pfam id,
      '#=GF AC' sets the current accession (text after the last '.' is
                dropped) and records id -> accession in self.idToAcc,
      '#=GF CL' records accession -> clan id in self.clan,
      '#=GF NE' records a nesting relation between the current id and
                the named id, in both directions.
    Afterwards self.nested maps each accession to the set of accessions
    it has a nesting relation with.
    """
    checkFileExists(self.pfamClanFile)

    idNested = defaultdict(list)
    # the context manager closes the file even if parsing raises
    with open(self.pfamClanFile) as fin:
        for line in fin:
            if '#=GF ID' in line:
                ID = line.split()[2].strip()
            elif '#=GF AC' in line:
                pfamAcc = line.split()[2].strip()
                # rsplit leaves an accession without a '.' untouched,
                # whereas slicing up to rfind('.') == -1 would cut off
                # its last character
                pfamAcc = pfamAcc.rsplit('.', 1)[0]
                self.idToAcc[ID] = pfamAcc
            elif '#=GF CL' in line:
                clanId = line.split()[2].strip()
                self.clan[pfamAcc] = clanId
            elif '#=GF NE' in line:
                nestedId = line.split()[2].strip()
                idNested[nestedId].append(ID)
                idNested[ID].append(nestedId)

    # set nested structure to use pfam accessions instead of IDs
    # (items() rather than iteritems() so this runs on Python 2 and 3)
    for ID, nested in idNested.items():
        pfamAcc = self.idToAcc[ID]
        self.nested[pfamAcc] = set(self.idToAcc[x] for x in nested)
def run(self, parser, outputDir):
    """Run standard E. coli genome to verify operation of CheckM.

    parser: object providing the tree, treeQA, lineageSet, analyze and
        qa commands; each is called with a shared Options instance.
    outputDir: directory under which a fresh 'results' folder is
        created; an existing one is deleted first.

    After each command the matching self.verify* method is called on
    its output.
    """
    ecoliFile = os.path.join(DefaultValues.CHECKM_DATA_DIR, 'test_data',
                             '637000110.fna')
    checkFileExists(ecoliFile)

    options = Options()
    options.threads = 1
    options.extension = 'fna'
    options.bQuiet = True
    options.out_folder = os.path.join(outputDir, 'results')

    # always start from an empty results folder
    if os.path.exists(options.out_folder):
        shutil.rmtree(options.out_folder)
    makeSurePathExists(options.out_folder)

    # print(...) with one string argument produces the same output on
    # Python 2 and Python 3
    print('[Step 1]: Verifying tree command.')
    options.bKeepAlignment = False
    options.bNucORFs = False
    options.bCalledGenes = False
    options.bReducedTree = True
    options.bin_folder = os.path.join(DefaultValues.CHECKM_DATA_DIR,
                                      'test_data')
    parser.tree(options)
    self.verifyTree(options.out_folder)
    print('\n [Passed]')

    print('\n')
    print('[Step 2]: Verifying tree_qa command.')
    options.tree_folder = options.out_folder
    options.out_format = 1
    options.file = os.path.join(options.out_folder, 'tree_qa_test.tsv')
    options.bTabTable = True
    parser.treeQA(options)
    self.verifyTreeQA(options.file)
    print('\n [Passed]')

    print('\n')
    print('[Step 3]: Verifying lineage_set command.')
    options.marker_file = os.path.join(options.out_folder,
                                       'lineage_set_test.tsv')
    options.bForceDomain = False
    options.bootstrap = 0
    options.num_genomes_markers = 30
    options.num_genomes_refine = 5
    options.bNoLineageSpecificRefinement = False
    options.bRequireTaxonomy = False
    options.unique = 10
    options.multi = 10
    parser.lineageSet(options)
    self.verifyLineageSet(options.marker_file, options.bRequireTaxonomy)

    # repeat the lineage_set check with taxonomy required
    options.bRequireTaxonomy = True
    parser.lineageSet(options)
    self.verifyLineageSet(options.marker_file, options.bRequireTaxonomy)
    print('\n [Passed]')

    print('\n')
    print('[Step 4]: Verifying analyze command.')
    options.bAlignTopHit = False
    parser.analyze(options)
    self.verifyAnalyze(options.out_folder)
    print('\n [Passed]')

    print('\n')
    print('[Step 5]: Verifying qa command.')
    options.alignment_file = None
    options.analyze_folder = options.out_folder
    options.out_format = 1
    options.exclude_markers = None
    options.bSkipPseudoGeneCorrection = False
    options.bSkipAdjCorrection = False
    options.file = os.path.join(options.out_folder, 'qa_test.tsv')
    options.bIndividualMarkers = False
    options.bIgnoreThresholds = False
    options.aai_strain = 0.9
    options.e_value = 1e-10
    options.length = 0.7
    options.coverage_file = None
    options.bTabTable = True
    parser.qa(options)
    self.verifyQA(options.file)
    print('\n [Passed]')
def run(self, coverageFile, outFile, bTabTable):
    """Report the share of mapped reads assigned to each bin.

    coverageFile: tab-separated file with one header row. Column 2 is
        the bin id and column 3 the sequence length; from column 4 on,
        fields come in groups of three whose first entry is a BAM id
        and whose third entry is a mapped read count.
    outFile: passed to reassignStdOut / restoreStdOut, which wrap the
        printing of the table.
    bTabTable: when true a tab-separated table is printed, otherwise a
        prettytable-formatted one.
    """
    checkFileExists(coverageFile)

    # get number of reads mapped to each bin
    self.logger.info('Determining number of reads mapped to each bin.')

    readsMappedToBin = {}
    binSize = {}
    totalMappedReads = {}
    # the context manager closes the file even if parsing raises
    with open(coverageFile) as fin:
        next(fin, None)  # skip the header row
        for line in fin:
            lineSplit = line.split('\t')

            # lineSplit[0] holds the sequence id, which is not needed
            binId = lineSplit[1]
            seqLen = int(lineSplit[2])

            binSize[binId] = binSize.get(binId, 0) + seqLen

            binReads = readsMappedToBin.setdefault(binId, {})
            for i in range(3, len(lineSplit), 3):
                bamId = lineSplit[i]
                mappedReads = int(lineSplit[i + 2])

                totalMappedReads[bamId] = totalMappedReads.get(
                    bamId, 0) + mappedReads
                binReads[bamId] = binReads.get(bamId, 0) + mappedReads

    # calculate percentage of mapped reads to binned populations
    perMappedReads = {}
    normBinCoverage = {}
    sumNormBinCoverage = {}
    for binId, bamIds in readsMappedToBin.items():
        perMappedReads[binId] = {}
        normBinCoverage[binId] = {}

        for bamId in bamIds:
            total = totalMappedReads[bamId]
            # a BAM without any mapped read contributes 0 instead of
            # raising ZeroDivisionError
            perMR = float(bamIds[bamId]) / total if total else 0.0
            perMappedReads[binId][bamId] = perMR

            # unbinned sequences are excluded from the normalised values
            if binId == DefaultValues.UNBINNED:
                continue

            normCoverage = perMR / binSize[binId]
            normBinCoverage[binId][bamId] = normCoverage
            sumNormBinCoverage[bamId] = sumNormBinCoverage.get(
                bamId, 0) + normCoverage

    # scale so the normalised values of each BAM sum to 1 across bins
    for binId, bamIds in normBinCoverage.items():
        for bamId in bamIds:
            if sumNormBinCoverage[bamId] != 0:
                normBinCoverage[binId][bamId] /= sumNormBinCoverage[bamId]
            else:
                normBinCoverage[binId][bamId] = 0

    # write community profile
    oldStdOut = reassignStdOut(outFile)
    try:
        sortedBinIds = sorted(readsMappedToBin.keys())
        # a coverage file without data rows yields a header-only table
        if sortedBinIds:
            sortedBamIds = sorted(readsMappedToBin[sortedBinIds[0]].keys())
        else:
            sortedBamIds = []

        header = ['Bin Id', 'Bin size (Mbp)']
        for bamId in sortedBamIds:
            header += [bamId + ': mapped reads']
            header += [bamId + ': % mapped reads']
            header += [bamId + ': % binned populations']
            header += [bamId + ': % community']

        if bTabTable:
            print('\t'.join(header))
        else:
            pTable = prettytable.PrettyTable(header)
            pTable.float_format = '.2'
            pTable.align = 'c'
            pTable.align[header[0]] = 'l'
            pTable.hrules = prettytable.FRAME
            pTable.vrules = prettytable.NONE

        for binId in sortedBinIds:
            row = [binId]
            row += [float(binSize[binId]) / 1e6]

            for bamId in sortedBamIds:
                row += [readsMappedToBin[binId][bamId]]
                row += [perMappedReads[binId][bamId] * 100.0]

                if DefaultValues.UNBINNED in perMappedReads:
                    unbinnedPercentage = perMappedReads[
                        DefaultValues.UNBINNED][bamId]
                else:
                    unbinnedPercentage = 0

                if binId == DefaultValues.UNBINNED:
                    row += ['NA']
                    row += [unbinnedPercentage * 100.0]
                else:
                    row += [normBinCoverage[binId][bamId] * 100.0]
                    row += [
                        normBinCoverage[binId][bamId] * 100.0 *
                        (1.0 - unbinnedPercentage)
                    ]

            if bTabTable:
                print('\t'.join(map(str, row)))
            else:
                pTable.add_row(row)

        if not bTabTable:
            print(pTable.get_string())
    finally:
        # always hand stdout back, even if building the table failed
        restoreStdOut(outFile, oldStdOut)
def run(self, parser, outputDir):
    """Run standard E. coli genome to verify operation of CheckM.

    parser: object providing the tree, treeQA, lineageSet, analyze and
        qa commands; each is called with a shared Options instance.
    outputDir: directory under which a fresh 'results' folder is
        created; an existing one is deleted first.

    After each command the matching self.verify* method is called on
    its output.
    """
    ecoliFile = os.path.join(DefaultValues.CHECKM_DATA_DIR, 'test_data',
                             '637000110.fna')
    checkFileExists(ecoliFile)

    options = Options()
    options.threads = 1
    options.pplacer_threads = 1
    options.extension = 'fna'
    options.bQuiet = True
    options.out_folder = os.path.join(outputDir, 'results')

    # always start from an empty results folder
    if os.path.exists(options.out_folder):
        shutil.rmtree(options.out_folder)
    makeSurePathExists(options.out_folder)

    # print(...) with one string argument produces the same output on
    # Python 2 and Python 3
    print('[Step 1]: Verifying tree command.')
    options.bKeepAlignment = False
    options.bNucORFs = False
    options.bCalledGenes = False
    options.bReducedTree = True
    options.bin_folder = os.path.join(DefaultValues.CHECKM_DATA_DIR,
                                      'test_data')
    parser.tree(options)
    self.verifyTree(options.out_folder)
    print('\n [Passed]')

    print('\n')
    print('[Step 2]: Verifying tree_qa command.')
    options.tree_folder = options.out_folder
    options.out_format = 1
    options.file = os.path.join(options.out_folder, 'tree_qa_test.tsv')
    options.bTabTable = True
    parser.treeQA(options)
    self.verifyTreeQA(options.file)
    print('\n [Passed]')

    print('\n')
    print('[Step 3]: Verifying lineage_set command.')
    options.marker_file = os.path.join(options.out_folder,
                                       'lineage_set_test.tsv')
    options.bForceDomain = False
    options.bootstrap = 0
    options.num_genomes_markers = 30
    options.num_genomes_refine = 5
    options.bNoLineageSpecificRefinement = False
    options.bRequireTaxonomy = False
    options.unique = 10
    options.multi = 10
    parser.lineageSet(options)
    self.verifyLineageSet(options.marker_file, options.bRequireTaxonomy)

    # repeat the lineage_set check with taxonomy required
    options.bRequireTaxonomy = True
    parser.lineageSet(options)
    self.verifyLineageSet(options.marker_file, options.bRequireTaxonomy)
    print('\n [Passed]')

    print('\n')
    print('[Step 4]: Verifying analyze command.')
    options.bAlignTopHit = False
    parser.analyze(options)
    self.verifyAnalyze(options.out_folder)
    print('\n [Passed]')

    print('\n')
    print('[Step 5]: Verifying qa command.')
    options.alignment_file = None
    options.analyze_folder = options.out_folder
    options.out_format = 1
    options.exclude_markers = None
    options.bSkipPseudoGeneCorrection = False
    options.bSkipAdjCorrection = False
    options.file = os.path.join(options.out_folder, 'qa_test.tsv')
    options.bIndividualMarkers = False
    options.bIgnoreThresholds = False
    options.aai_strain = 0.9
    options.e_value = 1e-10
    options.length = 0.7
    options.coverage_file = None
    options.bTabTable = True
    parser.qa(options)
    self.verifyQA(options.file)
    print('\n [Passed]')