Beispiel #1
0
    def list(self, rankFilter='ALL'):
        """Print a table of the marker sets available at a taxonomic rank.

        Every rank in `taxonomicRanks` is reported when `rankFilter` is
        'ALL'; otherwise only the rank equal to `rankFilter` is reported.
        Within a rank, taxa are listed in sorted order. The table is
        written to standard output, preceded by a blank line.
        """
        markerSetsByRank = self.readMarkerSets()

        columns = [
            'Rank', 'Taxon', '# genomes', '# marker genes', '# marker sets'
        ]
        table = prettytable.PrettyTable(columns)

        # centre every column except the two label columns
        table.align = 'c'
        for labelColumn in ('Rank', 'Taxon'):
            table.align[labelColumn] = 'l'
        table.hrules = prettytable.FRAME
        table.vrules = prettytable.NONE

        for rank in taxonomicRanks:
            # skip ranks excluded by the filter
            if rankFilter != 'ALL' and rankFilter != rank:
                continue

            for taxon in sorted(markerSetsByRank[rank]):
                markerSet = markerSetsByRank[rank][taxon]

                # size() yields the marker gene count and the marker set count
                markerCount, setCount = markerSet.size()
                table.add_row(
                    [rank, taxon, markerSet.numGenomes, markerCount, setCount])

        print('')
        print(table.get_string())
Beispiel #2
0
    def __printSimpleSummaryTable(self, binIdToTaxonomy, resultsParser, bTabTable, outFile):
        """Print one row per bin with its marker counts and taxonomy.

        Parameters
        ----------
        binIdToTaxonomy : dict
            Maps each bin id to the value shown in the 'Taxonomy' column.
        resultsParser : object
            Must provide `models[binId]` (its length is reported in the
            header as the total number of markers) and
            `results[binId].countUniqueHits()`, which yields the unique and
            multi-copy hit counts.
        bTabTable : bool
            If true, rows are printed tab-separated in sorted bin id order;
            otherwise a PrettyTable sorted by descending unique marker count
            is printed.
        outFile
            Passed to `reassignStdOut` / `restoreStdOut` to redirect output.

        Raises
        ------
        IndexError
            If `binIdToTaxonomy` is empty.
        """
        # redirect output
        oldStdOut = reassignStdOut(outFile)

        try:
            # dict views are not subscriptable on Python 3, so materialise the
            # keys before taking one.
            # NOTE(review): presumably every bin shares the same marker model,
            # which is why any bin id can be used for the count -- confirm.
            arbitraryBinId = list(binIdToTaxonomy.keys())[0]
            markerCountLabel = '# unique markers (of %d)' % len(resultsParser.models[arbitraryBinId])
            header = ['Bin Id', markerCountLabel, '# multi-copy', 'Taxonomy']

            if bTabTable:
                pTable = None
                print('\t'.join(header))
            else:
                pTable = prettytable.PrettyTable(header)
                pTable.float_format = '.2'
                pTable.align = 'c'
                pTable.align[header[0]] = 'l'
                pTable.align['Taxonomy'] = 'l'
                pTable.hrules = prettytable.FRAME
                pTable.vrules = prettytable.NONE

            for binId in sorted(binIdToTaxonomy):
                uniqueHits, multiCopyHits = resultsParser.results[binId].countUniqueHits()

                row = [binId, uniqueHits, multiCopyHits, binIdToTaxonomy[binId]]

                if bTabTable:
                    print('\t'.join(map(str, row)))
                else:
                    pTable.add_row(row)

            if not bTabTable:
                print(pTable.get_string(sortby=markerCountLabel, reversesort=True))
        finally:
            # restore stdout even if building the table failed, so later
            # output is not silently sent to outFile
            restoreStdOut(outFile, oldStdOut)
Beispiel #3
0
    def printSummary(self, outputFormat, aai, binIdToBinMarkerSets,
                     bIndividualMarkers, coverageFile, bTabTable, outFile,
                     anaFolder):
        """Print a summary of the results for every bin in `self.results`.

        Parameters
        ----------
        outputFormat : int
            Selects the report layout. Formats 1, 2, 3 and 9 can be drawn as
            a PrettyTable; every other format is always written as
            tab-separated text.
        aai, bIndividualMarkers, anaFolder
            Forwarded unchanged to each bin's own `printSummary`.
        binIdToBinMarkerSets : dict
            Maps bin id to its marker sets. One arbitrary entry is used to
            build the header and the matching entry is passed to each bin.
        coverageFile
            If truthy, bin coverage profiles are read from it with
            `Coverage(1).binProfiles` and passed on to each bin.
        bTabTable : bool
            Request tab-separated output even for PrettyTable formats.
        outFile
            Passed to `reassignStdOut` / `restoreStdOut` to redirect output.

        Raises
        ------
        IndexError
            If `binIdToBinMarkerSets` is empty.
        """
        # redirect output
        oldStdOut = reassignStdOut(outFile)

        try:
            coverageBinProfiles = None
            if coverageFile:
                coverage = Coverage(1)
                coverageBinProfiles = coverage.binProfiles(coverageFile)

            prettyTableFormats = [1, 2, 3, 9]

            arbitraryBinId = list(binIdToBinMarkerSets.keys())[0]
            header = self.__getHeader(outputFormat,
                                      binIdToBinMarkerSets[arbitraryBinId],
                                      coverageBinProfiles, bTabTable)

            if bTabTable or outputFormat not in prettyTableFormats:
                # formats without a PrettyTable layout fall back to tabular text
                bTabTable = True
                pTable = None

                if header is not None:
                    print('\t'.join(header))
            else:
                pTable = prettytable.PrettyTable(header)
                pTable.float_format = '.2'
                pTable.align = 'c'
                pTable.align[header[0]] = 'l'
                pTable.hrules = prettytable.FRAME
                pTable.vrules = prettytable.NONE

            # each bin either prints its own rows (pTable is None) or adds
            # them to pTable; the returned counts are accumulated
            seqsReported = 0
            for binId in sorted(self.results.keys()):
                seqsReported += self.results[binId].printSummary(
                    outputFormat, aai, binIdToBinMarkerSets[binId],
                    bIndividualMarkers, coverageBinProfiles, pTable, anaFolder)

            if outputFormat in [6, 7] and seqsReported == 0:
                print('[No marker genes satisfied the reporting criteria.]')

            if not bTabTable:
                if outputFormat in [1, 2]:
                    print(
                        pTable.get_string(sortby='Completeness',
                                          reversesort=True))
                else:
                    # only print if there are rows; render the table once
                    tableText = pTable.get_string(print_empty=False)
                    if tableText:
                        print(tableText)
        finally:
            # restore stdout even if reporting failed part-way through
            restoreStdOut(outFile, oldStdOut)
Beispiel #4
0
    def __printFullTable(self, binIdToUID, binIdToTaxonomy,
                         binIdToSisterTaxonomy, binIdToLineageStatistics,
                         resultsParser, binStats, bTabTable, outFile):
        """Print one detailed row per bin: markers, placement and statistics.

        Parameters
        ----------
        binIdToUID : dict
            Maps bin id to the value shown as 'Insertion branch UID'.
        binIdToTaxonomy : dict
            Maps bin id to a ';'-separated taxonomy string ('Taxonomy
            (contained)'). Its keys define which bins are reported.
        binIdToSisterTaxonomy : dict
            Maps bin id to the ';'-separated sister lineage string.
        binIdToLineageStatistics : dict
            Maps bin id to a dict with keys '# genomes', 'gc mean', 'gc std',
            'genome size mean', 'genome size std', 'gene count mean' and
            'gene count std'.
        resultsParser : object
            Must provide `models[binId]` (its length is reported in the
            header) and `results[binId].countUniqueHits()`.
        binStats : dict
            Maps bin id to a dict with keys 'GC', 'Genome size',
            '# predicted genes', 'Coding density' and 'Translation table'.
        bTabTable : bool
            If true, rows are printed tab-separated in sorted bin id order;
            otherwise a PrettyTable sorted by descending unique marker count
            is printed.
        outFile
            Passed to `reassignStdOut` / `restoreStdOut` to redirect output.

        Raises
        ------
        IndexError
            If `binIdToTaxonomy` is empty.
        """
        # redirect output
        oldStdOut = reassignStdOut(outFile)

        try:
            arbitraryBinId = list(binIdToTaxonomy.keys())[0]
            markerCountLabel = '# unique markers (of %d)' % len(
                resultsParser.models[arbitraryBinId])
            header = [
                'Bin Id', markerCountLabel, "# multi-copy",
                'Insertion branch UID', 'Taxonomy (contained)',
                'Taxonomy (sister lineage)',
                'GC', 'Genome size (Mbp)', 'Gene count', 'Coding density',
                'Translation table',
                '# descendant genomes', 'Lineage: GC mean', 'Lineage: GC std',
                'Lineage: genome size (Mbp) mean',
                'Lineage: genome size (Mbp) std',
                'Lineage: gene count mean', 'Lineage: gene count std',
            ]

            if bTabTable:
                pTable = None
                print('\t'.join(header))
            else:
                pTable = prettytable.PrettyTable(header)
                # two decimals by default, overridden per column below
                pTable.float_format = '.2'
                pTable.float_format['GC'] = '.1'
                pTable.float_format['Lineage: GC mean'] = '.1'
                pTable.float_format['Lineage: GC std'] = '.1'
                pTable.float_format['Lineage: gene count mean'] = '.0'
                pTable.float_format['Lineage: gene count std'] = '.0'
                pTable.align = 'c'
                pTable.align[header[0]] = 'l'
                pTable.align['Insertion branch UID'] = 'l'
                pTable.align['Taxonomy (contained)'] = 'l'
                pTable.align['Taxonomy (sister lineage)'] = 'l'
                pTable.hrules = prettytable.FRAME
                pTable.vrules = prettytable.NONE

            for binId in sorted(binIdToTaxonomy):
                uniqueHits, multiCopyHits = resultsParser.results[
                    binId].countUniqueHits()

                # drop from the sister lineage every taxon (with its trailing
                # ';') that also appears in the contained taxonomy.
                # NOTE(review): str.replace removes every occurrence, not only
                # a leading prefix -- confirm this is intended.
                truncSisterLineage = binIdToSisterTaxonomy[binId]
                for taxa in binIdToTaxonomy[binId].split(';'):
                    truncSisterLineage = truncSisterLineage.replace(
                        taxa + ';', '')

                if len(truncSisterLineage) == 0:
                    truncSisterLineage = 'unresolved'
                elif truncSisterLineage[-1] == ';':
                    truncSisterLineage = truncSisterLineage[0:-1]

                stats = binStats[binId]
                lineageStats = binIdToLineageStatistics[binId]
                row = [
                    binId, uniqueHits, multiCopyHits,
                    binIdToUID[binId], binIdToTaxonomy[binId],
                    truncSisterLineage,
                    stats['GC'] * 100,
                    # reported under the 'Genome size (Mbp)' column
                    float(stats['Genome size']) / 1e6,
                    stats['# predicted genes'],
                    stats['Coding density'],
                    stats['Translation table'],
                    lineageStats['# genomes'],
                    lineageStats['gc mean'],
                    lineageStats['gc std'],
                    lineageStats['genome size mean'],
                    lineageStats['genome size std'],
                    lineageStats['gene count mean'],
                    lineageStats['gene count std'],
                ]

                if bTabTable:
                    print('\t'.join(map(str, row)))
                else:
                    pTable.add_row(row)

            if not bTabTable:
                print(pTable.get_string(sortby=markerCountLabel,
                                        reversesort=True))
        finally:
            # restore stdout even if a lookup above failed, so later output
            # is not silently sent to outFile
            restoreStdOut(outFile, oldStdOut)
Beispiel #5
0
    def run(self, coverageFile, outFile, bTabTable):
        """Write a community profile computed from a coverage file.

        The coverage file is read as tab-separated text whose first line is
        a header. On every other line, column 2 is the bin id, column 3 the
        sequence length, and the remaining columns come in groups of three
        of which the first is a BAM id and the third the number of mapped
        reads. Blank lines are skipped.

        For each bin and BAM id the output reports the mapped reads, the
        percentage of that BAM's mapped reads, the size-normalised share
        among binned populations, and that share scaled by the binned
        fraction of the community. The bin whose id equals
        `DefaultValues.UNBINNED` reports 'NA' for the binned-population
        share.

        Parameters
        ----------
        coverageFile : str
            Path to the coverage file; checked with `checkFileExists`.
        outFile
            Passed to `reassignStdOut` / `restoreStdOut` to redirect output.
        bTabTable : bool
            If true, print tab-separated rows; otherwise print a PrettyTable.
        """
        checkFileExists(coverageFile)

        # get number of reads mapped to each bin
        self.logger.info('Determining number of reads mapped to each bin.')

        readsMappedToBin = {}
        binSize = {}
        totalMappedReads = {}
        with open(coverageFile) as fin:
            # skip the header line
            next(fin, None)

            for line in fin:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                if not line.strip():
                    continue

                lineSplit = line.split('\t')

                # lineSplit[0] is the sequence id, which is not needed here
                binId = lineSplit[1]

                seqLen = int(lineSplit[2])
                binSize[binId] = binSize.get(binId, 0) + seqLen

                binReads = readsMappedToBin.setdefault(binId, {})

                for i in range(3, len(lineSplit), 3):
                    bamId = lineSplit[i]
                    mappedReads = int(lineSplit[i + 2])

                    totalMappedReads[bamId] = totalMappedReads.get(
                        bamId, 0) + mappedReads
                    binReads[bamId] = binReads.get(bamId, 0) + mappedReads

        # calculate percentage of mapped reads to binned populations
        perMappedReads = {}
        normBinCoverage = {}
        sumNormBinCoverage = {}
        for binId, bamIds in readsMappedToBin.items():
            perMappedReads[binId] = {}
            normBinCoverage[binId] = {}

            for bamId, mappedReads in bamIds.items():
                # a BAM file without any mapped reads contributes 0 rather
                # than raising ZeroDivisionError
                total = totalMappedReads[bamId]
                perMR = float(mappedReads) / total if total else 0.0
                perMappedReads[binId][bamId] = perMR

                if binId == DefaultValues.UNBINNED:
                    continue

                # normalise by bin size so larger bins are not favoured
                normCoverage = perMR / binSize[binId]
                normBinCoverage[binId][bamId] = normCoverage
                sumNormBinCoverage[bamId] = sumNormBinCoverage.get(
                    bamId, 0) + normCoverage

        # rescale so the normalised coverages of each BAM id sum to one
        for binId, bamIds in normBinCoverage.items():
            for bamId in bamIds:
                if sumNormBinCoverage[bamId] != 0:
                    normBinCoverage[binId][bamId] /= sumNormBinCoverage[bamId]
                else:
                    normBinCoverage[binId][bamId] = 0

        # write community profile
        oldStdOut = reassignStdOut(outFile)

        try:
            sortedBinIds = sorted(readsMappedToBin)

            # the BAM ids of the first bin define the columns; a coverage
            # file without data rows yields a table with no BAM columns
            if sortedBinIds:
                sortedBamIds = sorted(readsMappedToBin[sortedBinIds[0]])
            else:
                sortedBamIds = []

            header = ['Bin Id', 'Bin size (Mbp)']
            for bamId in sortedBamIds:
                header += [bamId + ': mapped reads']
                header += [bamId + ': % mapped reads']
                header += [bamId + ': % binned populations']
                header += [bamId + ': % community']

            if bTabTable:
                pTable = None
                print('\t'.join(header))
            else:
                pTable = prettytable.PrettyTable(header)
                pTable.float_format = '.2'
                pTable.align = 'c'
                pTable.align[header[0]] = 'l'
                pTable.hrules = prettytable.FRAME
                pTable.vrules = prettytable.NONE

            for binId in sortedBinIds:
                row = [binId]
                row += [float(binSize[binId]) / 1e6]

                for bamId in sortedBamIds:
                    row += [readsMappedToBin[binId][bamId]]
                    row += [perMappedReads[binId][bamId] * 100.0]

                    if DefaultValues.UNBINNED in perMappedReads:
                        unbinnedPercentage = perMappedReads[
                            DefaultValues.UNBINNED][bamId]
                    else:
                        unbinnedPercentage = 0

                    if binId == DefaultValues.UNBINNED:
                        row += ['NA']
                        row += [unbinnedPercentage * 100.0]
                    else:
                        row += [normBinCoverage[binId][bamId] * 100.0]
                        # scale by the fraction of reads that were binned
                        row += [
                            normBinCoverage[binId][bamId] * 100.0 *
                            (1.0 - unbinnedPercentage)
                        ]

                if bTabTable:
                    print('\t'.join(map(str, row)))
                else:
                    pTable.add_row(row)

            if not bTabTable:
                print(pTable.get_string())
        finally:
            # restore stdout even if writing the profile failed
            restoreStdOut(outFile, oldStdOut)