예제 #1
0
    def calculateSeqStats(self, scaffolds, scaffoldsStats):
        """Calculate scaffold and contig length statistics.

        Each scaffold is split into contigs at DefaultValues.CONTIG_BREAK.
        Upper-case 'N' characters left inside a contig are excluded from its
        length, and contigs with no remaining bases are ignored.

        Args:
            scaffolds: mapping of scaffold id -> sequence string.
            scaffoldsStats: mapping of scaffold id -> dict, updated in place
                with the keys 'Length', 'Total contig length' and
                '# contigs'. An entry for every scaffold id must be
                retrievable (it is indexed, not created, here).

        Returns:
            Tuple of (max scaffold length, max contig length, total scaffold
            length, scaffold N50, contig N50, number of contigs, number of
            ambiguous bases).

        Note:
            The final max() calls raise ValueError when there are no
            scaffolds or no non-empty contigs.
        """
        scaffoldLens = []
        contigLens = []
        numAmbiguousBases = 0

        # items() instead of the Python 2-only iteritems(): it behaves the
        # same here and also works on Python 3 dictionaries.
        for scaffoldId, scaffold in scaffolds.items():
            scaffoldLen = len(scaffold)
            scaffoldLens.append(scaffoldLen)

            lenContigsInScaffold = []
            for contig in scaffold.split(DefaultValues.CONTIG_BREAK):
                # NOTE(review): only upper-case 'N' is stripped here, while
                # the ambiguous-base count below also includes lower-case
                # 'n' -- confirm whether this asymmetry is intended.
                contigLen = len(contig.replace('N', ''))
                if contigLen > 0:
                    lenContigsInScaffold.append(contigLen)

            contigLens.extend(lenContigsInScaffold)

            scaffoldsStats[scaffoldId]['Length'] = scaffoldLen
            scaffoldsStats[scaffoldId]['Total contig length'] = sum(lenContigsInScaffold)
            scaffoldsStats[scaffoldId]['# contigs'] = len(lenContigsInScaffold)

            numAmbiguousBases += scaffold.count('N') + scaffold.count('n')

        scaffold_N50 = calculateN50(scaffoldLens)
        contig_N50 = calculateN50(contigLens)

        return max(scaffoldLens), max(contigLens), sum(scaffoldLens), scaffold_N50, contig_N50, len(contigLens), numAmbiguousBases
예제 #2
0
    def calculateSeqStats(self, scaffolds, seqStats=None):
        """Calculate scaffold and contig length statistics.

        Each scaffold is split into contigs at DefaultValues.CONTIG_BREAK.
        Upper-case 'N' characters left inside a contig are excluded from its
        length, and contigs with no remaining bases are ignored.

        Args:
            scaffolds: mapping of scaffold id -> sequence string.
            seqStats: optional mapping of scaffold id -> dict. When given
                (even if currently empty) it is updated in place with the
                keys 'Length', 'Total contig length' and '# contigs' for
                every scaffold; a missing per-scaffold entry is created.

        Returns:
            Tuple of (max scaffold length, max contig length, total scaffold
            length, scaffold N50, contig N50, mean scaffold length, mean
            contig length, number of contigs, number of ambiguous bases).

        Note:
            The final max() calls raise ValueError when there are no
            scaffolds or no non-empty contigs.
        """
        scaffoldLens = []
        contigLens = []
        numAmbiguousBases = 0
        for scaffoldId, scaffold in scaffolds.items():
            scaffoldLen = len(scaffold)
            scaffoldLens.append(scaffoldLen)

            lenContigsInScaffold = []
            for contig in scaffold.split(DefaultValues.CONTIG_BREAK):
                # NOTE(review): only upper-case 'N' is stripped here, while
                # the ambiguous-base count below also includes lower-case
                # 'n' -- confirm whether this asymmetry is intended.
                contigLen = len(contig.replace('N', ''))
                if contigLen > 0:
                    lenContigsInScaffold.append(contigLen)

            contigLens.extend(lenContigsInScaffold)

            # Compare against None rather than relying on truthiness: an
            # empty dict passed in to be filled is falsy and would otherwise
            # be silently left empty.
            if seqStats is not None:
                try:
                    # Indexing first keeps defaultdict factories in charge of
                    # creating the per-scaffold entry.
                    scaffoldEntry = seqStats[scaffoldId]
                except KeyError:
                    scaffoldEntry = seqStats[scaffoldId] = {}

                scaffoldEntry['Length'] = scaffoldLen
                scaffoldEntry['Total contig length'] = sum(
                    lenContigsInScaffold)
                scaffoldEntry['# contigs'] = len(lenContigsInScaffold)

            numAmbiguousBases += scaffold.count('N') + scaffold.count('n')

        scaffold_N50 = calculateN50(scaffoldLens)
        contig_N50 = calculateN50(contigLens)

        return max(scaffoldLens), max(contigLens), sum(
            scaffoldLens), scaffold_N50, contig_N50, mean(scaffoldLens), mean(
                contigLens), len(contigLens), numAmbiguousBases
 def testScaffoldLengthStats(self):
     """Check the N50 reported by calculateN50 for a small list of lengths."""
     # The lengths sum to 20 and the largest one is 10; the expected N50 is 10.
     sequence_lengths = [1, 1, 2, 2, 2, 2, 10]
     self.assertEqual(calculateN50(sequence_lengths), 10)