def calculateSeqStats(self, scaffolds, scaffoldsStats):
    """Calculate scaffold and contig length statistics.

    Each scaffold is split into contigs on DefaultValues.CONTIG_BREAK. A
    contig's length is its length after removing every uppercase 'N';
    contigs whose length is then zero are ignored.

    Args:
        scaffolds: mapping of scaffold id -> sequence string.
        scaffoldsStats: mapping of scaffold id -> dict, updated in place
            with the keys 'Length', 'Total contig length' and '# contigs'.
            Indexing it with each scaffold id must yield a mutable mapping.

    Returns:
        A 7-tuple: (max scaffold length, max contig length, total scaffold
        length, scaffold N50, contig N50, number of contigs, number of
        ambiguous bases).

    Raises:
        ValueError: raised by max() when there are no scaffolds or no
            non-empty contigs (assuming calculateN50 does not fail first
            on the empty list).
    """
    scaffoldLens = []
    contigLens = []
    numAmbiguousBases = 0

    # items() instead of the Python 2-only iteritems(): the latter does not
    # exist on Python 3 dicts, while items() works on both.
    for scaffoldId, scaffold in scaffolds.items():
        scaffoldLen = len(scaffold)
        scaffoldLens.append(scaffoldLen)

        # NOTE(review): only uppercase 'N' is stripped here, whereas the
        # ambiguous-base count below also includes lowercase 'n'.
        lenContigsInScaffold = []
        for contig in scaffold.split(DefaultValues.CONTIG_BREAK):
            contigLen = len(contig.replace('N', ''))
            if contigLen > 0:
                lenContigsInScaffold.append(contigLen)
        contigLens.extend(lenContigsInScaffold)

        scaffoldsStats[scaffoldId]['Length'] = scaffoldLen
        scaffoldsStats[scaffoldId]['Total contig length'] = sum(lenContigsInScaffold)
        scaffoldsStats[scaffoldId]['# contigs'] = len(lenContigsInScaffold)

        numAmbiguousBases += scaffold.count('N') + scaffold.count('n')

    scaffold_N50 = calculateN50(scaffoldLens)
    contig_N50 = calculateN50(contigLens)

    return (max(scaffoldLens), max(contigLens), sum(scaffoldLens),
            scaffold_N50, contig_N50, len(contigLens), numAmbiguousBases)
def calculateSeqStats(self, scaffolds, seqStats=None):
    """Calculate scaffold and contig length statistics.

    Each scaffold is split into contigs on DefaultValues.CONTIG_BREAK. A
    contig's length is its length after removing every uppercase 'N';
    contigs whose length is then zero are ignored.

    Args:
        scaffolds: mapping of scaffold id -> sequence string.
        seqStats: optional mapping of scaffold id -> dict. When provided
            (anything other than None, including an empty container), it
            is updated in place with the keys 'Length',
            'Total contig length' and '# contigs' for every scaffold.
            Indexing it with each scaffold id must yield a mutable mapping.

    Returns:
        A 9-tuple: (max scaffold length, max contig length, total scaffold
        length, scaffold N50, contig N50, mean scaffold length, mean contig
        length, number of contigs, number of ambiguous bases).

    Raises:
        ValueError: raised by max() when there are no scaffolds or no
            non-empty contigs (assuming calculateN50 does not fail first
            on the empty list).
    """
    scaffoldLens = []
    contigLens = []
    numAmbiguousBases = 0

    for scaffoldId, scaffold in scaffolds.items():
        scaffoldLen = len(scaffold)
        scaffoldLens.append(scaffoldLen)

        # NOTE(review): only uppercase 'N' is stripped here, whereas the
        # ambiguous-base count below also includes lowercase 'n'.
        lenContigsInScaffold = []
        for contig in scaffold.split(DefaultValues.CONTIG_BREAK):
            contigLen = len(contig.replace('N', ''))
            if contigLen > 0:
                lenContigsInScaffold.append(contigLen)
        contigLens.extend(lenContigsInScaffold)

        # Compare against None rather than relying on truthiness: an empty
        # container handed in to be filled is falsy and would otherwise be
        # skipped silently.
        if seqStats is not None:
            seqStats[scaffoldId]['Length'] = scaffoldLen
            seqStats[scaffoldId]['Total contig length'] = sum(
                lenContigsInScaffold)
            seqStats[scaffoldId]['# contigs'] = len(lenContigsInScaffold)

        numAmbiguousBases += scaffold.count('N') + scaffold.count('n')

    scaffold_N50 = calculateN50(scaffoldLens)
    contig_N50 = calculateN50(contigLens)

    return (max(scaffoldLens), max(contigLens), sum(scaffoldLens),
            scaffold_N50, contig_N50, mean(scaffoldLens), mean(contigLens),
            len(contigLens), numAmbiguousBases)
def testScaffoldLengthStats(self):
    """Check that calculateN50 reports 10 for a small fixed set of lengths."""
    # Seven lengths summing to 20; the expected N50 for this input is 10.
    lengths = [1, 1, 2, 2, 2, 2, 10]
    expected = 10
    self.assertEqual(calculateN50(lengths), expected)