def testUnknownCategoryWithDefault(self):
    """
    Bases that have no category must be summarized under the default
    category that is passed.
    """
    read = DNARead('id', 'ACCGGTTT')
    # 'C' is deliberately absent, so those bases have no category.
    categories = dict(A='a', G='g', T='t')
    expected = [('a', 1), ('xxx', 2), ('g', 2), ('t', 3)]
    result = sequenceCategoryLengths(read, categories, 'xxx')
    self.assertEqual(expected, result)
def testUnknownCategory(self):
    """
    Bases that have no category must be summarized with C{None} as their
    category when no default category is passed.
    """
    read = DNARead('id', 'ACCGGTTT')
    # 'C' is deliberately absent, so those bases have no category.
    categories = dict(A='a', G='g', T='t')
    expected = [('a', 1), (None, 2), ('g', 2), ('t', 3)]
    result = sequenceCategoryLengths(read, categories)
    self.assertEqual(expected, result)
def testOneCategoryPerBase(self):
    """
    When every base has its own category, each region in the summary must
    have length one.
    """
    read = DNARead('id', 'ACGT')
    # Maps A, C, G, T to the integer categories 0, 1, 2, 3 respectively.
    categories = {base: number for number, base in enumerate('ACGT')}
    expected = [(0, 1), (1, 1), (2, 1), (3, 1)]
    result = sequenceCategoryLengths(read, categories)
    self.assertEqual(expected, result)
def testRepeatedCategory(self):
    """
    When a category occurs several times in a row, the summary must give
    the correct length for each run.
    """
    read = DNARead('id', 'ACCGGTTT')
    # Each base is categorized as its own lower-case letter.
    categories = {base: base.lower() for base in 'ACGT'}
    expected = [('a', 1), ('c', 2), ('g', 2), ('t', 3)]
    result = sequenceCategoryLengths(read, categories)
    self.assertEqual(expected, result)
def testSuppressAtEnd(self):
    """
    A region at the end of the sequence that is shorter than the passed
    minimum length must have its category information suppressed.
    """
    read = DNARead('id', 'CCGGTTTA')
    # Each base is categorized as its own lower-case letter.
    categories = {base: base.lower() for base in 'ACGT'}
    # The trailing single 'A' is below the minimum length of 2.
    expected = [('c', 2), ('g', 2), ('t', 3), ('...', 1)]
    result = sequenceCategoryLengths(read, categories, minLength=2)
    self.assertEqual(expected, result)
def testAllSuppressed(self):
    """
    When every region of the sequence is shorter than the passed minimum
    length, the category information must be suppressed and the length of
    the suppressed region must be the sum of the region lengths.
    """
    read = DNARead('id', 'ACCGGGTTT')
    # Each base is categorized as its own lower-case letter.
    categories = {base: base.lower() for base in 'ACGT'}
    # No run reaches 5 bases, so all 9 bases merge into one region.
    expected = [('...', 9)]
    result = sequenceCategoryLengths(read, categories, minLength=5)
    self.assertEqual(expected, result)
def testSuppressWithNonDefaultSuppresscategory(self):
    """
    A region shorter than the passed minimum length must have its category
    information suppressed, and the category reported for it must be the
    suppressed category that is passed.
    """
    read = DNARead('id', 'ACCGGTTT')
    # Each base is categorized as its own lower-case letter.
    categories = {base: base.lower() for base in 'ACGT'}
    # The leading single 'A' is below the minimum length of 2.
    expected = [('s', 1), ('c', 2), ('g', 2), ('t', 3)]
    result = sequenceCategoryLengths(
        read, categories, minLength=2, suppressedCategory='s')
    self.assertEqual(expected, result)
def testSuppressTwoAtStart(self):
    """
    When two regions at the start of the sequence are shorter than the
    passed minimum length, their category information must be suppressed
    and the length of the suppressed region must be the sum of the lengths
    of the two regions.
    """
    read = DNARead('id', 'AGCCGGTTT')
    # Each base is categorized as its own lower-case letter.
    categories = {base: base.lower() for base in 'ACGT'}
    # The leading 'A' and 'G' (one base each) merge into one region.
    expected = [('...', 2), ('c', 2), ('g', 2), ('t', 3)]
    result = sequenceCategoryLengths(read, categories, minLength=2)
    self.assertEqual(expected, result)
# Column width for category names: wide enough for the longest category
# value and for the default category name.
categoryWidth = max(
    [len(category) for category in categories.values()] + [len(default)])
minLength = args.minLength
concise = args.concise

# For each read, print a header line, then the total number of bases (and the
# percentage of the read) in each category and, unless concise output was
# requested, one line per region giving its length, category and 1-based
# offset.
for index, read in enumerate(reads, start=1):
    counts = defaultdict(int)
    readLen = len(read)
    # Number of decimal digits in the read length, used to right-align the
    # counts and offsets. len(str(...)) is used rather than
    # int(log10(...)) + 1 because log10(0) raises ValueError, which would
    # abort the run on a zero-length read.
    width = len(str(readLen))
    if not concise:
        summary = []
        append = summary.append
        offset = 1
    for category, count in sequenceCategoryLengths(
            read, categories, defaultCategory=default,
            minLength=minLength):
        counts[category] += count
        if not concise:
            append(' %*d %-*s (offset %*d)' %
                   (width, count, categoryWidth, category, width, offset))
            offset += count
    print('%d: %s (length %d)' % (index, read.id, readLen))
    # If no regions were produced for the read, counts is empty and the
    # division by readLen below is never evaluated.
    for category in sorted(counts):
        count = counts[category]
        print(' %-*s: %*d (%6.2f%%)' %
              (categoryWidth, category, width, count,
               count / readLen * 100.0))
    if not concise:
        print('\n'.join(summary))
def testEmpty(self):
    """
    The category summary of an empty sequence must be empty.
    """
    emptyRead = DNARead('id', '')
    result = sequenceCategoryLengths(emptyRead, {})
    self.assertEqual([], result)