Example #1
0
 def testUnknownCategoryWithDefault(self):
     """
     Bases that are missing from the category map must be reported under
     the default category that the caller passes in.
     """
     # 'C' is deliberately absent from the mapping.
     categories = {base: base.lower() for base in 'AGT'}
     read = DNARead('id', 'ACCGGTTT')
     expected = [('a', 1), ('xxx', 2), ('g', 2), ('t', 3)]
     observed = sequenceCategoryLengths(read, categories, 'xxx')
     self.assertEqual(expected, observed)
Example #2
0
 def testUnknownCategory(self):
     """
     Bases that are missing from the category map must be reported with
     C{None} as their category when no default category is given.
     """
     # 'C' is deliberately absent from the mapping.
     categories = {base: base.lower() for base in 'AGT'}
     read = DNARead('id', 'ACCGGTTT')
     expected = [('a', 1), (None, 2), ('g', 2), ('t', 3)]
     observed = sequenceCategoryLengths(read, categories)
     self.assertEqual(expected, observed)
Example #3
0
 def testOneCategoryPerBase(self):
     """
     When every base maps to a distinct category, each category must be
     reported once with a length of one.
     """
     # Maps A, C, G, T to 0, 1, 2, 3 respectively.
     categories = dict(zip('ACGT', range(4)))
     read = DNARead('id', 'ACGT')
     expected = [(0, 1), (1, 1), (2, 1), (3, 1)]
     observed = sequenceCategoryLengths(read, categories)
     self.assertEqual(expected, observed)
Example #4
0
 def testRepeatedCategory(self):
     """
     Runs of adjacent bases sharing a category must be collapsed into a
     single entry whose length is the length of the run.
     """
     categories = {base: base.lower() for base in 'ACGT'}
     read = DNARead('id', 'ACCGGTTT')
     expected = [('a', 1), ('c', 2), ('g', 2), ('t', 3)]
     observed = sequenceCategoryLengths(read, categories)
     self.assertEqual(expected, observed)
Example #5
0
 def testSuppressAtEnd(self):
     """
     A region at the end of the sequence that is shorter than the passed
     minimum length must have its category information suppressed.
     """
     categories = {base: base.lower() for base in 'ACGT'}
     read = DNARead('id', 'CCGGTTTA')
     # The trailing single 'A' is below the minimum length of 2.
     expected = [('c', 2), ('g', 2), ('t', 3), ('...', 1)]
     observed = sequenceCategoryLengths(read, categories, minLength=2)
     self.assertEqual(expected, observed)
Example #6
0
 def testAllSuppressed(self):
     """
     When every region of the sequence is shorter than the passed minimum
     length, the result must be a single suppressed region whose length is
     the sum of the lengths of all the regions.
     """
     categories = {base: base.lower() for base in 'ACGT'}
     read = DNARead('id', 'ACCGGGTTT')
     # Region lengths are 1, 2, 3 and 3, all below the minimum of 5.
     expected = [('...', 9)]
     observed = sequenceCategoryLengths(read, categories, minLength=5)
     self.assertEqual(expected, observed)
Example #7
0
 def testSuppressWithNonDefaultSuppresscategory(self):
     """
     A region shorter than the passed minimum length must have its category
     information suppressed, and the category reported for it must be the
     suppressed category that the caller passes in.
     """
     categories = {base: base.lower() for base in 'ACGT'}
     read = DNARead('id', 'ACCGGTTT')
     # The leading single 'A' is below the minimum length of 2.
     expected = [('s', 1), ('c', 2), ('g', 2), ('t', 3)]
     observed = sequenceCategoryLengths(
         read, categories, minLength=2, suppressedCategory='s')
     self.assertEqual(expected, observed)
Example #8
0
 def testSuppressTwoAtStart(self):
     """
     Two adjacent regions at the start of the sequence that are both
     shorter than the passed minimum length must be merged into a single
     suppressed region whose length is the sum of their lengths.
     """
     categories = {base: base.lower() for base in 'ACGT'}
     read = DNARead('id', 'AGCCGGTTT')
     # The leading 'A' and 'G' are each below the minimum length of 2.
     expected = [('...', 2), ('c', 2), ('g', 2), ('t', 3)]
     observed = sequenceCategoryLengths(read, categories, minLength=2)
     self.assertEqual(expected, observed)
Example #9
0
    # NOTE(review): this is the interior of a function whose header is not
    # visible here; 'categories', 'default', 'args' and 'reads' come from the
    # enclosing scope.

    # Column width for category names: the longest value in the category
    # map, or the default category if that is longer.
    categoryWidth = max(
        [len(category) for category in categories.values()] + [len(default)])

    minLength = args.minLength
    concise = args.concise

    for index, read in enumerate(reads, start=1):
        # Total number of bases seen in each category for this read.
        counts = defaultdict(int)
        readLen = len(read)
        # Number of digits needed to print any count or offset for this
        # read. Taken from the decimal string instead of log10 so that a
        # zero-length read does not raise ValueError (log10(0) is a math
        # domain error); for positive lengths the result is the same.
        width = len(str(readLen))
        if not concise:
            # Per-region detail lines, printed after the per-category totals.
            summary = []
            append = summary.append
            # 1-based offset of the start of the current region.
            offset = 1
        for (category, count) in sequenceCategoryLengths(
                read, categories, defaultCategory=default,
                minLength=minLength):
            counts[category] += count
            if not concise:
                append('    %*d %-*s (offset %*d)' %
                       (width, count, categoryWidth, category, width, offset))
                offset += count
        print('%d: %s (length %d)' % (index, read.id, readLen))
        # Per-category totals, with each total also shown as a percentage of
        # the read length. For an empty read 'counts' is expected to be empty
        # (no regions), so the division below is not reached.
        for category in sorted(counts):
            count = counts[category]
            print('  %-*s: %*d (%6.2f%%)' %
                  (categoryWidth, category, width, count,
                   count / readLen * 100.0))
        if not concise:
            print('\n'.join(summary))
Example #10
0
 def testEmpty(self):
     """
     Summarizing an empty sequence must produce an empty list of category
     lengths.
     """
     emptyRead = DNARead('id', '')
     observed = sequenceCategoryLengths(emptyRead, {})
     self.assertEqual([], observed)