def testMedianFourStrings(self):
    """
    With an even number of reads, the median length must be the mean of
    the two middle read lengths.
    """
    # Adjacent string literals are joined by the parser, so the long input
    # can be wrapped without a backslash (or continuation-line whitespace)
    # leaking into the sequence data.
    seq = ('>hey\nagtcagtcagtc\n>you\nacctg\n>how\natgggtc\n>are\n'
           'atggctattgaactgtatct')
    result = summarize_reads(StringIO(seq), 'fasta')
    # Read lengths are 12, 5, 7 and 20; the middle pair is 7 and 12.
    self.assertEqual(result['median_length'], 9.5)
def testMedianOneString(self):
    """
    The median length for a single read must be that read's own length.
    """
    fasta = '>hey\nagtcagtcagtc'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['median_length'], 12)
def testMedianThreeStrings(self):
    """
    With three reads of lengths 12, 5 and 7, the median length must be
    the middle value, 7.
    """
    fasta = '>hey\nagtcagtcagtc\n>you\nacctg\n>how\natgggtc'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['median_length'], 7)
def testMinLengthListTwoStrings(self):
    """
    With two reads of lengths 12 and 5, the minimum length must be 5.
    """
    fasta = '>hey\nagtcagtcagtc\n>you\nacctg'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['min_length'], 5)
def testMedianEmptyInput(self):
    """
    Empty input must give a median length of zero.
    """
    empty_input = StringIO()
    summary = summarize_reads(empty_input, 'fasta')
    self.assertEqual(summary['median_length'], 0)
def testBaseCountsTwoReads(self):
    """
    Base counts must be accumulated across both reads in the input.
    """
    fasta = '>hey\nagtcagtcagtc\n>you\nacctg'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    # The first read contributes 3 of each base; the second adds
    # a: 1, c: 2, t: 1, g: 1.
    self.assertEqual(summary['base_counts'],
                     {'a': 4, 'c': 5, 't': 4, 'g': 4})
def testMinLengthListEmptyInput(self):
    """
    Empty input must give a minimum length of zero.
    """
    empty_input = StringIO()
    summary = summarize_reads(empty_input, 'fasta')
    self.assertEqual(summary['min_length'], 0)
def testBaseCountsEmptyImput(self):
    """
    Empty input must give an empty base counts dictionary.
    """
    empty_input = StringIO()
    summary = summarize_reads(empty_input, 'fasta')
    self.assertEqual(summary['base_counts'], {})
def testBaseCountsOneRead(self):
    """
    Base counts for a single read must reflect just that read's bases.
    """
    fasta = '>hey\nagtcagtcagtc'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    # 'agtc' repeated three times: each base occurs three times.
    self.assertEqual(summary['base_counts'],
                     {'a': 3, 'c': 3, 't': 3, 'g': 3})
def testTotalLengthOneString(self):
    """
    The total length for a single read must be that read's length.
    """
    fasta = '>hey\nagtcagtcagtc'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['total_length'], 12)
def testTotalLengthTwoStrings(self):
    """
    The total length for two reads must be the sum of their lengths
    (12 + 5).
    """
    fasta = '>hey\nagtcagtcagtc\n>you\nacctg'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['total_length'], 17)
def testTotalLengthEmptyInput(self):
    """
    Empty input must give a total length of zero.
    """
    empty_input = StringIO()
    summary = summarize_reads(empty_input, 'fasta')
    self.assertEqual(summary['total_length'], 0)
def testReadNumberTwoSequencesCount(self):
    """
    Input holding two sequences must be reported as two reads.
    """
    fasta = '>hey\nagtcagtcagtc\n>you\nacctg'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['read_number'], 2)
def testReadNumberOneSequenceCount(self):
    """
    Input holding one sequence must be reported as one read.
    """
    fasta = '>hey\nagtcagtcagtc'
    handle = StringIO(fasta)
    summary = summarize_reads(handle, 'fasta')
    self.assertEqual(summary['read_number'], 1)
def testReadNumberEmptyInput(self):
    """
    Empty input must be reported as zero reads.
    """
    empty_input = StringIO()
    summary = summarize_reads(empty_input, 'fasta')
    self.assertEqual(summary['read_number'], 0)
#!/usr/bin/env python

# Print summary statistics (read count, lengths, per-base counts) for the
# reads in a FASTA or FASTQ file given as the single command-line argument.

import sys

from dark.summarize import summarize_reads

# Exactly one argument (the input file) is required. Checking for != 2
# also catches the no-argument case, which would otherwise fail with an
# IndexError on sys.argv[1] instead of showing the usage message.
if len(sys.argv) != 2:
    sys.stderr.write("Usage: %s file.fasta / file.fastq\n" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]

# Heuristic: a name ending in 'a' (e.g. .fasta, .fa) is taken to be FASTA;
# anything else is treated as FASTQ.
filetype = 'fasta' if filename.endswith('a') else 'fastq'

result = summarize_reads(filename, filetype)

# Single-argument print(...) calls produce the same output whether print
# is a statement (Python 2) or a function (Python 3).
print("Number of reads: %s" % result["read_number"])
print("Total length: %s bases" % result["total_length"])
print("The average read length: %s bases" % result["average_length"])
print("Longest read: %s" % result["max_length"])
print("Shortest read: %s" % result["min_length"])
print("Median length: %s" % result["median_length"])

read_number = result["read_number"]
for base, count in result["base_counts"].items():
    # Convert to float so the per-read average is not truncated by
    # integer division under Python 2.
    print("%s: Total: %s; Average per read: %s" % (
        base, count, float(count) / read_number))