def testMedianFourStrings(self):
     """
     With an even number of reads (lengths 12, 5, 7 and 20) the median
     length must be the mean of the two middle lengths, i.e. 9.5.
     """
     # Build the FASTA text from adjacent literals, one record per line.
     # A backslash continuation inside a single literal would make the
     # indentation of the following source line part of the last record's
     # sequence, leaving the test dependent on the parser stripping spaces.
     seq = ('>hey\nagtcagtcagtc\n'
            '>you\nacctg\n'
            '>how\natgggtc\n'
            '>are\natggctattgaactgtatct')
     result = summarize_reads(StringIO(seq), 'fasta')
     self.assertEqual(result['median_length'], 9.5)
 def testMedianOneString(self):
     """
     With a single 12-base read, the median length must be 12.
     """
     fasta = StringIO('>hey\nagtcagtcagtc')
     summary = summarize_reads(fasta, 'fasta')
     self.assertEqual(summary['median_length'], 12)
 def testMedianThreeStrings(self):
     """
     With an odd number of reads (lengths 12, 5 and 7) the median length
     must be the middle length, 7.
     """
     fasta = ('>hey\nagtcagtcagtc\n'
              '>you\nacctg\n'
              '>how\natgggtc')
     summary = summarize_reads(StringIO(fasta), 'fasta')
     self.assertEqual(summary['median_length'], 7)
 def testMinLengthListTwoStrings(self):
     """
     With reads of 12 and 5 bases, the minimum length must be 5.
     """
     fasta = ('>hey\nagtcagtcagtc\n'
              '>you\nacctg')
     summary = summarize_reads(StringIO(fasta), 'fasta')
     self.assertEqual(summary['min_length'], 5)
 def testMedianEmptyInput(self):
     """
     With no reads at all, the median length must be reported as 0.
     """
     emptyInput = StringIO()
     summary = summarize_reads(emptyInput, 'fasta')
     self.assertEqual(summary['median_length'], 0)
 def testBaseCountsTwoReads(self):
     """
     Base counts must be accumulated across both reads.
     """
     fasta = ('>hey\nagtcagtcagtc\n'
              '>you\nacctg')
     expected = {'a': 4, 'c': 5, 't': 4, 'g': 4}
     summary = summarize_reads(StringIO(fasta), 'fasta')
     self.assertEqual(summary['base_counts'], expected)
 def testMinLengthListEmptyInput(self):
     """
     With no reads at all, the minimum length must be reported as 0.
     """
     emptyInput = StringIO()
     summary = summarize_reads(emptyInput, 'fasta')
     self.assertEqual(summary['min_length'], 0)
 def testBaseCountsEmptyImput(self):
     """
     With no reads at all, the base counts must be an empty dict.
     """
     emptyInput = StringIO()
     summary = summarize_reads(emptyInput, 'fasta')
     self.assertEqual(summary['base_counts'], {})
 def testBaseCountsOneRead(self):
     """
     A single read with three of each base must give a count of 3 for
     every base.
     """
     fasta = StringIO('>hey\nagtcagtcagtc')
     expected = {'a': 3, 'c': 3, 't': 3, 'g': 3}
     summary = summarize_reads(fasta, 'fasta')
     self.assertEqual(summary['base_counts'], expected)
 def testTotalLengthOneString(self):
     """
     With a single 12-base read, the total length must be 12.
     """
     fasta = StringIO('>hey\nagtcagtcagtc')
     summary = summarize_reads(fasta, 'fasta')
     self.assertEqual(summary['total_length'], 12)
 def testTotalLengthTwoStrings(self):
     """
     With reads of 12 and 5 bases, the total length must be 17.
     """
     fasta = ('>hey\nagtcagtcagtc\n'
              '>you\nacctg')
     summary = summarize_reads(StringIO(fasta), 'fasta')
     self.assertEqual(summary['total_length'], 17)
 def testTotalLengthEmptyInput(self):
     """
     With no reads at all, the total length must be reported as 0.
     """
     emptyInput = StringIO()
     summary = summarize_reads(emptyInput, 'fasta')
     self.assertEqual(summary['total_length'], 0)
 def testReadNumberTwoSequencesCount(self):
     """
     Input holding two FASTA records must give a read number of 2.
     """
     fasta = ('>hey\nagtcagtcagtc\n'
              '>you\nacctg')
     summary = summarize_reads(StringIO(fasta), 'fasta')
     self.assertEqual(summary['read_number'], 2)
 def testReadNumberOneSequenceCount(self):
     """
     Input holding one FASTA record must give a read number of 1.
     """
     fasta = StringIO('>hey\nagtcagtcagtc')
     summary = summarize_reads(fasta, 'fasta')
     self.assertEqual(summary['read_number'], 1)
 def testReadNumberEmptyInput(self):
     """
     With no reads at all, the read number must be reported as 0.
     """
     emptyInput = StringIO()
     summary = summarize_reads(emptyInput, 'fasta')
     self.assertEqual(summary['read_number'], 0)
#!/usr/bin/env python

from dark.summarize import summarize_reads
import sys


# Command-line entry point: print summary statistics for the reads in one
# FASTA or FASTQ file, whose name is the single command-line argument.

# Exactly one argument is required. Testing for "!= 2" (rather than "> 2")
# also catches a missing filename, which would otherwise surface as an
# IndexError on sys.argv[1] instead of a usage message.
if len(sys.argv) != 2:
    sys.stderr.write("Usage: %s file.fasta / file.fastq\n" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]

# The format is guessed from the last character of the filename: a name
# ending in 'a' is read as FASTA, anything else is read as FASTQ.
filetype = 'fasta' if filename.endswith('a') else 'fastq'

result = summarize_reads(filename, filetype)

# Each print takes one pre-formatted string so the output is the same
# whether print is a statement (Python 2) or a function (Python 3).
print("Number of reads: %s" % result["read_number"])
print("Total length: %s bases" % result["total_length"])
print("The average read length: %s bases" % result["average_length"])
print("Longest read: %s" % result["max_length"])
print("Shortest read: %s" % result["min_length"])
print("Median length: %s" % result["median_length"])

for base, count in result["base_counts"].items():
    # float() forces true division; integer "/" truncates under Python 2
    # and would under-report the per-read average.
    print("%s: Total: %s; Average per read: %s" % (
        base, count, float(count) / result["read_number"]))