def summarizeReads(file_handle, file_type):
    """
    Parse a sequence file (e.g. fasta or fastq) and return summary
    statistics about the reads it contains.

    @param file_handle: The file to read; passed unchanged to
        C{SeqIO.parse}.
    @param file_type: The format name; passed unchanged to C{SeqIO.parse}.
    @return: A C{dict} with the keys "read_number" (how many reads were
        parsed), "total_length" (sum of all read lengths),
        "average_length", "max_length", "min_length", "median_length" and
        "base_counts" (a C{defaultdict} mapping each symbol seen while
        iterating the reads, e.g. A, T, G, C, N, to its number of
        occurrences). All length statistics are 0 when no reads are found.
    """
    lengths = []
    base_counts = defaultdict(int)

    for read in SeqIO.parse(file_handle, file_type):
        lengths.append(len(read))
        for symbol in read:
            base_counts[symbol] += 1

    # The read count and the total length both follow from the per-read
    # lengths, so there is no need to accumulate them separately.
    read_number = len(lengths)
    total_length = sum(lengths)

    if lengths:
        average_length = total_length / read_number
        max_length = max(lengths)
        min_length = min(lengths)
        median_length = median(lengths)
    else:
        # No reads at all: report zero for every length statistic rather
        # than dividing by zero or calling max/min/median on an empty list.
        average_length = max_length = min_length = median_length = 0

    return {
        "read_number": read_number,
        "total_length": total_length,
        "average_length": average_length,
        "max_length": max_length,
        "min_length": min_length,
        "median_length": median_length,
        "base_counts": base_counts,
    }
def medianScore(self):
    """
    Compute the median of the scores of all HSPs returned by
    C{self.hsps()}.

    @raise ValueError: If there are no HSPs.
    @return: The C{float} median score of HSPs in alignments matching the
        title.
    """
    scores = []
    for hsp in self.hsps():
        # Each HSP carries a score object whose numeric value is in its
        # own 'score' attribute.
        scores.append(hsp.score.score)
    return median(scores)
def testMedianOfTwo(self):
    """
    The median of a two-element list must be computed correctly.
    """
    values = [3.1, 5.9]
    expected = 4.5
    self.assertEqual(expected, median(values))
def testMedianOfOne(self):
    """
    The median of a single-element list must be that element.
    """
    values = [3]
    expected = 3
    self.assertEqual(expected, median(values))
def testMedianOfFive(self):
    """
    The median of an unsorted five-element list must be computed
    correctly.
    """
    values = [3.1, 1.3, 7.6, 9.9, 5.9]
    expected = 5.9
    self.assertEqual(expected, median(values))
def testMedianOfFour(self):
    """
    The median of an unsorted four-element list must be computed
    correctly.
    """
    values = [3.1, 1.3, 7.6, 5.9]
    expected = 4.5
    self.assertEqual(expected, median(values))
def testMedianOfThree(self):
    """
    The median of an unsorted three-element list must be computed
    correctly.
    """
    values = [3.1, 7.6, 5.9]
    expected = 5.9
    self.assertEqual(expected, median(values))