Ejemplo n.º 1
0
def tally_qual_scores_gz_bowtie(gz_filename, output, strand, reverse_pos):
	"""Tally per-position quality scores of Bowtie reads into a CSV table.

	Records are iterated via ``BowTieReader(gz_filename, False)``. Each
	character of a record's ``'qual'`` string is decoded as ``ord(char) - 33``
	and counted under its 0-based position in the read.

	The table is written to ``output`` as comma-separated text: a header line
	``POS,0,1,...`` followed by one row per read position in ascending order,
	where each cell is the number of counted reads with that score at that
	position. At least the score columns 0..41 are always present; more are
	added when a higher score is encountered. A ``"<N> reads used"`` line is
	also written to stderr.

	Args:
		gz_filename: Path handed to ``BowTieReader``.
		output: Path of the CSV file to write (opened with mode ``'w'``).
		strand: If not None, only records whose ``'strand'`` value equals it
			are counted; None counts every record.
		reverse_pos: If true, each quality string is reversed before
			tallying, so position 0 refers to the last character of the read.

	Raises:
		AssertionError: If a quality character decodes to a negative score.
	"""
	# Number of score columns to emit. Starts at 42 (scores 0..41) and grows
	# so that the highest score actually seen always gets its own column;
	# for combined reads it is possible to go above 41.
	num_columns = 42
	quals_at = defaultdict(lambda: defaultdict(int))
	count = 0
	# NOTE(review): the meaning of the second BowTieReader argument is not
	# visible from here; it is passed through unchanged.
	for r in BowTieReader(gz_filename, False):
		if strand is not None and r['strand'] != strand:
			continue
		count += 1
		r_qual = r['qual']
		if reverse_pos:
			r_qual = r_qual[::-1]
		for pos, q in enumerate(r_qual):
			phred = ord(q) - 33
			assert phred >= 0, "negative quality score {0} at position {1}".format(phred, pos)
			quals_at[pos][phred] += 1
			if phred >= num_columns:
				num_columns = phred + 1

	# sanity check: every counted read contributes at most one score per
	# position (fewer than `count` when reads differ in length)
	for pos in quals_at:
		assert sum(quals_at[pos].values()) <= count

	sys.stderr.write("{0} reads used\n".format(count))

	scores = range(num_columns)
	with open(output, 'w') as out:
		out.write("POS," + ",".join(str(x) for x in scores) + '\n')
		for pos in sorted(quals_at):
			counts = quals_at[pos]
			# .get() keeps missing scores at 0 without inserting new keys
			out.write(str(pos) + ',' + ",".join(str(counts.get(x, 0)) for x in scores) + '\n')