def __init__(self, args):
    """Open the input SAM/BAM file and index its amplicons.

    Loads the amplicon definitions embedded in the file header and
    stores them as a mapping keyed by external id for O(1) lookup.

    Args:
        args: parsed CLI namespace; only ``args.input`` (path to the
            SAM/BAM file) is read here.
    """
    self.args = args
    self.stats = stats.Stats('')
    self.samfile = pysam.Samfile(args.input)
    amplicons = amplicon.load_amplicons_from_header(
        self.samfile.header, self.stats, self.samfile)
    # Map external_id -> Amplicon (dict comprehension instead of
    # dict([...]) — same result, no intermediate list).
    self.amplicons = {x.external_id: x for x in amplicons}
def __init__(self, args):
    """Open the input SAM/BAM file and index its amplicons.

    Loads the amplicon definitions embedded in the file header and
    stores them as a mapping keyed by external id for O(1) lookup.

    Args:
        args: parsed CLI namespace; only ``args.input`` (path to the
            SAM/BAM file) is read here.
    """
    self.args = args
    self.stats = stats.Stats('')
    self.samfile = pysam.Samfile(args.input)
    amplicons = amplicon.load_amplicons_from_header(
        self.samfile.header, self.stats, self.samfile)
    # Map external_id -> Amplicon (dict comprehension instead of
    # dict([...]) — same result, no intermediate list).
    self.amplicons = {x.external_id: x for x in amplicons}
def coverage(args):
    """Report per-(read group, amplicon) read counts from a SAM/BAM file.

    Prints summary statistics to stderr and a CSV table with columns
    ``rg, lib, amp, unique, reads`` to stdout.  A read is counted when
    it carries both an "RG" (read group) and an "ea" (amplicon id) tag;
    non-duplicate reads are additionally counted in the "unique" column.
    If ``args.control`` names a read group, its totals are reported
    separately on stderr and its rows are omitted from the CSV.

    Args:
        args: parsed CLI namespace; reads ``args.input`` (SAM/BAM path)
            and ``args.control`` (control read-group id or falsy).

    Raises:
        ZeroDivisionError: if the input contains no reads at all
            (``total`` stays 0).  -- NOTE(review): pre-existing behavior.
    """
    inp = pysam.Samfile(args.input)
    uniq = {}
    reads = {}
    total = 0
    stats = Stats("")
    amplicons = amplicon.load_amplicons_from_header(inp.header, stats, None)
    libs = {}
    # Pre-seed a zero counter for every (read group, amplicon) pair so
    # unobserved combinations still appear in the CSV, and remember each
    # read group's library ("LB") for the output table.
    for rg in inp.header["RG"]:
        for amp in amplicons:
            key = rg["ID"], amp.external_id
            reads[key] = uniq[key] = 0
        libs[rg["ID"]] = rg.get("LB", None)
    for r in inp:
        total += 1
        tags = dict(r.tags)
        try:
            key = tags["RG"], tags["ea"]
        except KeyError:
            # Read lacks a read-group or amplicon tag: off-target, skip.
            continue
        try:
            reads[key] += 1
            if not r.is_duplicate:
                uniq[key] += 1
        except KeyError:
            # Tag combination not declared in the header.
            logging.debug("unexpected key")
    total_ot = sum(reads.values())
    total_uniq = sum(uniq.values())
    total_ot_p = (100.0 * total_ot) / total
    try:
        reads_per_counter = float(total_ot) / total_uniq
    except ZeroDivisionError:
        # No unique on-target reads at all.
        reads_per_counter = 0
    print("total %(total)s reads, on target %(total_ot)s, uniq %(total_uniq)s" % locals(), file=sys.stderr)
    print("on target %3.2f%%" % total_ot_p, file=sys.stderr)
    print("on target reads per counter: %2.2f" % reads_per_counter, file=sys.stderr)
    if args.control:
        # Generator expression: no throwaway list for the sum.
        total_control = sum(reads[x] for x in reads if x[0] == args.control)
        control_p = (100 * total_control) / total
        print("control reads %(total_control)s, %(control_p)f%%" % locals(), file=sys.stderr)
    out = csv.writer(sys.stdout)
    out.writerow(["rg", "lib", "amp", "unique", "reads"])
    for (rg, amp) in sorted(reads):
        if rg != args.control:
            key = (rg, amp)
            out.writerow(map(str, (rg, libs[rg], amp, uniq[key], reads[key])))
def coverage(args):
    """Report per-(read group, amplicon) read counts from a SAM/BAM file.

    Prints summary statistics to stderr and a CSV table with columns
    ``rg, lib, amp, unique, reads`` to stdout.  A read is counted when
    it carries both an 'RG' (read group) and an 'ea' (amplicon id) tag;
    non-duplicate reads are additionally counted in the 'unique' column.
    If ``args.control`` names a read group, its totals are reported
    separately on stderr and its rows are omitted from the CSV.

    Args:
        args: parsed CLI namespace; reads ``args.input`` (SAM/BAM path)
            and ``args.control`` (control read-group id or falsy).

    Raises:
        ZeroDivisionError: if the input contains no reads at all
            (``total`` stays 0).  -- NOTE(review): pre-existing behavior.
    """
    inp = pysam.Samfile(args.input)
    uniq = {}
    reads = {}
    total = 0
    stats = Stats('')
    amplicons = amplicon.load_amplicons_from_header(inp.header, stats, None)
    libs = {}
    # Pre-seed a zero counter for every (read group, amplicon) pair so
    # unobserved combinations still appear in the CSV, and remember each
    # read group's library ('LB') for the output table.
    for rg in inp.header['RG']:
        for amp in amplicons:
            key = rg['ID'], amp.external_id
            reads[key] = uniq[key] = 0
        libs[rg['ID']] = rg.get('LB', None)
    for r in inp:
        total += 1
        tags = dict(r.tags)
        try:
            key = tags['RG'], tags['ea']
        except KeyError:
            # Read lacks a read-group or amplicon tag: off-target, skip.
            continue
        try:
            reads[key] += 1
            if not r.is_duplicate:
                uniq[key] += 1
        except KeyError:
            # Tag combination not declared in the header.
            logging.debug('unexpected key')
    total_ot = sum(reads.values())
    total_uniq = sum(uniq.values())
    total_ot_p = (100.0 * total_ot) / total
    try:
        reads_per_counter = float(total_ot) / total_uniq
    except ZeroDivisionError:
        # No unique on-target reads at all.
        reads_per_counter = 0
    print(
        'total %(total)s reads, on target %(total_ot)s, uniq %(total_uniq)s'
        % locals(), file=sys.stderr)
    print('on target %3.2f%%' % total_ot_p, file=sys.stderr)
    print('on target reads per counter: %2.2f' % reads_per_counter,
        file=sys.stderr)
    if args.control:
        # Generator expression: no throwaway list for the sum.
        total_control = sum(reads[x] for x in reads if x[0] == args.control)
        control_p = (100 * total_control) / total
        print('control reads %(total_control)s, %(control_p)f%%' % locals(),
            file=sys.stderr)
    out = csv.writer(sys.stdout)
    out.writerow(['rg', 'lib', 'amp', 'unique', 'reads'])
    for (rg, amp) in sorted(reads):
        if rg != args.control:
            key = (rg, amp)
            out.writerow(map(str, (rg, libs[rg], amp, uniq[key], reads[key])))