def calc_coverage(bamfile, regions=None, mtchr=None):
    """Compute and print coverage statistics for each requested region.

    For every region the pileup is walked once to accumulate the number of
    mapped bases (sum of per-column segment counts) and the number of
    positions with at least one pileup column.  Five attributes are printed
    per region through ``eav`` (bases_mapped, depth_of_coverage,
    breadth_of_coverage, length, pos_mapped).

    This function relies on the module-level names ``bam_name``, ``args``
    (reads ``args["--tsv"]`` and ``args["--header"]``) and ``eav``.

    Args:
        bamfile: Alignment file object exposing ``pileup``, ``lengths`` and
            ``gettid`` (presumably a pysam alignment file -- confirm
            against the caller).
        regions: Iterable of regions.  Each item is either an ``Interval``
            (0-based start, 1-based stop, as noted in the original code) or
            a ``"chrom:start-end"`` string with 1-based inclusive
            coordinates.  ``None`` is treated as "no regions".
        mtchr: Accepted for interface compatibility; not used here.

    Returns:
        A list with one dict per region holding ``chrom``,
        ``bases_mapped``, ``pos_covered`` and ``depth_of_coverage``.

    Raises:
        ValueError: If a region names a chromosome that is not present in
            ``bamfile``.
    """
    depths = []
    if regions is None:
        return depths

    for region in regions:
        output_dir = OrderedDict()
        if isinstance(region, Interval):
            # Add one to start as starts are 0 based; ends are 1 based.
            chrom, start, end = str(region.chrom), region.start + 1, region.stop
            output_dir["name"] = region.name
        else:
            chrom, start, end = re.split("[:-]", region)
            start, end = int(start), int(end)
        output_dir["chrom"] = chrom
        output_dir["start"] = start
        output_dir["end"] = end

        # gettid() reports -1 for an unknown reference; indexing lengths
        # with -1 would silently pick the last chromosome's length.
        tid = bamfile.gettid(chrom)
        if tid < 0:
            raise ValueError("Unknown chromosome: " + str(chrom))

        # If end extends too far, clamp it to the chromosome length.
        chrom_len = bamfile.lengths[tid]
        if end > chrom_len:
            with indent(4):
                puts_err(
                    colored.yellow(
                        "\nSpecified chromosome end extends beyond chromosome length. Set to max of: "
                        + str(chrom_len)
                        + "\n"
                    )
                )
            end = chrom_len

        # start/end are 1-based inclusive here, while pileup() takes a
        # 0-based half-open interval: [start - 1, end) covers exactly the
        # requested bases.  max_depth must be an integer.
        columns = bamfile.pileup(
            chrom, start - 1, end, truncate=True, max_depth=10 ** 8
        )
        cum_depth = 0
        pos_covered = 0
        for column in columns:
            pos_covered += 1
            cum_depth += column.nsegments

        length = end - start + 1
        coverage = cum_depth / float(length)
        breadth = pos_covered / float(length)

        # Only the first record of a region is passed the --header flag.
        output_dir["ATTR"] = "bases_mapped"
        print(eav(bam_name, output_dir, cum_depth, args["--tsv"], args["--header"]))

        remaining = (
            ("depth_of_coverage", coverage),
            ("breadth_of_coverage", breadth),
            ("length", length),
            ("pos_mapped", pos_covered),
        )
        for attr, value in remaining:
            output_dir["ATTR"] = attr
            print(eav(bam_name, output_dir, value, args["--tsv"]))

        depths.append(
            {
                "chrom": chrom,
                "bases_mapped": cum_depth,
                "pos_covered": pos_covered,
                "depth_of_coverage": coverage,
            }
        )
    return depths
else: mtchr = mtchr[0] with indent(4): puts_err(colored.blue("\nGuessing Mitochondrial Chromosome: " + mtchr + "\n")) depths = [] cov = calc_coverage(bamfile, chroms, mtchr) # Genomewide depth output_dir = {} genome_length = sum([x for x in bamfile.lengths], args["--tsv"]) output_dir["length"] = genome_length output_dir["chrom"] = "genome" bases_mapped = sum([x["bases_mapped"] for x in cov]) output_dir["ATTR"] = "bases_mapped" print eav(bam_name, output_dir, bases_mapped, args["--tsv"]) output_dir["ATTR"] = "depth_of_coverage" coverage = bases_mapped / float(genome_length) print eav(bam_name, output_dir, coverage, args["--tsv"]) output_dir["ATTR"] = "breadth_of_coverage" breadth = sum([x["pos_covered"] for x in cov]) / float(genome_length) print eav(bam_name, output_dir, breadth, args["--tsv"]) output_dir["ATTR"] = "positions_mapped" pos_mapped = sum([x["pos_covered"] for x in cov]) print eav(bam_name, output_dir, pos_mapped, args["--tsv"]) if mtchr: # Nuclear