def get_depth_analysis(aln_filename, chrom):
    """Return the all-depth map bam_stats reports for one chromosome.

    Runs ``bam_stats.main`` silently on ``aln_filename`` with the module's
    ``DEPTH_SPACING`` and looks up the ``bam_stats.D_ALL_DEPTH_MAP`` entry
    for ``chrom`` in the resulting depth summaries.

    Args:
        aln_filename: alignment file handed to bam_stats via ``-i``; it is
            also the key used to index the returned depth summaries.
        chrom: chromosome key to look up inside that file's summaries.

    Returns:
        Whatever bam_stats stores under ``D_ALL_DEPTH_MAP`` for ``chrom``
        (presumably a position -> depth mapping; confirm against bam_stats).
    """
    bam_stats_argv = ['-i', aln_filename, '--silent']
    bam_stats_argv += ['--depth_spacing', str(DEPTH_SPACING)]

    # bam_stats.main hands back three summaries; only the depth one is needed
    _, _, depth_by_file = bam_stats.main(bam_stats_argv)

    chrom_summaries = depth_by_file[aln_filename]
    return chrom_summaries[chrom][bam_stats.D_ALL_DEPTH_MAP]
def get_genome_read_depths(bam_file, args):
    """Collect the per-chromosome depth maps bam_stats reports for a BAM.

    Args:
        bam_file: path to the BAM file; must exist on disk.
        args: options object providing ``sampling_spacing``, ``verbose``,
            ``filter_secondary`` and ``filter_primary``.

    Returns:
        dict mapping each chromosome key found in the depth summaries
        (the ``bam_stats.GENOME_KEY`` entry is dropped first) to its
        ``bam_stats.D_ALL_DEPTH_MAP`` value.

    Raises:
        Exception: if ``bam_file`` is not an existing file.
    """
    depth_spacing = args.sampling_spacing
    be_verbose = args.verbose

    # refuse to continue without the input file
    if not os.path.isfile(bam_file):
        raise Exception("Genome bam {} does not exist!".format(bam_file))
    log(bam_file)

    # assemble the bam_stats command line
    stats_argv = ["--input_glob", bam_file]
    stats_argv.extend(["--depth_spacing", str(depth_spacing), '--silent'])
    if args.filter_secondary:
        stats_argv.append('--filter_secondary')
    if args.filter_primary:
        stats_argv.append('--filter_primary')

    read_summaries, _, depth_summaries = bam_stats.main(stats_argv)

    # the genome-wide aggregate is not a chromosome; remove it if present
    file_depths = depth_summaries[bam_file]
    file_depths.pop(bam_stats.GENOME_KEY, None)

    depth_maps = {}
    for chrom_name in sorted(file_depths.keys(), key=chrom_sort):
        if be_verbose:
            chrom_reads = read_summaries[bam_file][chrom_name]
            log("{}: \t{}: \tread_count: {}".format(
                bam_file, chrom_name,
                chrom_reads[bam_stats.B_FILTERED_READ_COUNT]))
            log("{}: \t{}: \tmax_depth: {}".format(
                bam_file, chrom_name,
                file_depths[chrom_name][bam_stats.D_MAX]))
            log("{}: \t{}: \tmin_depth: {}".format(
                bam_file, chrom_name,
                file_depths[chrom_name][bam_stats.D_MIN]))
            log("{}: \t{}: \tavg_depth: {}".format(
                bam_file, chrom_name,
                file_depths[chrom_name][bam_stats.D_AVG]))
            log("{}: \t{}: \tstd_depth: {}".format(
                bam_file, chrom_name,
                file_depths[chrom_name][bam_stats.D_STD]))
        depth_maps[chrom_name] = file_depths[chrom_name][
            bam_stats.D_ALL_DEPTH_MAP]

    return depth_maps
def main():
    """Extract regions from BAM files and optionally summarise and flag them.

    Workflow:
      1. Parse command-line arguments and sanity-check them.
      2. Read the coordinate TSV: column 0 is the chromosome; when at least
         three columns are present, columns 1 and 2 are the integer start
         and end; an optional description column is whitespace-normalised
         with underscores.
      3. For every BAM matching ``args.input_bam_glob`` and every coordinate,
         run ``samtools view -hb`` into the output directory and index the
         result.
      4. If ``args.produce_bam_stats`` is set, run bam_stats on the extracted
         BAM; if ``args.min_depth_threshold`` is also set and the section
         depth (max of median and mean, truncated to int) reaches it, run
         bam_stats again without secondary/supplementary reads and write a
         ``FLAGGED.DEPTH_*`` report file.

    Progress is reported on stderr.

    Raises:
        AssertionError: if the glob is empty, the TSV is not a file, or the
            output location is not a directory.
        subprocess.CalledProcessError: if a samtools invocation fails.
    """
    args = parse_args()
    # one assert per precondition so the failing input is identifiable
    assert len(args.input_bam_glob) > 0, \
        "input bam glob must not be empty"
    assert os.path.isfile(args.coordinate_tsv), \
        "coordinate tsv is not a file: {}".format(args.coordinate_tsv)
    assert os.path.isdir(args.output_location), \
        "output location is not a directory: {}".format(args.output_location)

    # read the coordinates
    coords = list()
    with open(args.coordinate_tsv) as tsv_in:
        for line in tsv_in:
            # skip comments and blank lines
            if line.startswith("#"):
                continue
            if len(line.strip()) == 0:
                continue
            # get line parts
            parts = line.strip().split("\t")
            # save relevant data
            coord = {CHR: parts[0]}
            if len(parts) >= 3:
                coord[START] = int(parts[1])
                coord[END] = int(parts[2])
            if args.description_column is not None:
                coord[DESC] = "_".join(parts[args.description_column].split())
            coords.append(coord)

    for bam_path in glob.glob(args.input_bam_glob):
        for coord in coords:
            # START and END are always stored together, so one check covers
            # both; rows without them address the whole chromosome
            has_range = START in coord

            out_filename = get_output_filename(bam_path, coord)
            out_location = os.path.join(args.output_location, out_filename)
            if has_range:
                region = "{}:{}-{}".format(
                    coord[CHR], coord[START], coord[END])
            else:
                region = coord[CHR]

            # log what's happening
            print("{}:\n\tloc: {}\n\tdesc: {}\n\tout: {}".format(
                bam_path, region, coord[DESC] if DESC in coord else '',
                out_location), file=sys.stderr)

            # get the region and index; samtools emits BAM (binary) on
            # stdout, so the destination is opened in binary mode
            samtools_args = ['samtools', 'view', '-hb', bam_path, region]
            with open(out_location, 'wb') as output:
                subprocess.check_call(samtools_args, stdout=output)
            subprocess.check_call(['samtools', 'index', out_location])

            # maybe make stats
            if not args.produce_bam_stats:
                continue

            stats_filename = "{}.stats.txt".format(out_filename)
            stats_location = os.path.join(args.output_location,
                                          stats_filename)
            bam_stats_args = [
                '-i', out_location, '-g', '-l', '-d', '-v',
                '-o', stats_location
            ]
            if has_range:
                bam_stats_args.extend(
                    ['-r', '{}-{}'.format(coord[START], coord[END])])

            # get summaries
            generic_summaries, _, depth_summaries = bam_stats.main(
                bam_stats_args)

            # are we flagging output based on depth?
            if args.min_depth_threshold is None:
                continue

            # find depth
            generic_summary = generic_summaries[out_location][
                bam_stats.GENOME_KEY]
            depth_summary = depth_summaries[out_location][
                bam_stats.GENOME_KEY]
            depth_summary_value = int(
                max(depth_summary[bam_stats.D_MED],
                    depth_summary[bam_stats.D_AVG]))
            print("\tsection_depth:{}".format(depth_summary_value),
                  file=sys.stderr)

            # handle min depth
            if args.min_depth_threshold > depth_summary_value:
                continue
            print("\tFLAGGED!", file=sys.stderr)

            # also try to get stats for only primary reads
            bam_stats_args_prim = [
                '-i', out_location, '-d', '-V',
                '--filter_secondary', '--filter_supplementary'
            ]
            # only restrict the range when the TSV row supplied one;
            # chromosome-only rows previously raised KeyError here
            if has_range:
                bam_stats_args_prim.extend(
                    ['-r', '{}-{}'.format(coord[START], coord[END])])
            _, _, prim_depth_summaries = bam_stats.main(bam_stats_args_prim)
            prim_depth_summary = prim_depth_summaries[out_location][
                bam_stats.GENOME_KEY]
            prim_depth_summary_value = int(
                max(prim_depth_summary[bam_stats.D_MED],
                    prim_depth_summary[bam_stats.D_AVG]))

            # document depths in flagged filename
            flag_filename = "FLAGGED.DEPTH_{:04d}_p{:04d}.{}.stats.txt".format(
                depth_summary_value, prim_depth_summary_value, out_filename)
            flag_location = os.path.join(args.output_location, flag_filename)
            with open(flag_location, 'w') as flag_out:
                print("####################################", file=flag_out)
                print("bam_file:{}".format(out_filename), file=flag_out)
                print("bam_stats_file:{}".format(stats_filename),
                      file=flag_out)
                print("depth_threshold:{}".format(
                    args.min_depth_threshold), file=flag_out)
                print("median_depth:{}".format(
                    depth_summary[bam_stats.D_MED]), file=flag_out)
                print("mean_depth:{}".format(
                    depth_summary[bam_stats.D_AVG]), file=flag_out)
                print("total_reads:{}".format(
                    generic_summary[bam_stats.B_READ_COUNT]), file=flag_out)
                # "filtered" here is the secondary + supplementary count
                print("filtered_reads:{}".format(
                    generic_summary[bam_stats.B_SECONDARY_COUNT] +
                    generic_summary[bam_stats.B_SUPPLEMENTARY_COUNT]),
                    file=flag_out)
                print("primary_median_depth:{}".format(
                    prim_depth_summary[bam_stats.D_MED]), file=flag_out)
                print("primary_mean_depth:{}".format(
                    prim_depth_summary[bam_stats.D_AVG]), file=flag_out)

    print("Fin.", file=sys.stderr)