def main(args):
    """Build a context error report from a BAM file and plot it with R.

    Reads the reference FASTA, feeds aligned entries from ``args.input``
    into an ``ErrorProfileFactory``, writes the context error report to
    ``<args.tempdir>/err.txt`` and runs ``plot_base_error_context.r`` once
    per requested output file.

    Attributes read from ``args``: ``reference``, ``input``,
    ``input_index``, ``random``, ``query``, ``max_alignments``,
    ``stopping_point``, ``tempdir``, ``output``, ``rscript_path``,
    ``scale``, ``output_raw`` and ``specific_tempdir``.

    Exits with status 1 when ``args.random`` is set but the BAM file has
    no index available.
    """
    # make our error profile report
    sys.stderr.write("Reading reference fasta\n")
    # Context manager so the reference handle is released once it is read.
    with open(args.reference) as ref_handle:
        ref = FastaData(ref_handle.read())
    sys.stderr.write("Reading alignments\n")
    epf = ErrorProfileFactory()

    strand = 'query' if args.query else 'target'
    z = 0    # aligned entries added to the profile so far
    con = 0  # last sampled minimum context count

    if args.random:
        if args.input_index:
            bf = BAMFile(args.input, reference=ref,
                         index_file=args.input_index)
            bf.read_index(index_file=args.input_index)
        else:
            bf = BAMFile(args.input, reference=ref)
            bf.read_index()
        if not bf.has_index():
            sys.stderr.write("Random access requires an index be set\n")
            # Without an index the sampling loop below cannot run, so stop
            # here instead of failing on the first index lookup.
            sys.exit(1)
        while True:
            rname = random.choice(bf.index.get_names())
            coord = bf.index.get_longest_target_alignment_coords_by_name(
                rname)
            if not coord:
                continue
            e = bf.fetch_by_coord(coord)
            if e.is_aligned():
                epf.add_alignment(e)
                z += 1
                # The context count is only refreshed every 100 reads.
                if z % 100 == 1:
                    con = epf.get_min_context_count(strand)
                    sys.stderr.write(
                        str(z) + " alignments, " + str(con) +
                        " min context coverage\r")
                if args.max_alignments <= z:
                    break
                if args.stopping_point <= con:
                    break
    else:
        bf = BAMFile(args.input, reference=ref)
        for e in bf:
            if e.is_aligned():
                epf.add_alignment(e)
                z += 1
                # The context count is only refreshed every 100 reads.
                if z % 100 == 1:
                    con = epf.get_min_context_count(strand)
                    sys.stderr.write(
                        str(z) + " alignments, " + str(con) +
                        " min context coverage\r")
                if args.max_alignments <= z:
                    break
                if args.stopping_point <= con:
                    break

    sys.stderr.write("\n")
    sys.stderr.write('working with:' + "\n")
    sys.stderr.write(
        str(z) + " alignments, " + str(con) + " min context coverage" + "\n")

    err_path = args.tempdir + '/err.txt'
    epf.write_context_error_report(err_path, strand)

    script_dir = os.path.dirname(os.path.realpath(__file__))
    for ofile in args.output:
        cmd = (args.rscript_path + ' ' + script_dir +
               '/plot_base_error_context.r ' + err_path + ' ' + ofile + ' ')
        if args.scale:
            cmd += ' '.join([str(x) for x in args.scale])
        sys.stderr.write(cmd + "\n")
        # NOTE: the command is split on whitespace, so paths containing
        # spaces are passed to Rscript as several arguments.
        call(cmd.split())
    sys.stderr.write("finished\n")

    if args.output_raw:
        # Both handles are closed on exit so the copy is fully flushed.
        with open(args.output_raw, 'w') as of, open(err_path) as inf:
            for line in inf:
                of.write(line)

    # Temporary working directory step 3 of 3 - Cleanup
    if not args.specific_tempdir:
        rmtree(args.tempdir)
def main(args):
    """Build an alignment error report from a BAM file and plot it with R.

    Reads the reference FASTA, feeds aligned entries from ``args.input``
    into an ``ErrorProfileFactory`` until the accumulated alignment length
    reaches ``args.max_length`` (or, in sequential mode, the input is
    exhausted), writes the report to ``<args.tempdir>/report.txt`` and
    runs ``plot_alignment_errors.r`` once per requested output file.

    Attributes read from ``args``: ``reference``, ``input``,
    ``input_index``, ``random``, ``max_length``, ``tempdir``, ``output``,
    ``rscript_path``, ``scale``, ``output_raw``, ``output_stats`` and
    ``specific_tempdir``.

    Calls ``sys.exit()`` when ``args.random`` is set but the BAM file has
    no index available.
    """
    sys.stderr.write("Reading our reference Fasta\n")
    # Context manager so the reference handle is released once it is read.
    with open(args.reference, 'rb') as ref_handle:
        ref = FastaData(ref_handle.read())
    sys.stderr.write("Finished reading our reference Fasta\n")

    if args.input_index:
        bf = BAMFile(args.input, reference=ref, index_file=args.input_index)
        bf.read_index(index_file=args.input_index)
    else:
        bf = BAMFile(args.input, reference=ref)
        bf.read_index()

    epf = ErrorProfileFactory()
    z = 0  # aligned entries added to the profile so far
    if args.random:
        if not bf.has_index():
            sys.stderr.write(
                "Random access requires our format of index bgi to be set\n")
            sys.exit()
        while True:
            rname = random.choice(bf.index.get_names())
            coord = bf.index.get_longest_target_alignment_coords_by_name(
                rname)
            if not coord:
                continue
            e = bf.fetch_by_coord(coord)
            if e.is_aligned():
                epf.add_alignment(e)
                z += 1
                # The accumulated length is only sampled every 100 reads.
                if z % 100 == 1:
                    con = epf.get_alignment_errors().alignment_length
                    if args.max_length <= con:
                        break
                    sys.stderr.write(
                        str(con) + "/" + str(args.max_length) +
                        " bases from " + str(z) + " alignments\r")
        sys.stderr.write("\n")
    else:
        for e in bf:
            if e.is_aligned():
                epf.add_alignment(e)
                z += 1
                # The accumulated length is only sampled every 100 reads.
                if z % 100 == 1:
                    con = epf.get_alignment_errors().alignment_length
                    if args.max_length <= con:
                        break
                    sys.stderr.write(
                        str(con) + "/" + str(args.max_length) +
                        " bases from " + str(z) + " alignments\r")
        sys.stderr.write("\n")

    report_path = args.tempdir + '/report.txt'
    # "with" closes each output even if a write raises part-way through.
    with open(report_path, 'w') as of:
        of.write(epf.get_alignment_errors().get_report())

    script_dir = os.path.dirname(os.path.realpath(__file__))
    for ofile in args.output:
        cmd = (args.rscript_path + ' ' + script_dir +
               '/plot_alignment_errors.r ' + report_path + ' ' + ofile + ' ')
        if args.scale:
            cmd += ' '.join([str(x) for x in args.scale])
        sys.stderr.write(cmd + "\n")
        # NOTE: the command is split on whitespace, so paths containing
        # spaces are passed to Rscript as several arguments.
        call(cmd.split())

    if args.output_raw:
        with open(args.output_raw, 'w') as of, open(report_path) as inf:
            for line in inf:
                of.write(line)

    if args.output_stats:
        with open(args.output_stats, 'w') as of:
            of.write(epf.get_alignment_errors().get_stats())

    sys.stderr.write("finished\n")

    # Temporary working directory step 3 of 3 - Cleanup
    if not args.specific_tempdir:
        rmtree(args.tempdir)