def report_coverage(data_folder, adaID, VERBOSE=0, summary=True): '''Produce a report on rough coverage on reference (ignore inserts)''' ref_filename = get_reference_premap_filename(data_folder, adaID) refseq = SeqIO.read(ref_filename, 'fasta') # Prepare data structures coverage = np.zeros(len(refseq), int) # Parse the BAM file unmapped = 0 mapped = 0 bamfilename = get_premapped_filename(data_folder, adaID, type='bam') with pysam.Samfile(bamfilename, 'rb') as bamfile: for read in bamfile: if read.is_unmapped or (not read.is_proper_pair) or (not len( read.cigar)): unmapped += 1 continue # Proceed along CIGARs ref_pos = read.pos for (bt, bl) in read.cigar: if bt not in (0, 2): continue # Treat deletions as 'covered' coverage[ref_pos:ref_pos + bl] += 1 ref_pos += bl mapped += 1 # Save results from hivwholeseq.sequencing.filenames import get_coverage_figure_filename import matplotlib.pyplot as plt fig, ax = plt.subplots(1, 1, figsize=(13, 6)) ax.plot(np.arange(len(refseq)), coverage + 1, lw=2, c='b') ax.set_xlabel('Position') ax.set_ylabel('Coverage') ax.set_yscale('log') ax.set_title('adaID ' + adaID + ', premapped', fontsize=18) ax.set_xlim(-20, len(refseq) + 20) plt.tight_layout() from hivwholeseq.utils.generic import mkdirs from hivwholeseq.sequencing.filenames import get_figure_folder mkdirs(get_figure_folder(data_folder, adaID)) plt.savefig(get_coverage_figure_filename(data_folder, adaID, 'premapped')) plt.close(fig) if summary: with open(get_premap_summary_filename(data_folder, adaID), 'a') as f: f.write('\nPremapping results: '+\ str(mapped)+' read pairs mapped, '+str(unmapped)+' unmapped\n') f.write('\nCoverage plotted: '+\ get_coverage_figure_filename(data_folder, adaID, 'premapped')+'\n')
def report_coverage(data_folder, adaID, VERBOSE=0, summary=True): '''Produce a report on rough coverage on reference (ignore inserts)''' ref_filename = get_reference_premap_filename(data_folder, adaID) refseq = SeqIO.read(ref_filename, 'fasta') # Prepare data structures coverage = np.zeros(len(refseq), int) # Parse the BAM file unmapped = 0 mapped = 0 bamfilename = get_premapped_filename(data_folder, adaID, type='bam') with pysam.Samfile(bamfilename, 'rb') as bamfile: for read in bamfile: if read.is_unmapped or (not read.is_proper_pair) or (not len( read.cigar)): unmapped += 1 continue # Proceed along CIGARs ref_pos = read.pos for (bt, bl) in read.cigar: if bt not in (0, 2): continue # Treat deletions as 'covered' coverage[ref_pos:ref_pos + bl] += 1 ref_pos += bl mapped += 1 # Save results from hivwholeseq.sequencing.filenames import get_coverage_figure_filename import matplotlib.pyplot as plt fig, ax = plt.subplots(1, 1, figsize=(13, 6)) ax.plot(np.arange(len(refseq)), coverage + 1, lw=2, c='b') ax.set_xlabel('Position') ax.set_ylabel('Coverage') ax.set_yscale('log') ax.set_title('adaID ' + adaID + ', premapped', fontsize=18) ax.set_xlim(-20, len(refseq) + 20) plt.tight_layout() from hivwholeseq.utils.generic import mkdirs from hivwholeseq.sequencing.filenames import get_figure_folder mkdirs(get_figure_folder(data_folder, adaID)) plt.savefig(get_coverage_figure_filename(data_folder, adaID, 'premapped')) plt.close(fig) if summary: with open(get_premap_summary_filename(data_folder, adaID), 'a') as f: f.write('\nPremapping results: '+\ str(mapped)+' read pairs mapped, '+str(unmapped)+' unmapped\n') f.write('\nCoverage plotted: '+\ get_coverage_figure_filename(data_folder, adaID, 'premapped')+'\n')
def make_output_folders(data_folder, adaID, VERBOSE=0, summary=True): '''Make output folders''' from hivwholeseq.utils.generic import mkdirs outfiles = [get_premapped_filename(data_folder, adaID)] if summary: outfiles.append(get_coverage_figure_filename(data_folder, adaID, 'premapped')) for outfile in outfiles: dirname = os.path.dirname(outfile) mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname
def make_output_folders(data_folder, adaID, VERBOSE=0, summary=True): '''Make output folders''' from hivwholeseq.utils.generic import mkdirs outfiles = [get_premapped_filename(data_folder, adaID)] if summary: outfiles.append( get_coverage_figure_filename(data_folder, adaID, 'premapped')) for outfile in outfiles: dirname = os.path.dirname(outfile) mkdirs(dirname) if VERBOSE: print 'Folder created:', dirname