def main():
    """Write per-read and summary statistics for a FASTA/FASTQ file.

    All options are read from the module-level ``args`` namespace:

    * ``args.fastx``    -- input sequence file; it is handled as FASTQ when
      ``misc._getextension`` reports ``'fastq'``.
    * ``args.savepath`` -- output directory; the current working directory
      is used when it is ``None``, otherwise the value is passed through
      ``misc.mkdir``.
    * ``args.tag``      -- prefix for output file names; defaults to
      ``misc.get_fname(args.fastx)``.
    * ``args.raw``      -- when truthy, also write ``<tag>_raw.stats``.
    * ``args.report``   -- when truthy, also write a ``<tag>.pdf`` report.

    ``<tag>_summary.stats`` is always written.
    """
    out_dir, fastx_path, label = args.savepath, args.fastx, args.tag

    out_dir = os.getcwd() if out_dir is None else misc.mkdir(out_dir)
    if label is None:
        label = misc.get_fname(fastx_path)

    def out_path(template):
        # Fill the tag into the file-name template and anchor it in out_dir.
        return os.path.join(out_dir, template.format(label))

    is_fastq = misc._getextension(fastx_path) == 'fastq'
    rawdata = cstats.GC_per_read(cstats.readfast(fastx_path), fq=is_fastq)

    if args.raw:
        rawdata.to_csv(out_path('{}_raw.stats'))

    summary = cstats.get_stats(df=rawdata)
    summary.to_csv(out_path('{}_summary.stats'))

    if not args.report:
        return

    # Imported lazily so the plotting stack is only needed for reports.
    from wub.vis import report

    pdf = report.Report(out_path('{}.pdf'))

    # Cumulative length curve, shortest sequence first, scaled to 0-100 %.
    rawdata = rawdata.sort_values('Seqlen', ascending=True)
    running_total = rawdata["Seqlen"].cumsum()
    rawdata['cumsum'] = running_total
    rawdata['norm'] = 100.0 * running_total / running_total.max()
    pdf.plot_line(
        data=rawdata,
        x='Seqlen',
        y='norm',
        title='Normalized cumulative plot',
        xlab='length (bp)',
        ylab="normalized (%)",
    )

    # NOTE: an "ordered cumulative sum" plot (contigs ordered largest to
    # smallest) was sketched here at some point but is currently disabled.

    pdf.plot_scatter(
        data=rawdata,
        x='GC content (%)',
        y='Seqlen',
        title='GC content vs length plot',
        xlab="GC content (%)",
        ylab="length (bp)",
        alpha=0.5,
        ylim=0,
        xlim=0,
    )

    # The quality plot is only drawn when a 'mean_q' column is present.
    if 'mean_q' in rawdata:
        pdf.plot_scatter(
            data=rawdata,
            x='mean_q',
            y='Seqlen',
            title='Mean Q score vs length',
            xlab='Mean Q',
            ylab='length',
            alpha=0.5,
            xlim=rawdata['mean_q'].min() - 0.5,
            ylim=rawdata['Seqlen'].min() - 0.5,
        )

    pdf.close()
invert_yaxis=True, title="", xlab="From context", ylab="To base") if __name__ == '__main__': args = parser.parse_args() verbose = not args.Q tag = args.t if tag is None: tag = os.path.basename(args.bam) context_sizes = args.n.split(",") context_sizes = (int(context_sizes[0]), int(context_sizes[1])) plotter = report.Report(args.r) references = seq_util.read_seq_records_dict(args.f) err_read_stats = bam_stats.error_and_read_stats( args.bam, references, region=args.c, context_sizes=context_sizes, min_aqual=args.q, verbose=verbose) read_stats = err_read_stats['read_stats'] error_stats = err_read_stats['events'] base_stats = err_read_stats['base_stats'] indel_stats = err_read_stats['indel_dists']