コード例 #1
0
def main():
    """Compute per-read statistics for a sequence file and write them out.

    Options are read from the ``args`` namespace, which is not defined in
    this function and must therefore exist at module level: ``savepath``,
    ``fastx``, ``tag``, ``raw`` and ``report``.

    Files written under ``savepath`` (the current working directory when
    ``args.savepath`` is None):

    * ``<tag>_summary.stats`` -- always written, from ``cstats.get_stats``.
    * ``<tag>_raw.stats`` -- the per-read table, only when ``args.raw`` is
      truthy.
    * ``<tag>.pdf`` -- plots, only when ``args.report`` is truthy.

    ``tag`` falls back to ``misc.get_fname(fastx)`` when not supplied.
    """
    savepath = args.savepath
    fastx = args.fastx
    tag = args.tag

    if savepath is None:
        savepath = os.getcwd()
    else:
        # The value returned by misc.mkdir is what gets used as the
        # output directory from here on.
        savepath = misc.mkdir(savepath)

    if tag is None:
        tag = misc.get_fname(fastx)

    # Only an extension of exactly 'fastq' switches the reader to FASTQ mode.
    fq = misc._getextension(fastx) == 'fastq'

    rawdata = cstats.GC_per_read(cstats.readfast(fastx), fq=fq)

    if args.raw:
        rawdata.to_csv(os.path.join(savepath, '{}_raw.stats'.format(tag)))

    summary = cstats.get_stats(df=rawdata)
    summary.to_csv(os.path.join(savepath, '{}_summary.stats'.format(tag)))

    if not args.report:
        return

    # Imported here so the plotting module is only loaded when a report
    # was actually requested.
    from wub.vis import report

    plotter = report.Report(os.path.join(savepath, '{}.pdf'.format(tag)))
    try:
        # Cumulative length, shortest read first, expressed as a percentage
        # of the total so the curve ends at 100.
        rawdata = rawdata.sort_values('Seqlen', ascending=True)
        rawdata['cumsum'] = rawdata["Seqlen"].cumsum()
        rawdata['norm'] = 100.0 * rawdata['cumsum'] / rawdata['cumsum'].max()

        plotter.plot_line(
            data=rawdata,
            x='Seqlen',
            y='norm',
            title='Normalized cumulative plot',
            xlab='length (bp)',
            ylab="normalized (%)",
        )

        # TODO: an 'Ordered cumulative sum plot' (reads ordered largest to
        # smallest) was sketched here but never enabled.

        plotter.plot_scatter(data=rawdata,
                             x='GC content (%)',
                             y='Seqlen',
                             title='GC content vs length plot',
                             xlab="GC content (%)",
                             ylab="length (bp)",
                             alpha=0.5,
                             ylim=0,
                             xlim=0)

        # The quality plot is only drawn when the table has a 'mean_q'
        # column.
        if 'mean_q' in rawdata:
            plotter.plot_scatter(data=rawdata,
                                 x='mean_q',
                                 y='Seqlen',
                                 title='Mean Q score vs length',
                                 xlab='Mean Q',
                                 ylab='length',
                                 alpha=0.5,
                                 xlim=rawdata['mean_q'].min() - 0.5,
                                 ylim=rawdata['Seqlen'].min() - 0.5)
    finally:
        # Always close the report, even if a plotting call raised, so the
        # output file handle is not leaked.
        plotter.close()
コード例 #2
0
ファイル: bam_alignment_qc.py プロジェクト: alexiswl/wub
                           invert_yaxis=True,
                           title="",
                           xlab="From context",
                           ylab="To base")


if __name__ == '__main__':
    # Script entry point: parse the command line, then gather error and read
    # statistics from the BAM file given as args.bam.
    args = parser.parse_args()
    # Verbose mode is on unless the -Q option is set (presumably "quiet").
    verbose = not args.Q
    tag = args.t
    if tag is None:
        # No tag supplied: fall back to the base name of the BAM path.
        tag = os.path.basename(args.bam)
    # args.n is a comma-separated string; its first two fields are converted
    # to an (int, int) tuple. Any additional fields are ignored, and fewer
    # than two fields raises IndexError.
    context_sizes = args.n.split(",")
    context_sizes = (int(context_sizes[0]), int(context_sizes[1]))

    # Report object built from args.r (presumably the output report path --
    # confirm against the argument parser).
    plotter = report.Report(args.r)

    # Loaded from args.f and passed to the statistics call below.
    references = seq_util.read_seq_records_dict(args.f)

    err_read_stats = bam_stats.error_and_read_stats(
        args.bam,
        references,
        region=args.c,
        context_sizes=context_sizes,
        min_aqual=args.q,
        verbose=verbose)
    # The result is indexed by string key; split it into its components.
    read_stats = err_read_stats['read_stats']
    error_stats = err_read_stats['events']
    base_stats = err_read_stats['base_stats']
    indel_stats = err_read_stats['indel_dists']