def _plot_stats(st, pdf):
    """Generate plots and save to report PDF.

    Args:
        st: Statistics table that is filtered with ``st.loc`` on its
            ``Category`` column. For the ``"Unusable"`` rows the ``Name``
            column is converted to float and log10-transformed before
            plotting. (Presumably a pandas DataFrame -- confirm against
            the caller.)
        pdf: Target handed unchanged to ``report.Report``.

    Notes:
        ``args`` is read from the enclosing scope: the autotune plot is
        only drawn when ``args.Y > 0`` and ``args.q`` is used both in the
        title and as the position of the vertical marker line.

        The report is closed in a ``finally`` clause so the handle is
        released even when one of the plotting helpers raises.
    """
    # (category, plot title, extra keyword arguments for _plot_pd_bars).
    # Only the first three plots request annotation; the rest rely on the
    # helper's own default, so no ``ann`` keyword is passed for them.
    bar_plots = (
        ("Classification", "Classification of output reads", {"ann": True}),
        ("Strand", "Strand of oriented reads", {"ann": True}),
        ("RescueStrand", "Strand of rescued reads", {"ann": True}),
        ("UnclassHitNr", "Number of hits in unclassified reads", {}),
        ("RescueHitNr", "Number of hits in rescued reads", {}),
        ("RescueSegmentNr", "Number of usable segments per rescued read", {}),
    )

    R = report.Report(pdf)
    try:
        for category, title, extra in bar_plots:
            # Each helper gets its own copy so it cannot modify ``st``.
            _plot_pd_bars(
                st.loc[st.Category == category, ].copy(), title, R, **extra)

        if args.Y > 0:
            _plot_pd_line(
                st.loc[st.Category == "AutotuneSample", ].copy(),
                "Classified reads as function of cutoff(q). Best q={:.4f}".format(
                    args.q),
                R,
                vline=args.q)

        udf = st.loc[st.Category == "Unusable", ].copy()
        # Plot on a log10 scale; the +1.0 keeps zero-valued entries finite.
        udf.Name = np.log10(1.0 + np.array(udf.Name, dtype=float))
        _plot_pd_line(
            udf, "Log10 length distribution of trimmed away sequences.", R)
    finally:
        R.close()
def _classification_percentages(rs):
    """Return the (found, rescue, unusable) shares of ``rs`` in percent.

    ``rs`` holds the classification rows; the ``Value`` entries whose
    ``Name`` is ``"Primers_found"``, ``"Rescue"`` and ``"Unusable"`` are
    converted to float and summed per name. A name with no matching row
    counts as 0.0, and a zero total yields ``(0.0, 0.0, 0.0)`` instead of
    dividing by zero.
    """
    counts = [
        # Going through numpy avoids calling float() on a pandas Series,
        # which fails on an empty selection and is deprecated otherwise.
        float(np.asarray(rs.loc[rs.Name == name, "Value"], dtype=float).sum())
        for name in ("Primers_found", "Rescue", "Unusable")
    ]
    total = sum(counts)
    if total == 0:
        return 0.0, 0.0, 0.0
    return tuple(count / total * 100 for count in counts)


def _plot_stats(st, pdf):
    """Generate plots and save to report PDF.

    Also writes a short summary of the read classification percentages
    to standard error.

    Args:
        st: Statistics table that is filtered with ``st.loc`` on its
            ``Category`` column; the ``Name`` and ``Value`` columns of the
            ``"Classification"`` rows feed the stderr summary, and the
            ``Name`` column of the ``"Unusable"`` rows is converted to
            float and log10-transformed before plotting. (Presumably a
            pandas DataFrame -- confirm against the caller.)
        pdf: Target handed unchanged to ``report.Report``.

    Notes:
        ``q_bak`` and ``args`` are read from the enclosing scope: the
        autotune plot is only drawn when ``q_bak is None``, and ``args.q``
        is used both in its title and as the position of the vertical
        marker line.

        The report is closed in a ``finally`` clause so the handle is
        released even when one of the plotting helpers raises.
    """
    separator = "-----------------------------------\n"
    # (category, plot title, extra keyword arguments for _plot_pd_bars).
    # Plots without ``ann`` rely on the helper's own default.
    remaining_bar_plots = (
        ("Strand", "Strand of oriented reads", {"ann": True}),
        ("RescueStrand", "Strand of rescued reads", {"ann": True}),
        ("UnclassHitNr", "Number of hits in unclassified reads", {}),
        ("RescueHitNr", "Number of hits in rescued reads", {}),
        ("RescueSegmentNr", "Number of usable segments per rescued read", {}),
    )

    R = report.Report(pdf)
    try:
        rs = st.loc[st.Category == "Classification", ]
        _plot_pd_bars(rs.copy(), "Classification of output reads", R, ann=True)

        found, rescue, unusable = _classification_percentages(rs)
        sys.stderr.write(separator)
        sys.stderr.write(
            "Reads with two primers:\t{:.2f}%\nRescued reads:\t\t{:.2f}%\nUnusable reads:\t\t{:.2f}%\n".format(
                found, rescue, unusable))
        sys.stderr.write(separator)

        for category, title, extra in remaining_bar_plots:
            # Each helper gets its own copy so it cannot modify ``st``.
            _plot_pd_bars(
                st.loc[st.Category == category, ].copy(), title, R, **extra)

        if q_bak is None:
            _plot_pd_line(
                st.loc[st.Category == "AutotuneSample", ].copy(),
                "Usable bases as function of cutoff(q). Best q={:.4g}".format(
                    args.q),
                R,
                vline=args.q)

        udf = st.loc[st.Category == "Unusable", ].copy()
        # Plot on a log10 scale; the +1.0 keeps zero-valued entries finite.
        udf.Name = np.log10(1.0 + np.array(udf.Name, dtype=float))
        _plot_pd_line(
            udf, "Log10 length distribution of trimmed away sequences.", R)
    finally:
        R.close()
scores_handle.write(str(match_dir)) scores_handle.write("\n") output_handle.flush() output_handle.close() if args.u is not None: unclass_handle.flush() unclass_handle.close() if args.A is not None: scores_handle.flush() scores_handle.close() if args.r is not None: plotter = report.Report(args.r) plotter.plot_bars_simple( { 'Classified': fwd_matches + rev_matches, 'Unclassified': unclassified }, title="Basic statistics", ylab="Count") plotter.plot_histograms({'nr_hits': unclass_nr_hits}, title="Number of hits in unclassified reads", xlab="Number of hits", ylab="Count") plotter.plot_bars_simple({ '+': fwd_matches, '-': rev_matches },