def plot_qc_percents(qc_df):
    """
    Plot percentage parts of pipeline QC file.

    Draws two R dot charts in a 1x2 panel layout: the first from the
    mapped / unique / ribo percentage columns, the second from the
    exons / CDS / 3' UTR / 5' UTR / introns percentage columns.

    Args:
        qc_df: pandas DataFrame with a "sample" column (used as the
            index) and the ``percent_*`` columns selected below.

    Returns:
        None. The charts are drawn on the current R graphics device.
    """
    # Record NA values as 0
    qc_df = qc_df.fillna(0).set_index("sample")
    r.par(mfrow=np.array([1, 2]))
    # options() hands back the previous values. They are restored only
    # after both charts are drawn, so scipen=10 is in effect while plotting.
    r_opts = r.options(scipen=10)
    try:
        r.par(bty="n", lwd=1.7, lty=2)
        r.dotchart(convert_to_r_matrix(qc_df[["percent_mapped",
                                              "percent_unique",
                                              "percent_ribo"]]),
                   xlab="Percent reads",
                   lcolor="black",
                   pch=19,
                   gcolor="darkblue",
                   cex=0.8)
        r.par(bty="n")
        r.dotchart(convert_to_r_matrix(qc_df[["percent_exons",
                                              "percent_cds",
                                              "percent_3p_utr",
                                              "percent_5p_utr",
                                              "percent_introns"]]),
                   xlab="Percent reads",
                   lcolor="black",
                   pch=19,
                   gcolor="darkblue",
                   cex=0.8)
    finally:
        # Put the caller's R options back even if plotting fails.
        r.options(r_opts)
def RWarnings(level):
    """
    Temporarily change how R warnings are handled.

    There are two levels: 'error' and 'ignore'.  With 'error', R warnings
    are treated as R errors and can be caught as RRuntimeError.  With
    'ignore', they are ignored and not even printed.

    Beware that all Python warning filters will be reset after using this
    context manager.

    NOTE(review): this is a generator with a single ``yield``; it is
    presumably wrapped by ``contextlib.contextmanager`` somewhere outside
    this block -- confirm.

    Args:
        level: either "error" or "ignore".

    Raises:
        KeyError: if ``level`` is not one of the two supported values.
    """
    new = {"error": 2, "ignore": -1}[level]
    old = r.getOption("warn")
    warnings.filterwarnings("ignore", category=RRuntimeWarning)
    r.options(warn=new)
    try:
        yield
    finally:
        # Restore state even when the managed body raises, which is the
        # expected outcome at level 'error'. Otherwise R's "warn" option
        # would stay modified for the rest of the session.
        r.options(warn=old)
        warnings.resetwarnings()
def plot_qc_reads(qc_df):
    """
    Plot number of reads part of a pipeline QC file.

    Reshapes the read-count columns to long form and draws a ggplot2
    point plot of count per sample on a log10 axis, with one facet per
    count column.

    Args:
        qc_df: pandas DataFrame with a "sample" column and the
            ``num_reads``, ``num_mapped``, ``num_unique_mapped`` and
            ``num_junctions`` columns.

    Returns:
        None. The plot is drawn via ``p.plot()``.
    """
    count_cols = ["num_reads", "num_mapped",
                  "num_unique_mapped", "num_junctions"]
    # Record NA values as 0
    qc_df = qc_df.fillna(0)[["sample"] + count_cols]
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    # Turn "variable" into a factor with an explicit level order, rather
    # than whatever order the conversion would otherwise produce.
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels=robj.StrVector(count_cols))
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    # options() returns the previous values; keep them so the change to
    # scipen does not leak into the rest of the R session.
    r_opts = r.options(scipen=4)
    try:
        p = ggplot2.ggplot(qc_r) + \
            ggplot2.geom_point(aes_string(x="sample", y="value")) + \
            ggplot2.scale_y_continuous(
                trans=scales.log10_trans(),
                breaks=scales.trans_breaks("log10",
                                           robj.r('function(x) 10^x')),
                labels=scales.trans_format("log10",
                                           robj.r('math_format(10^.x)'))) + \
            r.xlab("CLIP-Seq samples") + \
            r.ylab("No. reads") + \
            ggplot2.coord_flip() + \
            ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
            theme(**{"panel.grid.major.x": element_blank(),
                     "panel.grid.minor.x": element_blank(),
                     "panel.grid.major.y": theme_line(size=0.5,
                                                      colour="grey66",
                                                      linetype=3)})
        p.plot()
    finally:
        r.options(r_opts)