def _plot_with_rpy2(self, regions, filename):
    """Plot signal, p-value and q-value profiles of regions into one PDF.

    For each of the first ``self.num_regs`` regions three ggplot2 line
    panels are built (weighted signal on a log2 axis, -log10 of the
    p-values, -log10 of the corrected p-values), stacked with ``rbind`` and
    drawn on their own page of ``filename + '.pdf'``. Regions whose
    ``weighted`` data contains no non-zero value are skipped with a warning.

    :param regions: sequence of region objects exposing ``weighted``,
        ``pvalues``, ``corrected_pvalues``, ``bin`` and ``positions``.
    :param filename: output path without the ``.pdf`` extension.
    :raises SystemExit: when a region holds fewer bins around its centre
        than ``self.num_bins``.
    """
    from rpy2 import robjects
    import rpy2.robjects.lib.ggplot2 as ggplot2
    from rpy2.robjects.lib import grid
    from rpy2.robjects.packages import importr

    grdevices = importr('grDevices')
    base = importr('base')

    def profile_line(column):
        # One line per map, coloured by map, for the given data column.
        return ggplot2.geom_line(
            mapping=ggplot2.aes_string(x='t', y=column, group='map', colour='map'),
            alpha=0.8)

    grdevices.pdf(file=filename + '.pdf')
    try:
        t = list(range(-self.num_bins, self.num_bins + 1))
        for region in regions[:self.num_regs]:
            if not np.any(region.weighted):
                logger.warning(
                    "Warning: No data for region located on bin " + str(region.bin) + ". Not plotting this one.")
                continue

            # Floor division keeps the centre index an int; true division
            # would produce a float and make the slices below fail.
            middle = (len(region.weighted[0]) - 1) // 2
            if middle < self.num_bins:
                logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                sys.exit(1)

            lo = middle - self.num_bins
            hi = middle + self.num_bins + 1
            n_maps = len(region.weighted)

            d = {
                # Map index repeated once per plotted bin, as a string label.
                'map': robjects.StrVector([str(m) for m in range(n_maps) for _ in t]),
                't': robjects.FloatVector(t * n_maps),
                'e': robjects.FloatVector(
                    [i for sublist in region.weighted for i in sublist[lo:hi]]),
                'p': robjects.FloatVector(
                    [-np.log10(x) for sublist in region.pvalues for x in sublist[lo:hi]]),
                'c': robjects.FloatVector(
                    [-np.log10(x) for sublist in region.corrected_pvalues for x in sublist[lo:hi]]),
            }
            dataf = robjects.DataFrame(d)
            gp = ggplot2.ggplot(dataf)

            p1 = (gp
                  + profile_line('e')
                  + ggplot2.scale_y_continuous(trans='log2')
                  + ggplot2.ggtitle(
                      "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions))))
                  + ggplot2.labs(y="log Intensity")
                  + ggplot2.theme_classic()
                  + ggplot2.theme(**{'axis.title.x': ggplot2.element_blank(),
                                     'axis.text.y': ggplot2.element_text(angle=45),
                                     'axis.text.x': ggplot2.element_blank(),
                                     'legend.position': 'none'})
                  + ggplot2.scale_colour_brewer(palette="Set1"))

            p2 = (gp
                  + profile_line('p')
                  + ggplot2.labs(y="-log10(p-value)")
                  + ggplot2.theme_classic()
                  + ggplot2.theme(**{'axis.title.x': ggplot2.element_blank(),
                                     'axis.text.x': ggplot2.element_blank(),
                                     'legend.position': 'none'})
                  + ggplot2.scale_colour_brewer(palette="Set1"))

            p3 = (gp
                  + profile_line('c')
                  + ggplot2.labs(y="-log10(q-value)",
                                 x='bins (' + str(self.bin_res) + ' bp each)')
                  # Dashed horizontal line at the significance threshold.
                  + ggplot2.geom_hline(
                      mapping=ggplot2.aes_string(yintercept=str(-np.log10(self.threshold))),
                      colour='black', alpha=0.8, linetype='dashed')
                  + ggplot2.theme_classic()
                  + ggplot2.theme(**{'legend.position': 'none'})
                  + ggplot2.scale_colour_brewer(palette="Set1"))

            g1 = ggplot2.ggplot2.ggplotGrob(p1)
            g2 = ggplot2.ggplot2.ggplotGrob(p2)
            g3 = ggplot2.ggplot2.ggplotGrob(p3)

            # Stack the three panels and draw them through R's grid package.
            robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
            robjects.r("grid::grid.draw(g)")
            grid.newpage()
            logger.debug('Plotted region ' + str(region.bin))
    finally:
        # Always close the PDF device, even on an error or sys.exit above.
        grdevices.dev_off()
def get_nogrid_theme():
    """
    Build a ggplot2 theme in which the major and minor panel grid lines
    along both axes are blanked out.
    """
    grid_elements = (
        'panel.grid.major.x',
        'panel.grid.minor.x',
        'panel.grid.major.y',
        'panel.grid.minor.y',
    )
    return theme(**{element: element_blank() for element in grid_elements})
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    """Render a dodged bar chart of counts per subgroup to a PNG file.

    The nested mapping ``subgroup -> {type -> count}`` is flattened into a
    three-column R data frame (``'subgroup'``, ``tname``, ``'count'``) and
    drawn with one bar per (subgroup, type) pair, each labelled with its
    count. The image is written to ``OUTPUT_PATH/file_name`` (1200x800).

    :param subgroups_to_sses_to_n_count: mapping of subgroup to a mapping
        of type value to integer count.
    :param tname: column name used for the type values and the bar fill.
    :param file_name: name of the PNG file created under ``OUTPUT_PATH``.
    """
    columns_to_data = {'subgroup': [], tname: [], 'count': []}
    max_count = 0
    for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items():
        for ss, n_count in sses_to_n_count.items():
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data[tname].append(ss)
            columns_to_data['count'].append(n_count)
            max_count = max(max_count, n_count)

    subgroup_levels = ro.StrVector(_sort_subgroup(set(columns_to_data['subgroup'])))
    r_columns_to_data = {
        'subgroup': ro.FactorVector(columns_to_data['subgroup'], levels=subgroup_levels),
        tname: ro.StrVector(columns_to_data[tname]),
        'count': ro.IntVector(columns_to_data['count']),
    }
    df = ro.DataFrame(r_columns_to_data)

    # Upper y limit: the next multiple of 1000 above the largest count.
    # Floor division is required; true division would cancel the "* 1000"
    # and merely add 1000 to the maximum.
    max_count = int(max_count // 1000 * 1000 + 1000)

    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        "The Data Frame for file {}: \n{}".format(histogram_file_path, df))

    grdevices.png(file=histogram_file_path, width=1200, height=800)
    try:
        gp = ggplot2.ggplot(df)
        pp = (gp
              + ggplot2.aes_string(x='subgroup', y='count', fill=tname)
              + ggplot2.geom_bar(position="dodge", width=0.8, stat="identity")
              + ggplot2.theme_bw()
              + ggplot2.theme_classic()
              + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
              + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)})
              + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40, angle=45)})
              + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)})
              + ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                           limits=ro.IntVector([0, max_count]))
              + ggplot2.geom_text(ggplot2.aes_string(label='count'),
                                  size=6, angle=35, hjust=-0.1,
                                  position=ggplot2.position_dodge(width=0.8),
                                  vjust=-0.2))
        pp.plot()
        logging.info("Output step3 file {}".format(histogram_file_path))
    finally:
        # Close the PNG device even when plotting fails, so it is not leaked.
        grdevices.dev_off()
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean, prefix=''):
    """Write per-subgroup LRR counts and an acc20 line chart.

    Two files are produced under ``OUTPUT_PATH``:

    * ``prefix + "step3_5_lrr_count.txt"`` with one ``subgroup: count``
      line per subgroup;
    * ``prefix + "step3_5_lrr_acc20_line.png"`` (1024x512) with one line
      per subgroup of the acc20 mean against the 1-based position, the
      x axis being labelled with the characters of
      ``'LxxLxLxxNxLsGxIPxxLxxLxx'``.

    :param subgroups_to_lrrs_acc20mean: mapping of subgroup to a pair
        ``(acc20means, acc20_count)``, where ``acc20means`` is a sequence
        of values indexed by position.
    :param prefix: string prepended to both output file names.
    """
    pandas2ri.activate()

    subgroups_to_lrr_count = {}
    columns_to_data = {'subgroup': [], 'pos': [], 'acc20': []}
    for subgroup, (acc20means, acc20_count) in subgroups_to_lrrs_acc20mean.items():
        subgroups_to_lrr_count[subgroup] = acc20_count
        for pos, acc20mean in enumerate(acc20means, start=1):
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data['pos'].append(pos)
            columns_to_data['acc20'].append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    with open(os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt"), 'w') as f:
        for subgroup, lrr_count in subgroups_to_lrr_count.items():
            f.write("{}: {}\n".format(subgroup, lrr_count))

    # Generate the line chart file
    r_columns_to_data = {
        'subgroup': ro.StrVector(columns_to_data['subgroup']),
        'pos': ro.IntVector(columns_to_data['pos']),
        'acc20': ro.FloatVector(columns_to_data['acc20']),
    }
    df = ro.DataFrame(r_columns_to_data)

    line_chart_file_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_acc20_line.png")
    logging.debug(
        "The Data Frame for file {}: \n{}".format(line_chart_file_path, df))

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    try:
        gp = ggplot2.ggplot(df)
        pp = (gp
              + ggplot2.theme_bw()
              + ggplot2.theme_classic()
              + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)})
              + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)})
              + ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup')
              + ggplot2.geom_point(size=4, shape=20)
              + ggplot2.geom_line(size=3)
              + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
              + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)})
              # Breaks are passed as a materialised list, like the sibling
              # ss chart, rather than as a lazy range object.
              + ggplot2.scale_x_continuous(
                  breaks=ro.IntVector(list(range(1, 25))),
                  labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx'))))
        pp.plot()
        logging.info("Output step3 file {}".format(line_chart_file_path))
    finally:
        # Close the PNG device even when plotting fails, so it is not leaked.
        grdevices.dev_off()
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname, line_chart_file_path):
    """Draw the acc20 mean per site, one line per type value, to a PNG.

    ``ts_to_acc20s`` is reduced with ``calc_acc20mean_by_types``; each
    resulting sequence of means is plotted against ``index - 5``, so the
    sixth entry lands on site 0, which the x axis labels ``'N'``. The
    image is written to ``line_chart_file_path`` (1024x512).

    :param ts_to_acc20s: input handed unchanged to
        ``calc_acc20mean_by_types``.
    :param tname: column name used for the type values and for the
        grouping/colour aesthetic.
    :param line_chart_file_path: path of the PNG file to create.
    """
    logging.debug(
        "Begin to generate {}, data {}".format(line_chart_file_path, ts_to_acc20s))
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)

    columns_to_data = {tname: [], 'site': [], 'acc20': []}
    for ss, acc20means in ts_to_acc20mean.items():
        for index, acc20mean in enumerate(acc20means):
            columns_to_data[tname].append(ss)
            columns_to_data['site'].append(index - 5)
            columns_to_data['acc20'].append(acc20mean)

    # Generate the line chart file
    r_columns_to_data = {
        tname: ro.StrVector(columns_to_data[tname]),
        'site': ro.IntVector(columns_to_data['site']),
        'acc20': ro.FloatVector(columns_to_data['acc20']),
    }
    df = ro.DataFrame(r_columns_to_data)
    logging.debug(
        "The Data Frame for file {}: \n{}".format(line_chart_file_path, df))

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    try:
        gp = ggplot2.ggplot(df)
        pp = (gp
              + ggplot2.theme_bw()
              + ggplot2.theme_classic()
              + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)})
              + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)})
              + ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname)
              + ggplot2.geom_point(size=4, shape=20)
              + ggplot2.geom_line(size=3)
              + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
              + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)})
              + ggplot2.scale_x_continuous(
                  breaks=ro.IntVector(list(range(-5, 6))),
                  labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N',
                                       '1', '2', '3', '4', '5'])))
        pp.plot()
        logging.info("Output step3 file {}".format(line_chart_file_path))
    finally:
        # Close the PNG device even when plotting fails, so it is not leaked.
        grdevices.dev_off()
def test_element_blank(self):
    """element_blank() must return an ElementBlank instance."""
    assert isinstance(ggplot2.element_blank(), ggplot2.ElementBlank)
y = 'lat', \ group = 'group', \ color = 'ObamaShare', \ fill = 'ObamaShare')) + \ ggplot2.scale_fill_gradient(high = 'blue', \ low = 'red') + \ ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \ ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \ ggplot2.theme(**{ 'legend.position': 'left', \ 'legend.key.size': R.r.unit(2, 'lines'), \ 'legend.title' : ggplot2.element_text(size = 14, hjust=0), \ 'legend.text': ggplot2.element_text(size = 12), \ 'title' : ggplot2.element_text('Obama Vote Share and Distance to Railroads in IL'), \ 'plot.title': ggplot2.element_text(size = 24), 'plot.margin': R.r.unit(R.r.rep(0,4),'lines'), \ 'panel.background': ggplot2.element_blank(), \ 'panel.grid.minor': ggplot2.element_blank(), \ 'panel.grid.major': ggplot2.element_blank(), \ 'axis.ticks': ggplot2.element_blank(), \ 'axis.title.x': ggplot2.element_blank(), \ 'axis.title.y': ggplot2.element_blank(), \ 'axis.title.x': ggplot2.element_blank(), \ 'axis.title.x': ggplot2.element_blank(), \ 'axis.text.x': ggplot2.element_blank(), \ 'axis.text.y': ggplot2.element_blank()} ) + \ ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
def plot_volcano_with_r(
        data,
        xlabel='Estimated effect (change in H/L ratio)',
        title='',
        max_labels=20,
        color_background='#737373',
        color_significant='#252525',
        color_significant_muted='#252525',
        label_only_large_fc=False,
        special_labels=None,
        special_palette=None,
        base_size=12,
        label_size=3,
        x='logFC',
        y='neg_log10_p_adjust',
        special_labels_mode='all',
        xlim=None,
        skip_labels=None,
        nudges=None,
):
    """Draw a volcano plot of ``data`` with ggplot2 and ggrepel via rpy2.

    ``data`` is first converted by ``transform_data_for_ggplot``. Points are
    placed at (``x``, ``y``) and coloured by the ``group`` column using the
    palette from ``r_label_palette``. Faint reference lines mark the FDR
    threshold (horizontal) and the positive and negative fold-change
    thresholds (vertical). The ``label`` column is drawn with repelled text.
    The finished plot is rendered on the active R device; nothing is
    returned.
    """
    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)

    # Base plot and theming.
    plot = r_ggplot2.ggplot(r_data)
    plot += r_ggplot2.theme_minimal(base_size=base_size)
    panel_theme = {
        'panel.grid.major': r_ggplot2.element_blank(),
        'panel.grid.minor': r_ggplot2.element_blank(),
        'panel.border': r_ggplot2.element_rect(fill=robjects.rinterface.NA,
                                               color="black"),
    }
    plot += r_ggplot2.theme(**panel_theme)
    plot += r_ggplot2.theme(
        text=r_ggplot2.element_text(family='Helvetica', face='plain'))
    plot += r_ggplot2.theme(
        **{'plot.title': r_ggplot2.element_text(hjust=0.5)})

    # Point aesthetics and the manual colour scale for the groups.
    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    group_colours = r_label_palette(
        r_like_data,
        special_palette,
        color_background=color_background,
        color_significant=color_significant,
        color_significant_muted=color_significant_muted)
    scale_points = r_ggplot2.scale_colour_manual(aes_points, values=group_colours)
    plot += aes_points
    plot += scale_points

    # Axis scales; x limits are only passed when the caller supplied them.
    x_scale_kwargs = {'labels': r_custom.formatterFunTwoDigits}
    if xlim is not None:
        x_scale_kwargs['limits'] = robjects.r.c(*xlim)
    plot += r_ggplot2.scale_x_continuous(**x_scale_kwargs)
    plot += r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit)

    # Threshold guides: one horizontal, then +/- fold-change verticals.
    guide_style = {'color': '#BDBDBD', 'alpha': .3}
    plot += r_ggplot2.geom_hline(
        yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)), **guide_style)
    for fc_threshold in (float(FC_THRESHOLD_RESPONSE),
                         -float(FC_THRESHOLD_RESPONSE)):
        plot += r_ggplot2.geom_vline(xintercept=fc_threshold, **guide_style)

    plot += r_ggplot2.geom_point(**{'show.legend': False})

    # Repelled text labels, nudged by the per-row nudgex / nudgey columns.
    aes_text = r_ggplot2.aes_string(label='label')
    plot += aes_text
    repel_options = {
        'show.legend': False,
        'point.padding': 0.25,
        'min.segment.length': 0,
        'segment.color': '#BDBDBD',
    }
    plot += r_ggrepel.geom_text_repel(
        aes_text,
        nudge_x=r_dollar(r_data, 'nudgex'),
        nudge_y=r_dollar(r_data, 'nudgey'),
        size=label_size,
        family='Helvetica',
        **repel_options)

    plot += r_ggplot2.labs(x=xlabel, y='Adjusted p value (-log10)', title=title)
    plot.plot()
#print robjects.r('packageVersion("ggplot2")') #--------------------------------------------------------------------# # Annotation # #--------------------------------------------------------------------# mytheme = { 'panel.background':ggplot2.element_rect(fill='white',colour='white'), 'axis.text':ggplot2.element_text(colour="black",size=15, family=FONTFAM), 'axis.line':ggplot2.ggplot2.element_line(size = 1.2, colour="black"), 'axis.title':ggplot2.element_text(colour="black",size=15, family=FONTFAM), 'plot.title':ggplot2.element_text(face="bold", size=20, colour="black",family=FONTFAM), 'panel.grid.minor':ggplot2.element_blank(), 'panel.grid.major':ggplot2.element_blank(), 'legend.key':ggplot2.element_blank(), 'legend.text':ggplot2.element_text(colour="black",size=15, family=FONTFAM), 'strip.text.y':ggplot2.element_text(colour="black",face="bold", size=15,family=FONTFAM), 'strip.text.x':ggplot2.element_text(colour="black",face="bold", size=15,family=FONTFAM), 'text':ggplot2.element_text(colour="black",family=FONTFAM) } #'panel.grid.major':ggplot2.theme_line(colour = "grey90"), pointtheme = { 'panel.background':ggplot2.element_rect(fill='white',colour='black',size=2), 'axis.text':ggplot2.element_text(colour="black",size=15,family=FONTFAM),
#print(palette) color_background = "white" color_grid_major = palette[3] color_axis_text = palette[6] color_axis_title = palette[7] color_title = palette[9] #palette_lines <- brewer.pal("Dark2", n=3) palette_lines <- brewer.pal("Set2", n=8) palette_repeat <- c('#66C2A5', '#66C2A5', '#FC8D62','#FC8D62') linetype_repeat <- c("solid","dashed","solid","dashed") ''') fte_theme = theme( **{ 'axis.ticks': element_blank(), 'panel.background': element_rect(fill=robjects.r.color_background, color=robjects.r.color_background), 'plot.background': element_rect(fill=robjects.r.color_background, color=robjects.r.color_background), 'panel.border': element_rect( color=robjects.r.color_background ), #'panel.grid.major':element_line(color=robjects.r.color_grid_major, size = 0.25), 'panel.grid.minor': element_blank(), 'axis.ticks': element_blank(), 'legend.position':
# Define the colour constants in the R global environment; they are read
# back below through ``robjects.r.<name>``.
robjects.r('''
library(RColorBrewer)
library(grid)
palette <- brewer.pal("Greys", n=9)
color_background = palette[2]
color_grid_major = palette[3]
color_axis_text = palette[6]
color_axis_title = palette[7]
color_title = palette[9]
palette_lines <- brewer.pal("Set2", n=8)
''')

# Font size shared by the legend and axis text elements.
size = 9

# ggplot2 theme built from the grey palette defined above: no ticks or minor
# grid, background-coloured panel/plot, legend on the right.
fte_theme = theme(**{
    # 'axis.ticks' was listed twice in the original literal with the same
    # value; a single entry is equivalent.
    'axis.ticks': element_blank(),
    'panel.background': element_rect(fill=robjects.r.color_background,
                                     color=robjects.r.color_background),
    'plot.background': element_rect(fill=robjects.r.color_background,
                                    color=robjects.r.color_background),
    'panel.border': element_rect(color=robjects.r.color_background),
    'panel.grid.minor': element_blank(),
    'legend.position': "right",
    'legend.background': element_rect(fill="transparent"),
    'legend.text': element_text(size=size, color=robjects.r.color_axis_title),
    'legend.title': element_text(size=size, color=robjects.r.color_axis_title),
    'plot.title': element_text(color=robjects.r.color_title, size=10, vjust=1.25),
    'axis.text.x': element_text(size=size, color=robjects.r.color_axis_text),
    'axis.text.y': element_text(size=size, color=robjects.r.color_axis_text),
    'axis.title.x': element_text(size=size, color=robjects.r.color_axis_title, vjust=0),
    # Major grid lines are currently disabled:
    # 'panel.grid.major': element_line(color=robjects.r.color_grid_major, size=.25),
    'axis.title.y': element_text(size=size, color=robjects.r.color_axis_title, angle=90),
})
def plot_qc_reads(qc_df):
    """
    Plot number of reads part of a pipeline QC file.

    The read-count columns of ``qc_df`` are melted to long format and drawn
    as one point per sample on a log10 value axis, with axes flipped and one
    facet per count type (single column of facets). The plot is rendered on
    the active R device; nothing is returned.

    qc_df: pandas DataFrame with a "sample" column plus "num_reads",
    "num_mapped", "num_unique_mapped" and "num_junctions".

    Note: the base-graphics dotchart code that used to follow an
    unconditional ``return`` could never run and has been removed.
    """
    read_cols = ["num_reads", "num_mapped",
                 "num_unique_mapped", "num_junctions"]
    # Record NA values as 0
    qc_df = qc_df.fillna(0)
    qc_df = qc_df[["sample"] + read_cols]
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    # Turn 'variable' into a factor whose level order follows read_cols.
    labels = robj.StrVector(tuple(read_cols))
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels=labels)
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    # Set R's scipen option to 4 for the number formatting of this plot.
    r.options(scipen=4)
    # NOTE(review): theme_line comes from older ggplot2 releases; confirm
    # it is still available in the ggplot2 version in use.
    p = ggplot2.ggplot(qc_r) + \
        ggplot2.geom_point(aes_string(x="sample", y="value")) + \
        ggplot2.scale_y_continuous(trans=scales.log10_trans(),
                                   breaks=scales.trans_breaks("log10",
                                                              robj.r('function(x) 10^x')),
                                   labels=scales.trans_format("log10",
                                                              robj.r('math_format(10^.x)'))) + \
        r.xlab("CLIP-Seq samples") + \
        r.ylab("No. reads") + \
        ggplot2.coord_flip() + \
        ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
        theme(**{"panel.grid.major.x": element_blank(),
                 "panel.grid.minor.x": element_blank(),
                 "panel.grid.major.y": theme_line(size=0.5, colour="grey66", linetype=3)})
    p.plot()