def make_output(tss_cov, out_prefix, upstream, downstream):
    """Dump TSS coverage to a text file and plot it (full and zoomed views).

    tss_cov: per-position coverage indexed 0..upstream+downstream; index
             upstream+i holds the value at offset i from the TSS.
    out_prefix: prefix for outputs (<prefix>_raw.txt, <prefix>_full.pdf,
                <prefix>_zoom.pdf).
    upstream/downstream: window extents around the TSS.
    NOTE(review): Python 2 code (print >> statement).
    """
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-upstream,downstream+1):
        print >> raw_out, '%d\t%e' % (i, tss_cov[upstream+i])
    raw_out.close()

    # make plot data structures
    tss_i = ro.IntVector(range(-upstream,downstream+1))
    cov = ro.FloatVector(tss_cov)
    df = ro.DataFrame({'tss_i':tss_i, 'cov':cov})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_full.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()

    # construct zoomed plot (x limited to +/-1000 around the TSS)
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index',limits=ro.IntVector([-1000,1000])) + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_zoom.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
def test_aes(self):
    """aes() accepts both keyword and positional mappings; either way the
    composed object remains a GGPlot."""
    # Build each aes lazily so R-side construction order matches a
    # straight-line version of this test.
    for make_aes in (lambda: ggplot2.aes(x='wt', y='mpg'),
                     lambda: ggplot2.aes('wt', 'mpg')):
        plot = ggplot2.ggplot(mtcars)
        plot += make_aes()
        plot += ggplot2.geom_point()
        assert isinstance(plot, ggplot2.GGPlot)
def make_output_and(cov, control_cov, out_prefix, window):
    """Dump main vs. control coverage around a splice site and plot both.

    cov / control_cov: per-position coverage lists spanning the window;
                       index window/2+i holds the value at offset i.
    out_prefix: output prefix for <prefix>_raw.txt and <prefix>.pdf.
    NOTE(review): Python 2 code (print >> statement; window/2 relies on
    integer division; 2*range(...) relies on range returning a list).
    """
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-window/2,window/2+1):
        print >> raw_out, '%d\t%e\t%e' % (i, cov[window/2+i], control_cov[window/2+i])
    raw_out.close()

    # make plot data structures: positions repeated once per series
    splice_i = ro.IntVector(2*range(-window/2,window/2+1))
    cov_r = ro.FloatVector(cov+control_cov)
    labels = ro.StrVector(['Main']*len(cov)+['Control']*len(control_cov))
    df = ro.DataFrame({'splice_i':splice_i, 'cov':cov_r, 'label':labels})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage') + \
        ggplot2.scale_colour_discrete('')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
def make_output_and(cov, control_cov, out_prefix, window):
    """Dump main vs. control coverage around a splice site and plot both.

    cov / control_cov: per-position coverage lists spanning the window;
                       index window/2+i holds the value at offset i.
    out_prefix: output prefix for <prefix>_raw.txt and <prefix>.pdf.
    NOTE(review): Python 2 code (print >> statement; window / 2 relies on
    integer division; 2 * range(...) relies on range returning a list).
    """
    # dump raw counts to file
    raw_out = open("%s_raw.txt" % out_prefix, "w")
    for i in range(-window / 2, window / 2 + 1):
        print >> raw_out, "%d\t%e\t%e" % (i, cov[window / 2 + i], control_cov[window / 2 + i])
    raw_out.close()
    # make plot data structures: positions repeated once per series
    splice_i = ro.IntVector(2 * range(-window / 2, window / 2 + 1))
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(["Main"] * len(cov) + ["Control"] * len(control_cov))
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r, "label": labels})
    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov", colour="label")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
        + ggplot2.scale_colour_discrete("")
    )
    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
def make_output(cov, out_prefix, window):
    """Dump windowed coverage to <prefix>_raw.txt and plot it to <prefix>.pdf.

    cov: per-position coverage; index window/2+i holds the value at offset i.
    NOTE(review): Python 2 code (print >> statement, integer division).
    """
    # dump raw counts to file
    raw_out = open("%s_raw.txt" % out_prefix, "w")
    for i in range(-window / 2, window / 2 + 1):
        print >> raw_out, "%d\t%e" % (i, cov[window / 2 + i])
    raw_out.close()
    # make plot data structures
    splice_i = ro.IntVector(range(-window / 2, window / 2 + 1))
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov})
    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
    )
    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
def render_plot(gp, args):
    """Render a plot using ggplot2.

    gp: a base ggplot2 object to extend.
    args: mapping with keys 'x' and 'y' (aesthetic expressions), 'type'
          (one of 'points', 'lines', 'boxplot'), and 'facets' (facet
          formula string, or None for no faceting).
    Raises ValueError for an unsupported plot type (a subclass of the
    Exception previously raised, so existing handlers still match).
    """
    args = util.Namespace(args)
    import rpy2.robjects.lib.ggplot2 as ggplot2

    pp = gp + ggplot2.aes_string(x=args.x, y=args.y)

    # Dispatch table instead of an if/elif chain.
    geoms = {
        'points': ggplot2.geom_point,
        'lines': ggplot2.geom_line,
        'boxplot': ggplot2.geom_boxplot,
    }
    if args.type not in geoms:
        raise ValueError("{0} not implemented".format(args.type))
    pp += geoms[args.type]()

    if args.facets is not None:
        # Best effort: a malformed facet formula should not abort rendering
        # (preserves the original silent-skip behavior).
        try:
            pp += ggplot2.facet_grid(ro.Formula(args.facets))
        except Exception:
            pass
    # Best effort: plotting failures (e.g. no open device) are ignored,
    # matching the original behavior.
    try:
        pp.plot()
    except Exception:
        pass
def line_plot(self, data, title, ylabel, img_file,
              x='date', y='size', c='type', clabel=''):
    """Line+point time-series chart of `y` over `x`, coloured by `c`.

    Renders with whichever backend the module-level PLOTLIB selects
    ('ggplot' or 'rpy2.ggplot2'), saves to PLOTDIR/img_file, and returns
    the plot object.
    NOTE(review): if PLOTLIB is neither value, `p` is unbound and
    p.save() raises NameError -- confirm PLOTLIB is always one of the two.
    """
    if PLOTLIB == 'ggplot':
        # date_label = "%Y\n%b"
        date_label = "%Y\n%W"  # year + week number
        p = ggplot(data, aes(x=x, y=y, color=c)) \
            + ggtitle(title) \
            + ylab(ylabel) \
            + xlab(' ') \
            + scale_x_date(breaks=date_breaks('3 months'), labels=date_label) \
            + geom_line() + geom_point()
    elif PLOTLIB == 'rpy2.ggplot2':
        # convert y axis to float because R uses 32-bit signed integers,
        # values > 2 bln. (2^31) will overflow
        data[y] = data[y].astype(float)
        p = ggplot2.ggplot(data) \
            + ggplot2.aes_string(x=x, y=y, color=c) \
            + ggplot2.geom_line() + ggplot2.geom_point() \
            + GGPLOT2_THEME \
            + ggplot2.labs(title=title, x='', y=ylabel, color=clabel)
    img_path = os.path.join(PLOTDIR, img_file)
    p.save(img_path)
    # data.to_csv(img_path + '.csv')
    return p
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream,
                    downstream):
    """Dump and plot main vs. control TSS coverage for selected TE families.

    te_tss_cov / control_te_tss_cov: dicts keyed by (te_name, te_family)
        mapping to per-position coverage lists of length
        upstream+downstream+1.
    out_prefix: prefix for the <prefix>_raw/ and <prefix>_plot/ output dirs
        (both are wiped and recreated).
    NOTE(review): Python 2 code (print >> statement, 2*range(...) list
    repetition).
    """
    # Whitelists of TE names/families to report; previously duplicated
    # inline at both filter sites below.
    te_names = ['n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb',
                'LTR7']
    te_families = ['n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1',
                   'LTR/ERVL-MaLR', 'LINE/L2', 'LTR/ERVL', 'SINE/MIR',
                   'DNA/hAT-Charlie', 'LTR/ERVK', 'DNA/TcMar-Tigger']

    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file, one file per (name, family) pair
    for te in te_tss_cov:
        if te[0] in te_names and te[1] in te_families:
            raw_out = open(
                '%s_raw/%s_%s.txt' % (out_prefix, te[0].replace('/', '_'),
                                      te[1].replace('/', '_')), 'w')
            for i in range(-upstream, downstream + 1):
                print >> raw_out, '%d\t%e\t%e' % (i, te_tss_cov[te][
                    upstream + i], control_te_tss_cov[te][upstream + i])
            raw_out.close()

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # shared plot columns: positions and labels are the same for every TE
    tss_i = ro.IntVector(2 * range(-upstream, downstream + 1))
    labels = ro.StrVector(['Main'] * (upstream + downstream + 1) +
                          ['Control'] * (upstream + downstream + 1))

    for te in te_tss_cov:
        if te[0] in te_names and te[1] in te_families:
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(
                file='%s_plot/%s_%s.pdf' % (out_prefix,
                                            te[0].replace('/', '_'),
                                            te[1].replace('/', '_')))
            gp.plot()
            grdevices.dev_off()
def line_plot(pdf_file, data, x, y, var, null_label="N/A", linetype=None,
              title=None, xlab=None, ylab=None, colorname=None,
              linename=None, **extra_aes_params):
    """Scatter+path plot of y vs x, coloured by `var`, written to a PDF.

    Null x values are relabelled `null_label` and ordered first on the
    axis.  If `linetype` is given (and differs from `var`) paths are
    additionally grouped and dashed by it.
    NOTE(review): Python 2 / old-pandas code -- relies on map() returning
    a list and on DataFrame.sort(); mutates `data` in place.
    """
    pdf(pdf_file, width=11.7, height=8.3, paper="a4r")
    if any(data[x].isnull()):
        # Null label first, then remaining x values in sorted string order.
        labels = [null_label] + map(str, sorted(set(
            data[data[x].notnull()][x])))
        labels = robjects.StrVector(labels)
        nulls = data[x].isnull()
        label_vals = dict(zip(labels, range(len(labels))))
        data[x] = data[x].astype("str")
        data[x][nulls] = null_label
        # Sort rows by the label order so the factor levels line up.
        data['sortcol'] = data[x].map(label_vals.__getitem__)
        data.sort('sortcol', inplace=True)
    else:
        labels = None
    # One path group per (var, linetype) combination.
    if linetype and linetype != var:
        data['group'] = data[var].map(str) + data[linetype].map(str)
    else:
        data['group'] = data[var]
    rdata = common.convert_to_r_dataframe(data)
    if labels:
        # Make the x column an ordered factor with the null label first.
        ix = rdata.names.index(x)
        rdata[ix] = ordered(rdata[ix], levels=labels)
    gp = gg2.ggplot(rdata)
    pp = (
        gp + gg2.geom_point(size=3) +
        gg2.scale_colour_hue(name=(colorname or var)) +
        #gg2.scale_colour_continuous(low="black") +
        gg2.aes_string(x=x, y=y, color=var, variable=var) +
        ggtitle(title or "") +
        xlabel(xlab or x) +
        ylabel(ylab or y)
        #+
        #gg2.scale_y_continuous(breaks=seq(0.0, 1.0, 0.05))
    )
    # line type stuff
    if linetype:
        pp += gg2.geom_path(gg2.aes_string(group='group',
                                           linetype=linetype), size=0.5)
        pp += gg2.scale_linetype(name=(linename or linetype))
    else:
        pp += gg2.geom_path(gg2.aes_string(group='group'), size=0.5)
    pp.plot()
    dev_off()
def plot_total_bp(parser, args, tot_bp_per_pore):
    """Plot the pore performance: per-pore total bases (log10) laid out on
    the MinION flowcell grid.

    tot_bp_per_pore: dict mapping pore id -> total bases sequenced; pores
        absent from the dict are drawn with value 0.
    Saves to args.saveas (.pdf or .png) when given, otherwise displays
    interactively until the user hits enter.
    NOTE(review): Python 2 code (range(...)*32 list repetition, raw_input).
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    flowcell_layout = minion_flowcell_layout()

    pore_values = []
    for pore in flowcell_layout:
        if pore in tot_bp_per_pore:
            pore_values.append(math.log10(tot_bp_per_pore[pore]))
        else:
            pore_values.append(0)

    # make a data frame of the lists
    d = {
        "rownum": robjects.IntVector(range(1, 17) * 32),
        "colnum": robjects.IntVector(sorted(range(1, 33) * 16)),
        # BUGFIX: log10 yields floats; IntVector would truncate them.
        "log10_tot_bp": robjects.FloatVector(pore_values),
        "labels": robjects.IntVector(flowcell_layout),
    }
    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (
        gp
        + gg.aes_string(y="factor(rownum, rev(rownum))", x="factor(colnum)")
        + gg.geom_point(gg.aes_string(color="log10_tot_bp"), size=7)
        + gg.geom_text(gg.aes_string(label="labels"), colour="white", size=2)
        + gg.scale_colour_gradient2(low="black", mid="black", high="red")
        + gg.coord_fixed(ratio=1.4)
        + gg.labs(x=gg.NULL, y=gg.NULL)
    )
    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print ("Type enter to exit.")
        raw_input()
def plot_start(x, y): import rpy2.robjects.lib.ggplot2 as ggplot2 ##由于这一条import会有警告信息,放到这里,只有调用这个函数才会出现警告。 utils = importr('utils') data = utils.read_csv(glob('*.csv')[0]) plot = ggplot2.ggplot(data) plot = (plot + ggplot2.aes_string(x=x, y=y) + ggplot2.geom_point() + ggplot2.scale_colour_gradient(low="yellow", high="red") + ggplot2.labs(title="mtcars", x='wt', y='mpg')) plot.save('point.png')
def plot_ROC(self, path):
    """Draw the ROC curve held in self.df (columns 'fpr', 'tpr') into a
    PDF device opened at `path`."""
    robjects.r["pdf"](path, width=14, height=8)
    rdf = convert_to_r_dataframe(self.df, strings_as_factors=True)
    curve = (ggplot2.ggplot(rdf)
             + ggplot2.aes_string(x="fpr", y="tpr")
             + ggplot2.geom_line(color="blue")
             + ggplot2.geom_point(size=2))
    curve.plot()
def rank_abundance_plot(counter, name):
    """Render a rank/abundance scatter (log-scaled y axis) for `counter`
    to analytics_out/<name>_rank_abundance.png."""
    out_path = 'analytics_out/{0}_rank_abundance.png'.format(name)
    grdevices.png(out_path)
    ranks, fractions = rank_abundance_data(counter)
    frame = robjects.DataFrame({'rank': ranks, 'f': fractions})
    chart = ggplot.ggplot(frame)
    for layer in (ggplot.aes_string(x='rank', y='f'),
                  ggplot.geom_point(),
                  ggplot.scale_y_log10(name='fraction of hits')):
        chart += layer
    chart.plot()
    grdevices.dev_off()
def plot_ROC(self, path):
    """Plot the ROC curve stored in self.df (columns 'fpr', 'tpr') into a
    PDF device opened at `path`."""
    robjects.r['pdf'](path, width=14, height=8)
    df = self.df
    # print(df)  # leftover debug output removed; re-enable to inspect
    gp = ggplot2.ggplot(convert_to_r_dataframe(df, strings_as_factors=True))
    gp += ggplot2.aes_string(x='fpr', y='tpr')
    gp += ggplot2.geom_line(color='blue')
    gp += ggplot2.geom_point(size=2)
    gp.plot()
def plot_total_bp(parser, args, tot_bp_per_pore):
    """Plot the pore performance: per-pore total bases (log10) laid out on
    the MinION flowcell grid.

    tot_bp_per_pore: dict mapping pore id -> total bases sequenced; pores
        absent from the dict are drawn with value 0.
    Saves to args.saveas (.pdf or .png) when given, otherwise displays
    interactively until the user hits enter.
    NOTE(review): Python 2 code (range(...)*32 list repetition, raw_input).
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    flowcell_layout = minion_flowcell_layout()

    pore_values = []
    for pore in flowcell_layout:
        if pore in tot_bp_per_pore:
            pore_values.append(math.log10(tot_bp_per_pore[pore]))
        else:
            pore_values.append(0)

    # make a data frame of the lists
    d = {'rownum': robjects.IntVector(range(1,17)*32),
         'colnum': robjects.IntVector(sorted(range(1,33)*16)),
         # BUGFIX: log10 yields floats; IntVector would truncate them.
         'log10_tot_bp': robjects.FloatVector(pore_values),
         'labels': robjects.IntVector(flowcell_layout)
         }
    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = gp + gg.aes_string(y = 'factor(rownum, rev(rownum))', \
                            x = 'factor(colnum)') \
        + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size = 7) \
        + gg.geom_text(gg.aes_string(label ='labels'), colour="white", size = 2) \
        + gg.scale_colour_gradient2(low = "black", mid= "black", high="red") \
        + gg.coord_fixed(ratio=1.4) \
        + gg.labs(x=gg.NULL, y=gg.NULL)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width = 11, height = 8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width = 11, height = 8.5,
                          units = "in", res = 300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
def plot_all_errors(self, path):
    """Plot every error series in self.df (melted on 'iteration') as
    coloured point+line traces into a PDF device opened at `path`."""
    robjects.r["pdf"](path, width=14, height=8)
    long_form = pandas.melt(self.df, id_vars="iteration")
    rdf = convert_to_r_dataframe(long_form, strings_as_factors=True)
    chart = (ggplot2.ggplot(rdf)
             + ggplot2.aes_string(x="iteration", y="value",
                                  color="variable")
             + ggplot2.geom_point(size=2)
             + ggplot2.geom_line())
    chart.plot()
def plotStats(data, outFolder, tiles, prop="qual", prefix="", high="yellow",
              low="blue", pdf=False, detail=True):
    """Plot per-tile spatial maps of `prop` (default read quality).

    data: R data frame with columns x, y, tile and `prop`.
    outFolder: destination for the overview PNG; per-tile plots go to
        outFolder/detailPlots.
    tiles: tile ids to render individually when detail=True.
    pdf: additionally save each detail plot as PDF.
    NOTE(review): detailPlots is never created here -- assumed to exist
    (confirm caller creates it).  The pdf filename uses "%s%i" (no
    underscore), unlike the PNG pattern -- possibly intentional, confirm.
    """
    #overview plot
    p = ggplot.ggplot(data)
    p = p + ggplot.aes_string(x="x", y="y", col=prop) \
        + ggplot.geom_point(size=0.1) \
        + ggplot.facet_wrap(robjects.Formula("~ tile")) \
        + ggplot.scale_colour_gradient(high=high, low=low) \
        + ggplot.ggtitle("Overview %s" % (prop))
    if prefix:
        fileName = "%s_overview_%s.png" % (prefix, prop)
    else:
        fileName = "overview_%s.png" % (prop)
    p.save(os.path.join(outFolder, fileName), scale=2)

    #detail plots
    if detail:
        detailFolder = os.path.join(outFolder, "detailPlots")
        for t in tiles:
            # subset the R data frame to the current tile
            p = ggplot.ggplot(data.rx(data.rx2("tile").ro == t, True))
            p = p + ggplot.aes_string(x="x", y="y", col=prop) \
                + ggplot.geom_point(size=1) \
                + ggplot.facet_wrap(robjects.Formula("~ tile")) \
                + ggplot.scale_colour_gradient(high=high, low=low) \
                + ggplot.ggtitle("%i %s" % (t, prop))
            if prefix:
                fileName = "%s_%i_%s.png" % (prefix, t, prop)
            else:
                fileName = "%i_%s.png" % (t, prop)
            p.save(os.path.join(detailFolder, fileName), scale=2)
            if pdf:
                fileName = "%s%i_%s.pdf" % (prefix, t, prop)
                p.save(os.path.join(detailFolder, fileName), scale=2)
def single_locus_net_informativeness(locus_table, net_pi_table, locus):
    """Return a ggplot2 plot of phylogenetic informativeness over time for
    one locus, queried via the module-level R DB connection `con`.

    NOTE(review): the SQL is built by string interpolation -- safe only if
    table names and `locus` come from trusted code, never user input.
    NOTE(review): ggplot2.opts() was removed in ggplot2 >= 0.9.2; this
    requires an old ggplot2/rpy2 pairing -- confirm installed versions.
    """
    qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} WHERE {0}.id = {1}.id AND locus = '{2}'"'''.format(locus_table, net_pi_table, locus)
    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y='pi') + \
        ggplot2.geom_point(size = 3, alpha = 0.4) + \
        ggplot2.scale_x_reverse('years ago') + \
        ggplot2.scale_y_continuous('phylogenetic informativeness') + \
        ggplot2.opts(title = locus)
    return plot
def create_plot(filename, data, performance_object):
    """Plot mean.<metric> against dimension, coloured by signature, with
    +/- stderror error bars, saved as a PNG at `filename`."""
    grdevices.png(file=filename)
    mean_col = "mean.%s" % performance_object
    err_col = "stderror.%s" % performance_object
    chart = ggplot2.ggplot(data)
    chart += ggplot2.aes_string(x="dimension", y=mean_col)
    chart += ggplot2.geom_point(ggplot2.aes_string(colour="signature"))
    chart += ggplot2.geom_errorbar(
        ggplot2.aes_string(ymax="%s+%s" % (mean_col, err_col),
                           ymin="%s-%s" % (mean_col, err_col)))
    chart.plot()
    grdevices.dev_off()
def compute(x0, y0, x1, y1):
    """Fit an offset-plus-two-Gaussians model (via R nls) to the
    column-averaged intensity profile of the selected image region, plot
    the data with the fitted curves, and show the area ratio in the title.

    (x0, y0)-(x1, y1): corners of the selected square in image coordinates.
    Uses the module-level `img` array and matplotlib's title()/show().
    NOTE(review): nls start values (off=90, c1=120, ...) are hard-coded for
    a specific image scale -- confirm they suit other inputs.
    """
    # Selected square, intensity inverted, averaged across the colour axis
    sel = 255 - average(img[y0:y1,x0:x1],2)
    # Average across x's -> 1-D profile along y
    ysel = average(sel,1)
    line = ysel
    xs = mgrid[0:line.shape[0]]

    # Pass the data to R
    rxs = robjects.FloatVector(xs)
    rys = robjects.FloatVector(line)
    rdf = robjects.DataFrame({'x': rxs, 'y': rys})
    robjects.globalenv['xs'] = rxs
    robjects.globalenv['df'] = rdf
    #print(rys.r_repr())

    # Fit an R model
    robjects.r('''fit <- nls(y ~ (off + c1 * exp(-(x-mu1)**2/(2*sg1**2)) + c2 * exp(-(x-mu2)**2/(2*sg2**2))), data=df, start=list(off = 90, c1=120, mu1=30, sg1=10, c2=120, mu2=60, sg2=10), algorithm='port')''')

    # Get fit results: full prediction plus each Gaussian on its own
    robjects.r('''k <- coef(fit)
fitdat <- data.frame(x=xs)
fitdat$y <- predict(fit, newdata=fitdat)
## Independent Gaussians
fitg1 <- data.frame(x=xs)
fitg1$y <- k[['off']] + k[['c1']] * exp(-(xs-k[['mu1']])**2/(2*k[['sg1']]**2))
fitg2 <- data.frame(x=xs)
fitg2$y <- k[['off']] + k[['c2']] * exp(-(xs-k[['mu2']])**2/(2*k[['sg2']]**2))''')

    # Plot R fits
    fitdat = robjects.globalenv['fitdat']
    fitg1 = robjects.globalenv['fitg1']
    fitg2 = robjects.globalenv['fitg2']
    pp = ggplot2.ggplot(rdf) \
        + ggplot2.aes_string(x='x', y='y') \
        + ggplot2.geom_point() \
        + ggplot2.geom_smooth(data=fitdat, stat="identity", size=1.5) \
        + ggplot2.geom_smooth(data=fitg1, stat="identity") \
        + ggplot2.geom_smooth(data=fitg2, stat="identity")
    pp.plot()

    # Compute the ratio of the gaussian integrals (c*sg ~ area under each)
    ratio = robjects.r('''k[['c1']]*k[['sg1']]/(k[['c1']]*k[['sg1']]+k[['c2']]*k[['sg2']])''')
    title('ratio='+str(ratio))
    show()
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Boxplot of abundance by genotype with jittered points overlaid,
    written as a PDF to `plot_file_path`."""
    grdevices.pdf(file=plot_file_path)
    chart = ggplot2.ggplot(data)
    chart += ggplot2.aes_string(x='genotype', y='abundance')
    chart += ggplot2.geom_boxplot()
    chart += ggplot2.ggtitle(title)
    chart += ggplot2.labs(x=xlabel, y=ylabel)
    chart += ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1))
    chart += ggplot2.geom_point()
    chart.plot()
    grdevices.dev_off()
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean, prefix=''):
    """Write per-subgroup LRR counts and plot mean ACC20 per position.

    subgroups_to_lrrs_acc20mean: dict mapping subgroup ->
        (acc20means, acc20_count).
    Outputs <prefix>step3_5_lrr_count.txt and
    <prefix>step3_5_lrr_acc20_line.png under OUTPUT_PATH.
    """
    pandas2ri.activate()
    subgroups_to_lrr_count = {}
    columns_to_data = {'subgroup': [], 'pos': [], 'acc20': []}
    # Flatten per-subgroup means into long-format columns (1-based positions)
    for subgroup, (acc20means, acc20_count) in subgroups_to_lrrs_acc20mean.items():
        subgroups_to_lrr_count[subgroup] = acc20_count
        for index, acc20mean in enumerate(acc20means):
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data['pos'].append(index + 1)
            columns_to_data['acc20'].append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    with open(os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt"), 'w') as f:
        for subgroup, lrr_count in subgroups_to_lrr_count.items():
            f.write(str.format("{}: {}\n", subgroup, lrr_count))

    # Generate the line chart file
    r_columns_to_data = {
        'subgroup': ro.StrVector(columns_to_data['subgroup']),
        'pos': ro.IntVector(columns_to_data['pos']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)
    line_chart_file_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_acc20_line.png")
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", line_chart_file_path, df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    # Large axis text, no legend title; x axis labelled with the LRR
    # consensus pattern (positions 1..24).
    pp = gp + \
        ggplot2.theme_bw() + \
        ggplot2.theme_classic() + \
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
        ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
        ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup') + \
        ggplot2.geom_point(size=4, shape=20) + \
        ggplot2.geom_line(size=3) + \
        ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
        ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
        ggplot2.scale_x_continuous(breaks=ro.IntVector(range(1, 25)), labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))
    pp.plot()
    logging.info(str.format("Output step3 file {}", line_chart_file_path))
    grdevices.dev_off()
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir = None):
    """Makes correlation plots between CEL files for the same cell type.

    expr: expression matrix (genes x CEL columns).
    expt_names: cell-type names; expt_name_idx maps each to its column
        indices in expr; cel_names labels the columns.
    For every within-cell-type column pair, plots one vs the other (shown
    interactively if outdir is None, else saved as PNG) and collects the
    Pearson correlation.  Returns a DataFrame of pair names/correlations,
    also written to outdir/cor_summary.txt when outdir is given.
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []
    for ex_idx, ex in enumerate(expt_names):
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0
        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                plot_idx += 1
                # Pearson correlation between the two replicate columns
                cor = np.corrcoef(expr[:, tmp_idx[i]], expr[:, tmp_idx[j]])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))
                df = ro.DataFrame({'x':ro.FloatVector(expr[:, tmp_idx[i]]),
                                   'y':ro.FloatVector(expr[:, tmp_idx[j]])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                    ggplot2.geom_point(size = 1) + \
                    ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                    ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                    ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize), 'axis.title.x':ggplot2.element_text(size = 8), 'axis.text.y':ggplot2.element_text(size = fsize), 'axis.title.y':ggplot2.element_text(size = 8, angle = 90), 'plot.title':ggplot2.element_text(size = fsize)})
                if outdir is None:
                    gp.plot()
                else:
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    outfile = os.path.join(outdir, ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename = outfile, plot = gp, width = 85, height = 85, unit = 'mm')
    df = pd.DataFrame({'name1':names_1, 'name2':names_2, 'cor':cors}, index = titles)
    if not outdir is None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep = '\t')
    return df
def multiple_locus_net_informativeness_scatterplot(locus_table, net_pi_table, loci):
    """Return a ggplot2 scatterplot of per-locus informativeness over time.

    loci: list of locus names, or ['all'] (case-insensitive) for every
    locus.  Queries the module-level R DB connection `con`.
    NOTE(review): SQL is built by string interpolation -- trusted inputs
    only.  tuple(loci) relies on Python tuple repr being valid SQL; a
    single-element list yields "('x',)" which is invalid -- confirm callers
    never pass exactly one locus.
    """
    if loci[0].lower() != 'all':
        qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} WHERE {0}.id = {1}.id and locus in {2}"'''.format(locus_table, net_pi_table, tuple(loci))
    else:
        qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} WHERE {0}.id = {1}.id"'''.format(locus_table, net_pi_table)
    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y = 'pi') + \
        ggplot2.geom_point(ggplot2.aes_string(colour = 'locus'), \
        size = 3, alpha = 0.4) + ggplot2.scale_x_reverse('years ago') + \
        ggplot2.scale_y_continuous('phylogenetic informativeness')
    return plot
def scatter(self, dataframe, filename, parm1, parm2, units1, units2, group,
            logx, logy):
    """Scatter parm1 vs parm2 coloured by `group`, saved as a 512x512 PNG.

    units1/units2: unit strings appended to the axis labels.
    logx/logy: booleans selecting log10 scaling per axis.
    """
    grdevices.png(file=filename, width=512, height=512)
    gg = (ggplot2.ggplot(dataframe)
          + ggplot2.aes_string(x=parm1, y=parm2, colour=group)
          + ggplot2.geom_point(alpha=0.7)
          + ggplot2.labs(x=parm1 + " " + units1, y=parm2 + " " + units2))
    # Add log scales only where requested (replaces the former four-way
    # `logx == True and logy == True` branch and the unconditionally
    # constructed, sometimes-unused scale layers).
    if logx:
        gg += ggplot2.scale_x_log10()
    if logy:
        gg += ggplot2.scale_y_log10()
    gg.plot()
    grdevices.dev_off()
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname, line_chart_file_path):
    """Plot mean ACC20 around a site (offsets -5..+5, centre labelled 'N')
    for each type in ts_to_acc20s, saved as a PNG.

    ts_to_acc20s: dict mapping type -> list of ACC20 series.
    tname: column/legend name used for the type grouping.
    line_chart_file_path: output PNG path.
    """
    logging.debug(
        str.format("Begin to generate {}, data {}", line_chart_file_path, ts_to_acc20s))
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)
    columns_to_data = {tname: [], 'site': [], 'acc20': []}
    # Long format: one row per (type, site offset); offsets run -5..+5
    for ss, acc20means in ts_to_acc20mean.items():
        for index, acc20mean in enumerate(acc20means):
            columns_to_data[tname].append(ss)
            columns_to_data['site'].append(index - 5)
            columns_to_data['acc20'].append(acc20mean)

    # Generate the line chart file
    r_columns_to_data = {
        tname: ro.StrVector(columns_to_data[tname]),
        'site': ro.IntVector(columns_to_data['site']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", line_chart_file_path, df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    # Large axis text, no legend title; x axis shows -5..-1, N, 1..5
    pp = gp + \
        ggplot2.theme_bw() + \
        ggplot2.theme_classic() + \
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
        ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
        ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname) + \
        ggplot2.geom_point(size=4, shape=20) + \
        ggplot2.geom_line(size=3) + \
        ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
        ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
        ggplot2.scale_x_continuous(breaks=ro.IntVector(list(range(-5, 6))), labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N', '1', '2', '3', '4', '5']))
    pp.plot()
    logging.info(str.format("Output step3 file {}", line_chart_file_path))
    grdevices.dev_off()
def scatter(self, dataframe, filename, parm1, parm2, units1, units2, group,
            logx, logy):
    """Scatter parm1 vs parm2 coloured by `group`, saved as a 512x512 PNG.

    units1/units2: unit strings appended to the axis labels.
    logx/logy: booleans selecting log10 scaling per axis.
    """
    grdevices.png(file=filename, width=512, height=512)
    gg = (ggplot2.ggplot(dataframe)
          + ggplot2.aes_string(x=parm1, y=parm2, colour=group)
          + ggplot2.geom_point(alpha=0.7)
          + ggplot2.labs(x=parm1 + " " + units1, y=parm2 + " " + units2))
    # Add log scales only where requested (replaces the former four-way
    # `logx == True and logy == True` branch and the unconditionally
    # constructed, sometimes-unused scale layers).
    if logx:
        gg += ggplot2.scale_x_log10()
    if logy:
        gg += ggplot2.scale_y_log10()
    gg.plot()
    grdevices.dev_off()
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    """Dump and plot main vs. control TSS coverage for selected TE families.

    te_tss_cov / control_te_tss_cov: dicts keyed by (te_name, te_family)
        mapping to per-position coverage lists of length
        upstream+downstream+1.
    out_prefix: prefix for the <prefix>_raw/ and <prefix>_plot/ output
        dirs (both wiped and recreated).
    NOTE(review): Python 2 code (print >> statement, 2*range(...) list
    repetition).
    """
    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file, one file per whitelisted (name, family) pair
    for te in te_tss_cov:
        if te[0] in ['n','*','HERVH-int','L2a','AluSx','AluJb','MIRb','LTR7'] and te[1] in ['n','*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger']:
            raw_out = open('%s_raw/%s_%s.txt' % (out_prefix,te[0].replace('/','_'),te[1].replace('/','_')),'w')
            for i in range(-upstream,downstream+1):
                print >> raw_out, '%d\t%e\t%e' % (i, te_tss_cov[te][upstream+i], control_te_tss_cov[te][upstream+i])
            raw_out.close()

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures shared by every plot: positions repeated per
    # series, plus the matching Main/Control labels
    tss_i = ro.IntVector(2*range(-upstream,downstream+1))
    labels = ro.StrVector(['Main']*(upstream+downstream+1)+['Control']*(upstream+downstream+1))

    for te in te_tss_cov:
        if te[0] in ['n','*','HERVH-int','L2a','AluSx','AluJb','MIRb','LTR7'] and te[1] in ['n','*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger']:
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(file='%s_plot/%s_%s.pdf' % (out_prefix,te[0].replace('/','_'),te[1].replace('/','_')))
            gp.plot()
            grdevices.dev_off()
def build(self):
    """Build the chart image described by self.spec and return self.cfilename.

    Reads either a local CSV (./data/<name>.csv) or an R sample dataset,
    maps the filled-in viz[...] spec entries onto ggplot2 aesthetics, and
    renders a scatter plot to self.sfilename (700x400 PNG).
    NOTE(review): Python 2 code (print statement, str.encode to ascii).
    """
    ##print grdevices.palette()
    if self.spec['type'] == 'csv' :
        df = robjects.DataFrame.from_csvfile('./data/' + self.spec['name'] + '.csv')
    else :
        print type(self.spec['name'])
        samplename = self.spec['name'].encode('ascii','ignore')
        df = data(datasets).fetch(samplename)[samplename]
    #print df
    grdevices.png(file=self.sfilename, width=700, height=400)
    pp = ggplot2.ggplot(df)
    ppargs = {}
    # Forward only the aesthetics the spec actually fills in
    if len(self.spec['viz[xaxis]']) != 0 :
        ppargs['x'] = self.spec['viz[xaxis]']
    if len(self.spec['viz[yaxis]']) != 0 :
        ppargs['y'] = self.spec['viz[yaxis]']
    if len(self.spec['viz[color]']) != 0 :
        ppargs['colour'] = self.spec['viz[color]']
    if len(self.spec['viz[shape]']) != 0 :
        ppargs['shape'] = self.spec['viz[shape]']
    # NOTE(review): player1/player2 are computed but never used below
    player1 = self.spec['viz[layer1]'] if len(self.spec['viz[layer1]']) != 0 else None
    player2 = self.spec['viz[layer2]'] if len(self.spec['viz[layer2]']) != 0 else None
    pp = pp + ggplot2.aes_string(**ppargs)
    ##pp = pp + ggplot2.geom_bar(stat="identity", fill="white", colour="darkgreen")
    ##pp = pp + ggplot2.scale_fill_brewer(palette="blues")
    ##pp = pp + ggplot2.geom_point()
    pp = pp + ggplot2.geom_point(size=5)
    pp.plot()
    grdevices.dev_off()
    return self.cfilename
def make_output(cov, out_prefix, window):
    """Dump windowed coverage to <prefix>_raw.txt and plot it to <prefix>.pdf.

    cov: per-position coverage; index window/2+i holds the value at offset i.
    NOTE(review): Python 2 code (print >> statement, integer division).
    """
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-window/2,window/2+1):
        print >> raw_out, '%d\t%e' % (i, cov[window/2+i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(range(-window/2,window/2+1))
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({'splice_i':splice_i, 'cov':cov})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
def plot_domain_cumul(self, crawl):
    """Plot cumulative URL counts over top domains for one crawl.

    Filters self.histogr to (type=domain, type_counted=url) rows of
    `crawl`, derives cumulative and percentage columns, saves a log-log
    line chart to PLOTDIR/crawler/histogr_domain_cumul.png and returns
    the plot object.  Prints intermediate frames to stdout.
    """
    # -- coverage (cumulative pages) per domain
    data = self.histogr
    data = data[data['type'].isin(['domain'])]
    data = data[data['crawl'] == crawl]
    data = data[data['type_counted'].isin(['url'])]
    # total URLs in each frequency bucket = per-domain count * bucket size
    data['urls'] = data['count']*data['frequency']
    print(data)
    data = data[['urls', 'count', 'frequency']]
    data = data.sort_values(['count'], ascending=0)
    data['cum_domains'] = data['frequency'].cumsum()
    data['cum_urls'] = data['urls'].cumsum()
    # percentage views of the same columns, rounded to one decimal
    data_perc = data.apply(lambda x: round(100.0*x/float(x.sum()), 1))
    data['%domains'] = data_perc['frequency']
    data['%urls'] = data_perc['urls']
    data['%cum_domains'] = data['cum_domains'].apply(
        lambda x: round(100.0*x/float(data['frequency'].sum()), 1))
    data['%cum_urls'] = data['cum_urls'].apply(
        lambda x: round(100.0*x/float(data['urls'].sum()), 1))
    with pandas.option_context('display.max_rows', None,
                               'display.max_columns', None,
                               'display.width', 200):
        print(data)
    img_path = os.path.join(PLOTDIR, 'crawler/histogr_domain_cumul.png')
    # data.to_csv(img_path + '.csv')
    title = 'Cumulative URLs for Top Domains'
    p = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
        + ggplot2.geom_line() + ggplot2.geom_point() \
        + GGPLOT2_THEME \
        + ggplot2.labs(title=title, x='domains cumulative', y='URLs cumulative') \
        + ggplot2.scale_y_log10() \
        + ggplot2.scale_x_log10()
    p.save(img_path)
    return p
def plot_domain_cumul(self, crawl):
    """Plot cumulative URL counts over top domains for one crawl.

    Filters self.histogr to (type=domain, type_counted=url) rows of
    `crawl`, derives cumulative and percentage columns, saves a log-log
    line chart to PLOTDIR/crawler/histogr_domain_cumul.png and returns
    the plot object.  Prints intermediate frames to stdout.
    """
    # -- coverage (cumulative pages) per domain
    data = self.histogr
    data = data[data['type'].isin(['domain'])]
    data = data[data['crawl'] == crawl]
    data = data[data['type_counted'].isin(['url'])]
    # total URLs in each frequency bucket = per-domain count * bucket size
    data['urls'] = data['count'] * data['frequency']
    print(data)
    data = data[['urls', 'count', 'frequency']]
    data = data.sort_values(['count'], ascending=0)
    data['cum_domains'] = data['frequency'].cumsum()
    data['cum_urls'] = data['urls'].cumsum()
    # percentage views of the same columns, rounded to one decimal
    data_perc = data.apply(lambda x: round(100.0 * x / float(x.sum()), 1))
    data['%domains'] = data_perc['frequency']
    data['%urls'] = data_perc['urls']
    data['%cum_domains'] = data['cum_domains'].apply(
        lambda x: round(100.0 * x / float(data['frequency'].sum()), 1))
    data['%cum_urls'] = data['cum_urls'].apply(
        lambda x: round(100.0 * x / float(data['urls'].sum()), 1))
    with pandas.option_context('display.max_rows', None,
                               'display.max_columns', None,
                               'display.width', 200):
        print(data)
    img_path = os.path.join(PLOTDIR, 'crawler/histogr_domain_cumul.png')
    # data.to_csv(img_path + '.csv')
    title = 'Cumulative URLs for Top Domains'
    p = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
        + ggplot2.geom_line() + ggplot2.geom_point() \
        + GGPLOT2_THEME \
        + ggplot2.labs(title=title, x='domains cumulative', y='URLs cumulative') \
        + ggplot2.scale_y_log10() \
        + ggplot2.scale_x_log10()
    p.save(img_path)
    return p
import math, datetime
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr

# Load the base/datasets R packages and fetch the mtcars demo data frame.
base = importr("base")
datasets = importr("datasets")
mtcars = datasets.data.fetch("mtcars")["mtcars"]

# mpg vs weight, coloured by cylinder count, with one linear fit per group.
pp = (
    ggplot2.ggplot(mtcars)
    + ggplot2.aes_string(x="wt", y="mpg", col="factor(cyl)")
    + ggplot2.geom_point()
    + ggplot2.geom_smooth(ggplot2.aes_string(group="cyl"), method="lm")
)
pp.plot()
def test_vars(self):
    """facet_wrap accepts a vars() spec; the composed object stays a GGPlot."""
    plot = ggplot2.ggplot(mtcars)
    plot += ggplot2.aes(x='wt', y='mpg')
    plot += ggplot2.geom_point()
    # NOTE(review): mtcars' column is named 'gear'; 'gears' is kept as-is
    # because only object construction is asserted here -- confirm if this
    # plot is ever actually rendered.
    plot += ggplot2.facet_wrap(ggplot2.vars('gears'))
    assert isinstance(plot, ggplot2.GGPlot)
# NOTE(review): fragment -- d, combos, combos_r, times, times_r, n_loops,
# Formula and the rpy2 vector classes are defined earlier in the file.
# Assemble forward and reverse benchmark runs into one data frame.
d['code'] = StrVector([x[0] for x in combos]) + StrVector([x[0] for x in combos_r])
# NOTE(review): the reverse-run half of 'sequence' reuses x[0] (same field
# as 'code') while the forward half uses x[-2] -- possible copy/paste slip,
# confirm against the combos_r layout.
d['sequence'] = StrVector([x[-2] for x in combos]) + StrVector([x[0] for x in combos_r])
d['time'] = FloatVector([x for x in times]) + FloatVector(times_r)
d['n_loop'] = IntVector([x[-1] for x in combos]) + IntVector([x[3] for x in combos_r])
# group id = "code:sequence" so each combination gets its own line
d['group'] = StrVector([d['code'][x] + ':' + d['sequence'][x] for x in range(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2

# Running time vs repetition count, one panel per sequence type.
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.labs(title = "Benchmark (running time)")

from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width = 712, height = 512)
p.plot()
grdevices.dev_off()
ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \ ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \ ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \ ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \ 'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \ 'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \ 'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \ 'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \ 'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \ 'axis.text.y': ggplot2.theme_blank()} ) + \ ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \ ggplot2.coord_equal() p_map.plot() ## add the scatterplot ## define layout of subplot with viewports vp_sub = grid.viewport(x = 0.19, y = 0.2, width = 0.32, height = 0.4) p_sub = ggplot2.ggplot(RR_distance) + \ ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \ ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \ ggplot2.stat_smooth(color="black") + \ ggplot2.opts(**{'legend.position': 'none'}) + \ ggplot2.scale_x_continuous("Obama Vote Share") + \ ggplot2.scale_y_continuous("Distance to nearest Railroad") p_sub.plot(vp=vp_sub) grdevices.dev_off()
def testAdd(self):
    """Adding aes_string + geom to a ggplot still yields a GGPlot."""
    plot = ggplot2.ggplot(mtcars)
    plot = plot + ggplot2.aes_string(x='wt', y='mpg')
    plot = plot + ggplot2.geom_point()
    self.assertTrue(isinstance(plot, ggplot2.GGPlot))
import math, datetime
import time
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.interactive import process_revents

# Demo: show an interactive ggplot2 window kept alive by the R event loop.
grdevices = importr('grDevices')
process_revents.start()
base = importr('base')
datasets= importr('datasets')
mtcars = datasets.__rdata__.fetch('mtcars')['mtcars']

# Scatter of weight vs mpg, coloured by cylinder count, with a per-cylinder
# linear fit.
pp = ggplot2.ggplot(mtcars) + \
     ggplot2.aes_string(x='wt', y='mpg', col='factor(cyl)') + \
     ggplot2.geom_point() + \
     ggplot2.geom_smooth(ggplot2.aes_string(group = 'cyl'),
                         method = 'lm')
#pp.plot()
#process_revents.start()
print(pp)
process_revents.process_revents()
# Keep the process alive so the R window stays responsive.  The original
# code placed process_revents.stop() after a bare `while True` loop, making
# it unreachable; run the loop under try/finally so Ctrl-C exits cleanly
# and the event-processing thread is actually stopped.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    pass
finally:
    process_revents.stop()
# Fit a 1-D linear regression to the correlation data and plot the points
# plus the fitted line with ggplot2 (via rpy2).
xmin = np.min(x)
xmax = np.max(x)
# 100 evenly spaced x positions, as a column vector, for the fitted line
xs = np.linspace(xmin, xmax, num=100).reshape(100, 1)
lm = LinearRegression()
# The training data for scikit models must be in matrix
# form, i.e. columns == features, rows == observations.
# For this we need to reshape the 1-dimensional arrays.
X = corr_nci60.reshape(len(x), 1)
y = corr_sec
lm.fit(X, y)
y_pred = lm.predict(xs)
# Plot the data using the R-bridge rpy and ggplot
p = gg.ggplot(pd.DataFrame())
# observed correlations as points
p += gg.geom_point(
    gg.aes_string(x='r_nci60', y='r_sec'),
    data=pd.DataFrame({
        'r_nci60': corr_nci60,
        'r_sec': corr_sec
    })
)
# fitted regression line in red
p += gg.geom_line(
    gg.aes_string(x='x', y='y'),
    data=pd.DataFrame({
        'x': xs.reshape(-1),
        'y': y_pred
    }),
    color='red'
)
p.plot()
# Compute the coefficient of variation (CV) for each peak row of the R data
# frame `dataf` (columns 0-1 are assumed metadata — TODO confirm), append it
# as a 'CV' column, write the result to CSV, and plot CV vs peak number.
number_of_peaks = len(dataf[0])
cvI = []
newRow = []
for i in range(1,number_of_peaks+1):
    # R indexing is 1-based; rx(i, True) selects row i, all columns
    row = dataf.rx(i,True)
    rowA = np.array(row)
    newRow.append(rowA[2:])
    cvI.append(cv(rowA[2:]))
    #cv.append(rowA[2:].std()/rowA[2:].mean())
cv_r=robjects.conversion.py2ri(cvI)
df_cv = {'CV' : cv_r}
dataf_cv = robjects.DataFrame(df_cv)
# melt to long form so the CV values line up as a single column
dtf_cv = robjects.r.melt(dataf_cv)
d=dataf.cbind(dtf_cv.rx(2))
# rename melt's generic 'value' column to 'CV'
d.names[tuple(d.colnames).index('value')] = 'CV'
#d = base.merge_data_frame(dataf,dtf_cv.rx(2))
utilis.write_csv(d, options.csv_output)
dc = dtf_cv.cbind(n_peak = robjects.IntVector(range(1,number_of_peaks+1)))
#n_peak = robjects.IntVector(1,number_of_peaks)
gp = ggplot2.ggplot(dc)
pp=gp+ggplot2.aes_string(x='n_peak',y='value') + ggplot2.geom_point()+ggplot2.theme_bw()+ ggplot2.ggtitle('Coefficient of Variation')+ \
    ggplot2.scale_x_continuous("Number of Peaks")+ ggplot2.scale_y_continuous("CV")
# open an interactive X11 device and draw
r.X11()
pp.plot()
# Documentation example: render the mtcars scatterplot to PNG, then set up a
# 1x3 grid layout for the geom_bin2d examples that follow.
from rpy2.robjects.packages import importr
base = importr('base')
datasets = importr('datasets')
mtcars = datasets.mtcars
#-- setupggplot2-end
grdevices.png('../../_static/graphics_ggplot2mtcars.png',
              width = 612, height = 612, antialias="subpixel", type="cairo")
#-- ggplot2mtcars-begin
gp = ggplot2.ggplot(mtcars)
pp = gp + \
     ggplot2.aes_string(x='wt', y='mpg') + \
     ggplot2.geom_point()
pp.plot()
#-- ggplot2mtcars-end
grdevices.dev_off()
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width = 1000, height = 350, antialias="subpixel", type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()
vp = grid.viewport(**{'layout.pos.col':1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)
pp = gp + \
mtcars = data(datasets).fetch('mtcars')['mtcars'] #-- setupggplot2-end grdevices.png('../../_static/graphics_ggplot2mtcars.png', width=612, height=612, antialias="subpixel", type="cairo") #-- ggplot2mtcars-begin gp = ggplot2.ggplot(mtcars) pp = gp + \ ggplot2.aes_string(x='wt', y='mpg') + \ ggplot2.geom_point() pp.plot() #-- ggplot2mtcars-end grdevices.dev_off() grdevices.png('../../_static/graphics_ggplot2geombin2d.png', width=1000, height=350, antialias="subpixel", type="cairo") grid.newpage() grid.viewport(layout=grid.layout(1, 3)).push() vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1}) #-- ggplot2geombin2d-begin
# "index" is equivalent to "names" in R if obj.ndim == 1: res.names = ListVector({'x': ro.conversion.py2ri(obj.index)}) else: res.dimnames = ListVector(ro.conversion.py2ri(obj.index)) return res else: return py2ri_orig(obj) rpy2.robjects.conversion.py2ri = conversion_pydataframe # <codecell> import pandas # <codecell> import rpy2.robjects.lib.ggplot2 as ggplot2 # <codecell> df = pandas.DataFrame({"a":range(10), "b":range(10,20)}) # <codecell> pp = ggplot2.ggplot(df) + ggplot2.aes_string(x="a", y="b") + ggplot2.geom_point() pp.plot() # <codecell>
def plot_volcano_with_r(
        data,
        xlabel='Estimated effect (change in H/L ratio)',
        title='',
        max_labels=20,
        color_background='#737373',
        color_significant='#252525',
        color_significant_muted='#252525',
        label_only_large_fc=False,
        special_labels=None,
        special_palette=None,
        base_size=12,
        label_size=3,
        x='logFC',
        y='neg_log10_p_adjust',
        special_labels_mode='all',
        xlim=None,
        skip_labels=None,
        nudges=None,
):
    """Draw a volcano plot (effect size vs -log10 adjusted p) with R/ggplot2.

    The input is converted by transform_data_for_ggplot, which also decides
    which points get text labels (up to max_labels, controlled by the
    label_* / special_* / skip_labels / nudges options).  Points are coloured
    by significance group; threshold lines are drawn at the module-level
    FDR_THRESHOLD_RESPONSE / FC_THRESHOLD_RESPONSE constants.  The plot is
    rendered onto the currently active R graphics device.
    """
    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)
    plot = r_ggplot2.ggplot(r_data)
    # Minimal theme with a plain black frame and no grid lines.
    plot += r_ggplot2.theme_minimal(base_size=base_size)
    plot += r_ggplot2.theme(
        **{
            'panel.grid.major': r_ggplot2.element_blank(),
            'panel.grid.minor': r_ggplot2.element_blank(),
            'panel.border': r_ggplot2.element_rect(fill=robjects.rinterface.NA, color="black")
        })
    plot += r_ggplot2.theme(
        text=r_ggplot2.element_text(family='Helvetica', face='plain'))
    plot += r_ggplot2.theme(
        **{
            'plot.title': r_ggplot2.element_text(hjust=0.5),
            # 'axis.title.y': r_ggplot2.element_text((t = 0, r = 20, b = 0, l = 0)),
        })
    # Colour points by their significance group, with a manual palette built
    # from the colour parameters.
    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    scale_points = r_ggplot2.scale_colour_manual(
        aes_points,
        values=r_label_palette(
            r_like_data,
            special_palette,
            color_background=color_background,
            color_significant=color_significant,
            color_significant_muted=color_significant_muted))
    plot += aes_points
    plot += scale_points
    # Two-digit axis tick labels; optional fixed x range.
    if xlim is not None:
        plot += r_ggplot2.scale_x_continuous(
            labels=r_custom.formatterFunTwoDigits,
            limits=robjects.r.c(*xlim))
    else:
        plot += r_ggplot2.scale_x_continuous(
            labels=r_custom.formatterFunTwoDigits)
    plot += r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit)
    # Significance threshold guides (horizontal FDR line, vertical +/- FC lines).
    plot += r_ggplot2.geom_hline(
        yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)),
        color='#BDBDBD',
        alpha=.3)
    plot += r_ggplot2.geom_vline(xintercept=float(FC_THRESHOLD_RESPONSE),
                                 color='#BDBDBD',
                                 alpha=.3)
    plot += r_ggplot2.geom_vline(xintercept=-float(FC_THRESHOLD_RESPONSE),
                                 color='#BDBDBD',
                                 alpha=.3)
    plot += r_ggplot2.geom_point(**{'show.legend': False})
    # Non-overlapping text labels via ggrepel, with per-point nudges taken
    # from the nudgex/nudgey columns of the converted data.
    aes_text = r_ggplot2.aes_string(label='label')
    plot += aes_text
    plot += r_ggrepel.geom_text_repel(
        aes_text,
        nudge_x=r_dollar(r_data, 'nudgex'),
        nudge_y=r_dollar(r_data, 'nudgey'),
        size=label_size,
        family='Helvetica',
        **{
            'show.legend': False,
            'point.padding': 0.25,
            'min.segment.length': 0,
            #'max.iter':0,
            'segment.color': '#BDBDBD'
        },
    )
    plot += r_ggplot2.labs(x=xlabel, y='Adjusted p value (-log10)', title=title)
    plot.plot()
def main():
    """Run a DESeq2 differential-expression analysis via rpy2.

    Reads a count matrix and a sample/formula table from the command line,
    runs DESeq2 in R (with an optional batch term), then writes QC plots
    (PCA, MA, QQ, p-value histogram, dispersion) to a PDF and the plain and
    LFC-shrunk result tables to TSV files under `outdir`.
    """
    # Command Line Stuff...
    myCommandLine = CommandLine()
    outdir = myCommandLine.args['outDir']
    group1 = myCommandLine.args['group1']
    group2 = myCommandLine.args['group2']
    batch = myCommandLine.args['batch']
    matrix = myCommandLine.args['matrix']
    prefix = myCommandLine.args['prefix']
    formula = myCommandLine.args['formula']
    print("running DESEQ2 %s" % prefix, file=sys.stderr)
    # make the quant DF
    quantDF = pd.read_table(matrix, header=0, sep='\t', index_col=0)
    df = pandas2ri.py2ri(quantDF)
    # import formula; include a batch term only when the table provides one
    formulaDF = pd.read_csv(formula,header=0, sep="\t",index_col=0)
    sampleTable = pandas2ri.py2ri(formulaDF)
    if "batch" in list(formulaDF):
        design = Formula("~ batch + condition")
    else:
        design = Formula("~ condition")
    # import DESeq2
    from rpy2.robjects.packages import importr
    import rpy2.robjects.lib.ggplot2 as ggplot2
    methods = importr('methods')
    deseq = importr('DESeq2')
    grdevices = importr('grDevices')
    qqman = importr('qqman')
    ### RUN DESEQ2 ###
    R.assign('df', df)
    R.assign('sampleTable', sampleTable)
    R.assign('design',design)
    R('dds <- DESeqDataSetFromMatrix(countData = df, colData = sampleTable, design = design)')
    R('dds <- DESeq(dds)')
    R('name <- grep("condition", resultsNames(dds), value=TRUE)')
    ### ###
    # Get Results and shrinkage values
    res = R('results(dds, name=name)')
    resLFC = R('lfcShrink(dds, coef=name)')
    vsd = R('vst(dds,blind=FALSE)')
    resdf = robjects.r['as.data.frame'](res)
    reslfc = robjects.r['as.data.frame'](resLFC)
    dds = R('dds')
    ### Plotting section ###
    # plot MA and PC stats for the user
    plotMA = robjects.r['plotMA']
    plotDisp = robjects.r['plotDispEsts']
    plotPCA = robjects.r['plotPCA']
    plotQQ = robjects.r['qq']
    # get pca data (colour by condition, shape by batch when present)
    if "batch" in list(formulaDF):
        pcaData = plotPCA(vsd,
                          intgroup=robjects.StrVector(("condition", "batch")),
                          returnData=robjects.r['T'])
        percentVar = robjects.r['attr'](pcaData, "percentVar")
    else:
        print(vsd)
        pcaData = plotPCA(vsd, intgroup="condition",
                          returnData=robjects.r['T'])
        percentVar = robjects.r['attr'](pcaData, "percentVar")
    # arrange
    data_folder = os.path.join(os.getcwd(), outdir)
    qcOut = os.path.join(data_folder,
                         "%s_QCplots_%s_v_%s.pdf" % (prefix,group1,group2))
    grdevices.pdf(file=qcOut)
    # Axis labels with the explained variance.  The original code appended a
    # literal "%% variance" OUTSIDE the %-formatting, which rendered a double
    # percent sign; put the %% inside the format string so one '%' appears.
    x = "PC1: %s%% variance" % int(percentVar[0]*100)
    y = "PC2: %s%% variance" % int(percentVar[1]*100)
    if "batch" in list(formulaDF):
        pp = ggplot2.ggplot(pcaData) + \
             ggplot2.aes_string(x="PC1", y="PC2", color="condition", shape="batch") + \
             ggplot2.geom_point(size=3) + \
             robjects.r['xlab'](x) + \
             robjects.r['ylab'](y) + \
             ggplot2.theme_classic() + \
             ggplot2.coord_fixed()
    else:
        pp = ggplot2.ggplot(pcaData) + \
             ggplot2.aes_string(x="PC1", y="PC2", color="condition") + \
             ggplot2.geom_point(size=3) + \
             robjects.r['xlab'](x) + \
             robjects.r['ylab'](y) + \
             ggplot2.theme_classic() + \
             ggplot2.coord_fixed()
    pp.plot()
    plotMA(res, ylim=robjects.IntVector((-3,3)), main="MA-plot results")
    plotMA(resLFC, ylim=robjects.IntVector((-3,3)), main="MA-plot LFCSrhinkage")
    plotQQ(reslfc.rx2('pvalue'), main="LFCSrhinkage pvalue QQ")
    hh = ggplot2.ggplot(resdf) + \
         ggplot2.aes_string(x="pvalue") + \
         ggplot2.geom_histogram() + \
         ggplot2.theme_classic() + \
         ggplot2.ggtitle("pvalue distribution")
    hh.plot()
    plotDisp(dds, main="Dispersion Estimates")
    grdevices.dev_off()
    # Write both result tables (with and without LFC shrinkage).
    data_folder = os.path.join(os.getcwd(), outdir)
    lfcOut = os.path.join(data_folder,
                          "%s_%s_v_%s_deseq2_results_shrinkage.tsv" % (prefix,group1,group2))
    resOut = os.path.join(data_folder,
                          "%s_%s_v_%s_deseq2_results.tsv" % (prefix,group1,group2))
    robjects.r['write.table'](reslfc, file=lfcOut, quote=False, sep="\t")
    robjects.r['write.table'](resdf, file=resOut, quote=False, sep="\t")
#print "onlysurf" #print onlysurf #colours2 = grdevices.topo_colors(10) colours2 = grdevices.cm_colors(10) #colours2 = grdevices.rainbow(20) #print colours2 #colours = ggplot2.rainbow(54) #bins=10 gp = ggplot2.ggplot(onlysurf) #gp = ggplot2.ggplot(onlyfilts) gp=gp+ggplot2.aes_string(x="Lon", y="Lat", col="Temp",label="Station") gp=gp+ggplot2.scale_colour_gradientn(colours=colours2) gp=gp+ggplot2.geom_text(col="black",offset = 10) gp=gp+ggplot2.geom_point(position="jitter") gp=gp+ggplot2.ggtitle(graphtitle) robjects.r('library(ggmap)') robjects.r('library(mapproj)') robjects.r('map <- get_map(location = "Europe", zoom = 4)') robjects.r('ggmap(map)') #robjects.r('library(maps)') #robjects.r('map("world", interior = FALSE)') #robjects.r('map("state", boundary = FALSE, col="gray", add = TRUE)') #gp.plot() '''
print("\nggplot")
print("------")
import numpy as np
import pandas as pd
import rpy2.robjects.packages as packages
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro

# Import the mtcars dataset from R
R = ro.r
datasets = packages.importr('datasets')
mtcars = packages.data(datasets).fetch('mtcars')['mtcars']

# Build the chart with ggplot: wt vs mpg, points coloured by qsec on a
# yellow-to-red gradient, plus an automatic smoother.
gp = ggplot2.ggplot(mtcars)
pyplot = (gp
          + ggplot2.aes_string(x = 'wt', y = 'mpg')
          + ggplot2.geom_point(ggplot2.aes_string(colour = 'qsec'))
          + ggplot2.scale_colour_gradient(low = "yellow", high = "red")
          + ggplot2.geom_smooth(method = 'auto')
          + ggplot2.labs(title = "mtcars", x = 'wt', y = 'mpg'))
pyplot.plot()

# Analysis of variance: control vs treatment plant weights (R's PlantGrowth
# example data, entered inline).
print("\nAnálise de Variância")
print("--------------------")
import rpy2.robjects as robjects
r = robjects.r
controle = robjects.FloatVector([4.17,5.58,5.18,6.11,4.50,4.61,
                                 5.17,4.53,5.33,5.14])
tratamento = robjects.FloatVector([4.81,4.17,4.41,3.59,5.87,3.83,
# Documentation example (third variant): mtcars scatterplot PNG, then the
# 1x3 grid for the geom_bin2d panels.
base = importr('base')
mtcars = data(datasets).fetch('mtcars')['mtcars']
#-- setupggplot2-end
grdevices.png('../../_static/graphics_ggplot2mtcars.png',
              width=612, height=612, antialias=ANTIALIAS, type="cairo")
#-- ggplot2mtcars-begin
gp = ggplot2.ggplot(mtcars)
pp = (gp +
      ggplot2.aes_string(x='wt', y='mpg') +
      ggplot2.geom_point())
pp.plot()
#-- ggplot2mtcars-end
grdevices.dev_off()
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width=1000, height=350, antialias=ANTIALIAS, type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)
import math, datetime
import time
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.interactive import process_revents

# Demo: show an interactive ggplot2 window kept alive by the R event loop.
grdevices = importr('grDevices')
process_revents.start()
base = importr('base')
datasets = importr('datasets')
mtcars = datasets.__rdata__.fetch('mtcars')['mtcars']

# Scatter of weight vs mpg, coloured by cylinder count, with a per-cylinder
# linear fit.
pp = ggplot2.ggplot(mtcars) + ggplot2.aes_string(
    x='wt', y='mpg', col='factor(cyl)') + ggplot2.geom_point() + ggplot2.geom_smooth(
        ggplot2.aes_string(group='cyl'), method='lm')
#pp.plot()
#process_revents.start()
print(pp)
process_revents.process_revents()
# Keep the process alive so the R window stays responsive.  The original
# code placed process_revents.stop() after a bare `while True` loop, making
# it unreachable; run the loop under try/finally so Ctrl-C exits cleanly
# and the event-processing thread is actually stopped.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    pass
finally:
    process_revents.stop()
def as_dataframe (cfg, results, basis):
    """Flatten benchmark `results` into an R data frame and save it.

    For every (variation, language, problem, thread-count) combination the
    mean run time, its 99.9% confidence half-width (SE), memory usage, and
    the speedup relative to a sequential baseline (with pairwiseCI ratio
    confidence bounds) are collected.  `basis` selects the baseline:
    'fastest' (min of seq and p=1), 'seq', or 'p1'.  Synthetic 'ideal'
    language/problem rows are added for the ideal-speedup reference line.
    The frame is also saved to performance.Rda on the R side.
    """
    r = robjects.r
    varis = []
    langs = []
    probs = []
    times = []
    threads = []
    # speedups, with upper and lower bounds below
    speedups = []
    speedup_lowers = []
    speedup_uppers = []
    ses = [] # standard errors
    mems = [] # memory usage
    langs_ideal = list (cfg.languages)
    langs_ideal.append ('ideal')
    probs_ideal = list (cfg.problems)
    probs_ideal.append ('ideal')
    for var in cfg.variations:
        for lang in langs_ideal: # cfg.languages:
            for prob in probs_ideal: # cfg.problems:
                for thread in cfg.threads:
                    if lang == 'ideal' and prob == 'ideal':
                        continue
                    elif lang == 'ideal' or prob == 'ideal':
                        # ideal row: speedup == thread count, dummy time/mem
                        varis.append (var)
                        langs.append (pretty_langs[lang])
                        probs.append (prob)
                        threads.append (thread)
                        speedups.append (thread)
                        speedup_lowers.append (thread)
                        speedup_uppers.append (thread)
                        times.append (0)
                        ses.append(0)
                        mems.append (0)
                        continue
                    varis.append (var) # pretty_varis [var])
                    langs.append (pretty_langs [lang])
                    probs.append (prob)
                    threads.append (thread)
                    # sequential variations have a single measurement, stored
                    # under the highest thread key
                    if var.find('seq') >= 0:
                        thread = cfg.threads[-1]
                    vals = FloatVector (results[thread][prob][var][lang][0])
                    time = mean (vals)
                    times.append (time)
                    #
                    # time confidence interval
                    #
                    # BUGFIX: the kwarg key was " conf.level" (leading space),
                    # so R's t.test never saw conf.level and used its 0.95
                    # default; line_plot passes the correctly spelled
                    # 'conf.level' for the same purpose.
                    t_result = r['t.test'] (FloatVector(vals),
                                            **{"conf.level": 0.999}).rx ('conf.int')[0]
                    ses.append ((t_result[1] - t_result[0])/2)
                    #
                    # memory usage
                    #
                    mem_filename = get_mem_output (lang, prob, var)
                    with open (mem_filename, 'r') as mem_file:
                        mem = mem_file.readline()
                        mems.append (float (mem))
                    # we include dummy data for the sequential case to avoid the
                    # speedup calculation below
                    if var.find('seq') >= 0:
                        speedups.append (1)
                        speedup_lowers.append (1)
                        speedup_uppers.append (1)
                        continue
                    #
                    # speedup values and confidence intervals
                    #
                    seq_vals = results[cfg.threads[-1]][prob][var.replace ('par', 'seq')][lang][0]
                    # sequential base
                    base = FloatVector (seq_vals)
                    # base with p = 1
                    base_p1 = FloatVector (results[1][prob][var][lang][0])
                    # use fastest sequential program
                    if basis == 'fastest' and mean (base_p1) < mean(base):
                        base = base_p1
                    elif basis == 'seq':
                        pass
                    elif basis == 'p1':
                        base = base_p1
                    # ratio confidence interval via R's pairwiseCI
                    labels = ['Base'] * r.length(base)[0] + ['N']*r.length (vals)[0]
                    df = DataFrame ({'Times': base + vals,
                                     'Type': StrVector(labels)})
                    ratio_test = r['pairwiseCI'] (r('Times ~ Type'), data=df,
                                                  control='N',
                                                  method='Param.ratio',
                                                  **{'var.equal': False})[0][0]
                    speedups.append (mean(base) / time)
                    speedup_lowers.append (ratio_test[1][0])
                    speedup_uppers.append (ratio_test[2][0])
    df = robjects.DataFrame({'Language': StrVector (langs),
                             'Problem': StrVector (probs),
                             'Variation' : StrVector (varis),
                             'Threads': IntVector (threads),
                             'Time': FloatVector (times),
                             'SE': FloatVector (ses),
                             'Speedup': FloatVector (speedups),
                             'SpeedupLower': FloatVector (speedup_lowers),
                             'SpeedupUpper': FloatVector (speedup_uppers),
                             'Mem' : FloatVector (mems)
                             })
    r.assign ('df', df)
    r ('save (df, file="performance.Rda")')

# reshape the data to make variation not a column itself, but a part of
# the other columns describe ie, time, speedup, etc.
#
# also, remove the 'ideal' problem as we don't want it in this plot.
# Reshape the saved frame to wide form (one column set per variation) and
# plot the expert-parallel speedup curves with error bars, one facet per
# problem, to a PDF.
df = r('''
redf = reshape (df, timevar="Variation", idvar = c("Language","Problem","Threads"), direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')
r.pdf ('speedup-expertpar-all.pdf',
       height=6.5, width=10)
change_name = 'Language'
# named vector mapping languages (plus 'ideal') to shape codes
legendVec = IntVector (range (len (langs_ideal)))
legendVec.names = StrVector (langs_ideal)
gg = ggplot2.ggplot (df)
limits = ggplot2.aes (ymax = 'SpeedupUpper.expertpar', ymin = 'SpeedupLower.expertpar')
dodge = ggplot2.position_dodge (width=0.9)
pp = gg + \
     ggplot2.geom_line() + ggplot2.geom_point(size=2.5) +\
     robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') +\
     ggplot2.aes_string(x='Threads', y='Speedup.expertpar', group=change_name, color=change_name, shape=change_name) + \
     ggplot2.geom_errorbar (limits, width=0.25) + \
     ggplot2.opts (**{'axis.title.x' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10, vjust=-0.2),
                      'axis.title.y' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10, angle=90, vjust=0.2),
                      'axis.text.x' : ggplot2.theme_text(family = 'serif', size = 10),
                      'axis.text.y' : ggplot2.theme_text(family = 'serif', size = 10),
                      'legend.title' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10),
                      'legend.text' : ggplot2.theme_text(family = 'serif', size = 10),
                      'strip.text.x' : ggplot2.theme_text(family = 'serif', size = 10),
                      'aspect.ratio' : 1,
                      }) + \
     robjects.r('ylab("Speedup")') + \
     robjects.r('xlab("Number of cores")') + \
     ggplot2.facet_wrap ('Problem', nrow = 2)
pp.plot()
r['dev.off']()
def line_plot (cfg, var, control, change_name, changing, selector, base_selector, basis):
    """Plot speedup-vs-threads lines for one variation.

    `changing` enumerates the series (languages or problems, per
    `change_name`); `selector(c)(n)` yields the timings for series c at n
    threads and `base_selector(c)` the sequential baseline.  `basis` picks
    the baseline ('fastest'/'seq'/'p1').  Confidence bounds come from R's
    pairwiseCI ratio test; an 'ideal' series (speedup == n) is added for
    reference.  Draws on the currently open R device and closes it.
    """
    speedups = []
    thrds = []
    changes = []
    lowers = []
    uppers = []
    for n in cfg.threads:
        # NOTE(review): `probs` and `langs` are not defined in this function;
        # they are presumably module-level lists — verify, otherwise this
        # raises NameError.
        probs.append ('ideal')
        langs.append ('ideal')
        speedups.append (n)
        thrds.append (n)
        changes.append ('ideal')
        lowers.append (n)
        uppers.append (n)
    for c in changing:
        sel = selector (c)
        # sequential base
        base = FloatVector (base_selector(c))
        # base with p = 1
        base_p1 = FloatVector (sel(1))
        # use fastest sequential program
        if basis == 'fastest' and mean (base_p1) < mean(base):
            base = base_p1
        elif basis == 'seq':
            pass
        elif basis == 'p1':
            base = base_p1
        for n in cfg.threads:
            ntimes = FloatVector (sel(n))
            # ratio confidence interval
            labels = ['Base'] * r.length(base)[0] + ['N']*r.length (ntimes)[0]
            df = DataFrame ({'Times': base + ntimes,
                             'Type': StrVector(labels)})
            ratio_test = r['pairwiseCI'] (r('Times ~ Type'), data=df,
                                          control='N', method='Param.ratio',
                                          **{'var.equal': False,
                                             'conf.level': 0.999})[0][0]
            lowers.append (ratio_test[1][0])
            uppers.append (ratio_test[2][0])
            mn = mean (ntimes)
            speedups.append (mean(base) / mn)
            # plot slowdowns
            #speedups.append (-mn/base)#(base / mn)
            thrds.append (n)
            if change_name == 'Language':
                changes.append (pretty_langs [c])
            else:
                changes.append (c)
    df = DataFrame ({'Speedup': FloatVector (speedups),
                     'Threads': IntVector (thrds),
                     change_name: StrVector (changes),
                     'Lower': FloatVector (lowers),
                     'Upper': FloatVector (uppers)
                     })
    ideal_changing = ['ideal']
    if change_name == 'Language':
        ideal_changing.extend ([pretty_langs [c] for c in changing])
    else:
        ideal_changing.extend (changing)
    # named shape vector so each series gets a stable marker
    legendVec = IntVector (range (len (ideal_changing)))
    legendVec.names = StrVector (ideal_changing)
    gg = ggplot2.ggplot (df)
    limits = ggplot2.aes (ymax = 'Upper', ymin = 'Lower')
    dodge = ggplot2.position_dodge (width=0.9)
    pp = gg + \
         ggplot2.geom_line() + ggplot2.geom_point(size=3) +\
         ggplot2.aes_string(x='Threads', y='Speedup', group=change_name, color=change_name, shape=change_name) + \
         ggplot2.scale_shape_manual(values=legendVec) + \
         ggplot2.geom_errorbar (limits, width=0.25) + \
         ggplot2_options () + \
         ggplot2_colors () + \
         ggplot2.opts (**{'axis.title.x' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 15, vjust=-0.2)}) + \
         robjects.r('ylab("Speedup")') + \
         robjects.r('xlab("Cores")')
    # ggplot2.xlim (min(threads), max(threads)) + ggplot2.ylim(min(threads), max(threads)) +\
    pp.plot()
    r['dev.off']()
# Log-log plot of drainage area vs sediment flux with two regression fits
# (mean-annual in red, LGM in blue) and in-plot annotations built as raw R
# annotate() calls (parse=TRUE renders the R^2/slope expressions).
annotate1 = r('annotate("text", x = '+str(max(areas)-30)+', y = 0.5, color = "red", label = "Mean Annual", parse=FALSE)')
annotate2 = r('annotate("text", x = '+str(max(areas)-30)+', y = 0.42, label = "'+r_sq_lab+'", color = "red", parse=TRUE)')
annotate3 = r('annotate("text", x = '+str(max(areas)-30)+', y = 0.34, label = "slope~'+sl+'", color = "red", parse=TRUE)')
annotate4 = r('annotate("text", x = '+str(max(areas)-150)+', y = 0.7, color = "blue", label = "LGM", parse=FALSE)')
annotate5 = r('annotate("text", x = '+str(max(areas)-150)+', y = 0.6, color = "blue", label = "'+r_sq_lab_lgm+'", parse=TRUE)')
annotate6 = r('annotate("text", x = '+str(max(areas)-150)+', y = 0.5, color = "blue", label = "slope~'+sl_lgm+'", parse=TRUE)')
pp = ggplot2.ggplot(dat_frame) + \
     ggplot2.aes_string(y='discharge', x='areas') + \
     ggplot2.ggtitle('Area vs. Sediment Flux') + \
     ggplot2.scale_x_log10(x_lab) + \
     ggplot2.theme_bw() + \
     ggplot2.stat_smooth(method = "lm", formula = 'y ~ x') + \
     ggplot2.scale_y_log10(y_lab) + \
     annotate1 + \
     annotate2 + \
     annotate3 + \
     annotate4 + \
     annotate5 + \
     annotate6 + \
     ggplot2.geom_point(color='blue') + \
     ggplot2.geom_errorbar(ggplot2.aes_string(ymin='min',ymax='max'),
                           data=dat_frame, width=.02, alpha=.3) + \
     ggplot2.geom_point(data=dat_frame2,color='red',show_guide='FALSE' ) + \
     ggplot2.stat_smooth(data=dat_frame2, method = "lm", formula = 'y ~ x', color='red')
# Write the figure to PDF.
grdevices = importr('grDevices')
grdevices.pdf(file="area_qs.pdf")
pp.plot()
grdevices.dev_off()
                                               [x[0] for x in combos_r])
# NOTE(review): 'time' takes x[0] from combos_r tuples — presumably the
# recorded time is the first element there; confirm against how combos_r is
# built (the sibling snippet uses a separate times_r list instead).
d['time'] = FloatVector([x for x in times]) + FloatVector(
    [x[0] for x in combos_r])
d['n_loop'] = IntVector([x[-1] for x in combos]) + IntVector(
    [x[1] for x in combos_r])
# group label = "code:sequence", one entry per row (xrange: Python 2 code)
d['group'] = StrVector(
    [d['code'][x] + ':' + d['sequence'][x] for x in xrange(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2
# Line + point per code, one facet per sequence type.
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.opts(title = "Benchmark (running time)")

# Render to a PNG for the docs.
from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png',
              width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
def rest():
    """Exploratory/scratch analysis of the quartile summary tables.

    NOTE(review): this reads like notebook scratch code — the loop below
    immediately overwrites popa/popb/stat with fixed values and feeds
    hard-coded numbers to mannwhitneyu (the real queries are commented out),
    and several bare expressions (`pvalue`, `q1_median_q3_rep_wide`, ...)
    are leftover cell outputs with no effect as statements.
    """
    df = q1_median_q3_rep_wide
    pops = ["pdc", "dc-cd11b", "dc-cd8a"]
    stats_l = []
    # Pairwise Mann-Whitney U over all stat/population pairs (currently on
    # placeholder data — see commented-out df.query lines).
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)
        popa = "hsc"
        popb = "pdc"
        stat = "median"
        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )
        pvalue
        stats_l.append([stat, popa, popb, mw_u, pvalue])
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)
    # Population x Replicate matrix of means for the omnibus tests below.
    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )
    import scikit_posthocs
    # Kruskal-Wallis omnibus + Dunn post-hoc (BH-corrected), then one-way
    # ANOVA and Tukey HSD on the same data for comparison.
    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops],
    )
    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )
    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops],
    )
    import statsmodels
    df = kruskal_format_means.stack().reset_index()
    kruskal_format_means
    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)
    res.pvalues
    res.summary()
    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)
    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]
    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"
    q1_median_q3_rep_wide
    # Dodged point plot of Q1/median/Q3 per population.
    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))
    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))
    a = 3
    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )
    q1_median_q3_rep_wide
    # Boxplot built from precomputed quartiles (stat="identity").
    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                    gg.aes_string(
                        x="Population",
                        fill="Population",
                        group="sample",
                        lower="Q1",
                        upper="Q3",
                        middle="median",
                        ymin="min1",
                        ymax="max99",
                        # position=gg.position_dodge(width=0.5),
                    ),
                    stat="identity",
                )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    a = 3
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )
    # image_png2(g, (ut.cm(12), ut.cm(12)))
    beta_values.loc[:, ("hsc", "1")]
def main():
    """Run a DESeq2 differential-expression analysis via rpy2 (direct calls).

    Variant of the DESeq2 driver that calls DESeq2 through importr bindings
    rather than R source strings, uses apeglm LFC shrinkage, and writes the
    QC PDF and result TSVs under ./<outdir>/.
    """
    # Command Line Stuff...
    myCommandLine = CommandLine()
    outdir = myCommandLine.args['outDir']
    group1 = myCommandLine.args['group1']
    group2 = myCommandLine.args['group2']
    batch = myCommandLine.args['batch']
    matrix = myCommandLine.args['matrix']
    prefix = myCommandLine.args['prefix']
    formula = myCommandLine.args['formula']
    # make the quant DF
    quantDF = pd.read_table(matrix, header=0, sep='\t', index_col=0)
    df = pandas2ri.py2ri(quantDF)
    #print(df.head())
    # import formula; include a batch term only when the table provides one
    formulaDF = pd.read_csv(formula,header=0, sep="\t",index_col=0)
    sampleTable = pandas2ri.py2ri(formulaDF)
    if "batch" in list(formulaDF):
        design = Formula("~ batch + condition")
    else:
        design = Formula("~ condition")
    #print(sampleTable)
    # import DESeq2
    from rpy2.robjects.packages import importr
    import rpy2.robjects.lib.ggplot2 as ggplot2
    methods = importr('methods')
    deseq = importr('DESeq2')
    grdevices = importr('grDevices')
    qqman = importr('qqman')
    dds = deseq.DESeqDataSetFromMatrix(countData = df,
                                       colData = sampleTable,
                                       design = design)
    dds = deseq.DESeq(dds)
    cont = robjects.r["grep"]("condition",
                              robjects.r['resultsNames'](dds), value=True)
    #print(cont)
    # get results; orient the results for groupA vs B
    res = deseq.results(dds, name=cont)
    # results with shrinkage
    resLFC = deseq.lfcShrink(dds, coef=cont, type="apeglm")
    resdf = robjects.r['as.data.frame'](res)
    R.assign('res', res)
    reslfc = robjects.r['as.data.frame'](resLFC)
    # plot MA and PC stats for the user
    plotMA = robjects.r['plotMA']
    plotDisp = robjects.r['plotDispEsts']
    plotPCA = robjects.r['plotPCA']
    plotQQ = robjects.r['qq']
    vsd = robjects.r['vst'](dds, blind=robjects.r['F'])
    # get pca data (colour by condition, shape by batch when present)
    if "batch" in list(formulaDF):
        pcaData = plotPCA(vsd,
                          intgroup=robjects.StrVector(("condition", "batch")),
                          returnData=robjects.r['T'])
        percentVar = robjects.r['attr'](pcaData, "percentVar")
    else:
        print(vsd)
        pcaData = plotPCA(vsd, intgroup="condition",
                          returnData=robjects.r['T'])
        percentVar = robjects.r['attr'](pcaData, "percentVar")
    # arrange
    grdevices.pdf(file="./%s/%s_QCplots_%s_v_%s.pdf" % (outdir,prefix,group1,group2))
    # Axis labels with the explained variance.  The original code appended a
    # literal "%% variance" OUTSIDE the %-formatting, which rendered a double
    # percent sign; put the %% inside the format string so one '%' appears.
    x = "PC1: %s%% variance" % int(percentVar[0]*100)
    y = "PC2: %s%% variance" % int(percentVar[1]*100)
    if "batch" in list(formulaDF):
        pp = ggplot2.ggplot(pcaData) + \
             ggplot2.aes_string(x="PC1", y="PC2", color="condition", shape="batch") + \
             ggplot2.geom_point(size=3) + \
             robjects.r['xlab'](x) + \
             robjects.r['ylab'](y) + \
             ggplot2.theme_classic() + \
             ggplot2.coord_fixed()
        pp.plot()
    else:
        pp = ggplot2.ggplot(pcaData) + \
             ggplot2.aes_string(x="PC1", y="PC2", color="condition") + \
             ggplot2.geom_point(size=3) + \
             robjects.r['xlab'](x) + \
             robjects.r['ylab'](y) + \
             ggplot2.theme_classic() + \
             ggplot2.coord_fixed()
        pp.plot()
    plotMA(res, ylim=robjects.IntVector((-3,3)), main="MA-plot results")
    #plotMA(res, main="MA-plot results")
    plotMA(resLFC, ylim=robjects.IntVector((-3,3)), main="MA-plot LFCSrrhinkage")
    #plotMA(resLFC, main="MA-plot LFCSrrhinkage")
    plotQQ(resdf.rx2('pvalue'), main="pvalue QQ")
    plotQQ(reslfc.rx2('pvalue'), main="LFCSrhinkage pvalue QQ")
    hh = ggplot2.ggplot(resdf) + \
         ggplot2.aes_string(x="pvalue") + \
         ggplot2.geom_histogram() + \
         ggplot2.theme_classic()
    hh.plot()
    plotDisp(dds, main="Dispersion Estimates")
    grdevices.dev_off()
    # Write both result tables (with and without LFC shrinkage).
    lfcOut = "./%s/%s_%s_v_%s_deseq2_results_shrinkage.tsv" % (outdir,prefix,group1,group2)
    resOut = "./%s/%s_%s_v_%s_deseq2_results.tsv" % (outdir,prefix,group1,group2)
    robjects.r['write.table'](reslfc, file=lfcOut, quote=False, sep="\t")
    robjects.r['write.table'](resdf, file=resOut, quote=False, sep="\t")