def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Build a ggplot of phylogenetic informativeness ('pi') per interval.

    The query produced by ``get_interval_query`` is run through R's
    ``dbGetQuery`` and stored in the R global ``data``.  When ``boxplot`` is
    true the result is a boxplot overlaid with per-locus jittered points;
    otherwise it is a bar chart faceted by locus.  The plot object is
    returned without being rendered.
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    frame = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))
    # `interval` is a factor, so it is rebuilt with the explicit level
    # ordering computed by order_intervals() before plotting.
    levels = order_intervals(frame[1])
    robjects.r('''data$interval <- factor(data$interval, {})'''.format(levels))
    base = ggplot2.ggplot(robjects.r('''data'''))
    if boxplot:
        figure = (
            base
            + ggplot2.aes_string(x = 'interval', y = 'pi')
            + ggplot2.geom_boxplot(**{'outlier.size': 0, 'alpha': 0.3})
            + ggplot2.geom_jitter(
                ggplot2.aes_string(color = 'locus'),
                size = 3,
                alpha = 0.6,
                position = ggplot2.position_jitter(width=0.25)
            )
        )
    else:
        figure = (
            base
            + ggplot2.aes_string(x = 'interval', y = 'pi', fill='locus')
            + ggplot2.geom_bar()
            + ggplot2.facet_wrap(robjects.Formula('~ locus'))
            + ggplot2.opts(**{
                'axis.text.x': ggplot2.theme_text(angle = -90, hjust = 0),
                'legend.position': 'none'
            })
        )
    # Both variants end with the same axis titles.
    return (
        figure
        + ggplot2.scale_y_continuous('phylogenetic informativeness')
        + ggplot2.scale_x_discrete('interval (years ago)')
    )
def plot(request):
    """Return a PNG boxplot comparing similarity of grammatical and ungrammatical sentences.

    Sentences whose rating is 'N' are excluded.  The plot is rendered by R
    into ``data.png`` in the working directory and the file's bytes are
    returned as an ``image/png`` response.
    """
    r = robjects.r
    ungram = Sentence.objects.filter(grammatical=False).exclude(
        rating='N').values_list('similarity', flat=True)
    gram = Sentence.objects.filter(grammatical=True).exclude(
        rating='N').values_list('similarity', flat=True)
    gram_r = robjects.FloatVector(gram)
    ungram_r = robjects.FloatVector(ungram)

    # Build one R data frame per group, then stack them with rbind.
    df = robjects.r["data.frame"]
    gram_df = df(gram="GRAM", similarity=gram_r)
    ungram_df = df(gram="UNGRAM", similarity=ungram_r)
    rbind = r['rbind']
    data = rbind(gram_df, ungram_df)

    pp = ggplot2.ggplot(data) + \
        ggplot2.aes_string(x="gram", y="similarity") + \
        ggplot2.geom_boxplot()

    grdevices = importr('grDevices')
    # NOTE(review): the fixed file name is shared by every request, so
    # concurrent requests could overwrite each other's image - confirm.
    grdevices.png(file="data.png", width=580, height=512)
    try:
        pp.plot()
    finally:
        # Always close the device so a failed render does not leave it open.
        grdevices.dev_off()

    with open("data.png", "rb") as image_file:
        image_data = image_file.read()
    # `content_type` replaces the `mimetype` keyword removed in Django 1.7.
    return HttpResponse(image_data, content_type="image/png")
def plot(request):
    """Return a PNG boxplot comparing similarity of grammatical and ungrammatical sentences.

    Sentences whose rating is 'N' are excluded.  The plot is rendered by R
    into ``data.png`` in the working directory and the file's bytes are
    returned as an ``image/png`` response.
    """
    r = robjects.r
    ungram = Sentence.objects.filter(grammatical=False).exclude(
        rating='N').values_list('similarity', flat=True)
    gram = Sentence.objects.filter(grammatical=True).exclude(
        rating='N').values_list('similarity', flat=True)
    gram_r = robjects.FloatVector(gram)
    ungram_r = robjects.FloatVector(ungram)

    # Build one R data frame per group, then stack them with rbind.
    df = robjects.r["data.frame"]
    gram_df = df(gram="GRAM", similarity=gram_r)
    ungram_df = df(gram="UNGRAM", similarity=ungram_r)
    rbind = r['rbind']
    data = rbind(gram_df, ungram_df)

    pp = ggplot2.ggplot(data) + \
        ggplot2.aes_string(x="gram", y="similarity") + \
        ggplot2.geom_boxplot()

    grdevices = importr('grDevices')
    # NOTE(review): the fixed file name is shared by every request, so
    # concurrent requests could overwrite each other's image - confirm.
    grdevices.png(file="data.png", width=580, height=512)
    try:
        pp.plot()
    finally:
        # Always close the device so a failed render does not leave it open.
        grdevices.dev_off()

    with open("data.png", "rb") as image_file:
        image_data = image_file.read()
    # `content_type` replaces the `mimetype` keyword removed in Django 1.7.
    return HttpResponse(image_data, content_type="image/png")
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals, image_file_type):
    """Write a jittered boxplot comparing two sample sets to ``plotName``.

    The values are labelled "set1" and "set2" on the x axis.  A PDF is
    produced when ``image_file_type`` is "pdf"; any other value yields a
    512x512 PNG.  The two value sequences are joined with ``+``, so they
    must be of a type that concatenates that way (e.g. lists).
    """
    labels = ["set1"] * len(samp_set1_vals) + ["set2"] * len(samp_set2_vals)
    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(labels),
        "value": robjects.FloatVector(samp_set1_vals + samp_set2_vals)
    })

    pp = (
        ggplot2.ggplot(dframe)
        + ggplot2.aes_string(x="sample", y='value')
        + ggplot2.geom_boxplot()
        + ggplot2.geom_jitter()
        + ggplot2.theme_bw()
    )

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Close the device even when rendering fails, so it is not leaked.
        grdevices.dev_off()
def render_plot(gp, args):
    """Render a plot using ggplot

    :gp: A base ggplot2 object
    :args: Options, wrapped in ``util.Namespace``; the fields read are:
        :x: The x value expression
        :y: The y value expression
        :type: The type of plot to make ('points', 'lines' or 'boxplot')
        :facets: Optional formula string passed to ``facet_grid``

    Raises ``NotImplementedError`` (a subclass of ``Exception``) for an
    unknown plot type.  Faceting and rendering stay best-effort: failures
    there are logged with a traceback instead of being raised.
    """
    import logging

    import rpy2.robjects.lib.ggplot2 as ggplot2

    log = logging.getLogger(__name__)
    args = util.Namespace(args)

    pp = gp + ggplot2.aes_string(x=args.x, y=args.y)

    geoms = {
        'points': ggplot2.geom_point,
        'lines': ggplot2.geom_line,
        'boxplot': ggplot2.geom_boxplot,
    }
    if args.type not in geoms:
        raise NotImplementedError("{0} not implemented".format(args.type))
    pp = pp + geoms[args.type]()

    if args.facets is not None:
        try:
            pp = pp + ggplot2.facet_grid(ro.Formula(args.facets))
        except Exception:
            # Previously swallowed silently; keep going without facets but
            # leave a trace of what went wrong.
            log.warning("could not apply facets %r; plotting without them",
                        args.facets, exc_info=True)

    try:
        pp.plot()
    except Exception:
        # Previously swallowed silently; report the failure without raising.
        log.warning("rendering the plot failed", exc_info=True)
def BoxPlot_One(self, metabolite):
    """Draw a boxplot of one metabolite's values grouped by ``self.metadata``.

    ``metabolite`` is looked up in ``self.metabolite_dict`` to obtain the
    column of ``self.raw_data`` that is plotted on the y axis.
    """
    # Close every graphics device that is still open before drawing.
    r('graphics.off()')
    base = ggplot2.ggplot(self.raw_data)
    # The column name is wrapped in back-ticks before it is handed to aes_string.
    y_column = '`' + self.metabolite_dict[metabolite] + '`'
    figure = (
        base
        + ggplot2.aes_string(x=self.metadata, y=y_column)
        + ggplot2.geom_boxplot()
    )
    figure.plot()
def boxPlot(self, dataframe, filename, x_parm, y_parm):
    """Write a 512x512 PNG boxplot of ``y_parm`` against ``x_parm`` to ``filename``.

    ``x_parm`` and ``y_parm`` are aesthetic expressions given as strings and
    are evaluated against ``dataframe``.
    """
    grdevices.png(file=filename, width=512, height=512)
    try:
        figure = (
            ggplot2.ggplot(dataframe)
            + ggplot2.aes_string(x=x_parm, y=y_parm)
            + ggplot2.geom_boxplot(alpha=0.7, fill="aquamarine3")
        )
        figure.plot()
    finally:
        # Close the PNG device even when building or rendering the plot fails.
        grdevices.dev_off()
def plot_coef(feat_mat_dir, model_dir, expt_names, pref, outfile=None, height=120, fsize=12):
    """Boxplot classification and regression coefficients per feature.

    For every experiment name ``ex`` the model ``<pref><ex>_model.pkl`` is
    read from ``model_dir`` and the feature matrix ``<ex>_feat_mat.npz`` from
    ``feat_mat_dir``.  Coefficients of all experiments are joined column-wise
    and drawn as one boxplot per feature, faceted by coefficient kind.  The
    plot is shown when ``outfile`` is None and saved with ``ggsave``
    otherwise.  Returns the wide (un-melted) pandas data frame.
    """
    for expt_idx, ex in enumerate(expt_names):
        model = read_model(os.path.join(model_dir, pref + ex + '_model.pkl'))
        _, _, expt_feat_names, _ = read_feat_mat(
            os.path.join(feat_mat_dir, ex + '_feat_mat.npz'))
        if expt_idx == 0:
            feat_names = expt_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            # Every experiment must list its features in the same order.
            assert all(a == b for a, b in zip(feat_names, expt_feat_names))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)
    nexpt = expt_idx + 1

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below reads the data row-first, matching np.repeat.
    coef_df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size,)),
        'Regression': np.reshape(reg_coef, (reg_coef.size,))
    })
    long_df = pd.melt(coef_df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(long_df)

    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(feature)', y='value')
        + ggplot2.facet_wrap('fun', scales='free_y')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Importance')
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65, vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize),
            'strip.text.x': ggplot2.element_text(size=fsize + 1)
        })
    )

    # 22 mm per experiment, never narrower than 80 mm.
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename=outfile, plot=gp, width=w, height=height, unit='mm')
    return coef_df
def boxPlot(self, dataframe, filename, x_parm, y_parm):
    """Write a 512x512 PNG boxplot of ``y_parm`` against ``x_parm`` to ``filename``.

    ``x_parm`` and ``y_parm`` are aesthetic expressions given as strings and
    are evaluated against ``dataframe``.
    """
    grdevices.png(file=filename, width=512, height=512)
    try:
        figure = (
            ggplot2.ggplot(dataframe)
            + ggplot2.aes_string(x=x_parm, y=y_parm)
            + ggplot2.geom_boxplot(alpha=0.7, fill="aquamarine3")
        )
        figure.plot()
    finally:
        # Close the PNG device even when building or rendering the plot fails.
        grdevices.dev_off()
def plot_cv_r2(pandas_df, outfile, fsize=10, height=120, max_width=50, xlab='Parameters'):
    """Makes boxplots of cross-validation results for different parameter settings

    ``pandas_df`` must provide the columns 'title' (x axis, as a factor) and
    'r2' (y axis).  The figure is saved to ``outfile`` in millimetres; its
    width is 5 mm per distinct title but never less than ``max_width``.
    """
    n_titles = len(set(pandas_df['title']))
    r_df = com.convert_to_r_dataframe(pandas_df)

    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(title)', y='r2')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('R-squared')
        + ggplot2.scale_x_discrete(xlab)
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65, vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize)
        })
    )

    plot_width = max(5 * n_titles, max_width)
    ro.r.ggsave(filename=outfile, plot=gp, width=plot_width, height=height, unit='mm')
def runBoruta():
    """Run the R ``findRelevant`` routine and boxplot the selected mass z-scores.

    Loads ``Rcode/zscores.RData`` and sources ``borutaRelevance.R`` from a
    hard-coded path, calls ``findRelevant`` on the R globals
    ``massallfeatures`` and ``nonmassallfeatures``, then plots the
    ``masszscore_selected`` element of the result.
    """
    base.load("Rcode/zscores.RData")
    base.source('Z:/Cristina/MassNonmass/codeProject/codeBase/trainClassifier/Rcode/borutaRelevance.R')

    find_relevant = globalenv['findRelevant']
    outputBoruta = find_relevant(globalenv['massallfeatures'],
                                 globalenv['nonmassallfeatures'])

    # generate boxplot comparison of relevant mass features vs. the same non-mass feature
    selected = outputBoruta.rx2("masszscore_selected")
    figure = (
        ggplot2.ggplot(selected)
        + ggplot2.aes_string(x='MorN', y='zscores', fill = 'factor(MorN)')
        + ggplot2.geom_boxplot()
        + ggplot2.opts(title = "Comparison of Z-scores for Mass confirmed features",
                       y="Z-scores")
    )
    figure.plot()
def makeDistanceBox( alldata, figurename, feature="distance") :
    """Write a PNG boxplot of ``feature`` grouped by continent to ``figurename``.

    A "distance" column (``het + hom``) is added to ``alldata`` in place, so
    the caller's data frame is modified.  The plot is styled with the
    module-level ``mytheme`` settings.
    """
    alldata["distance"] = alldata.het + alldata.hom
    r_dataframe = com.convert_to_r_dataframe(alldata)
    p = (
        ggplot2.ggplot(r_dataframe)
        + ggplot2.aes_string(x="factor(continent)", y=feature)
        + ggplot2.geom_boxplot()
        + ggplot2.ggtitle("Distance from Reference by Continent")
        + ggplot2.theme(**mytheme)
    )

    grdevices.png(figurename)
    try:
        p.plot()
    finally:
        # Close the PNG device even when rendering fails.
        grdevices.dev_off()
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Write ``<directory>/<expt_id>.png``: read counts by barcode match status.

    The vectors come from ``get_vectors``; counts are plotted in millions as
    a boxplot with jittered points, titled with ``expt_id``.  The image is
    4x5 inches at 300 dpi.
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes,
                    'count': counts,
                    'matched': matches})
    p = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x='factor(matched)', y='count / 1000000')
        + ggplot2.geom_boxplot(outlier_size = 0)
        + ggplot2.geom_jitter()
        + ggplot2.ggtitle(label = expt_id)
        + ggplot2.ggplot2.xlab(label = "")
        + ggplot2.scale_y_continuous(name = "Count\n(million reads)")
    )

    filename = "{0}/{1}.png".format(directory, expt_id)
    grdevices.png(filename=filename, width=4, height=5, unit='in', res=300)
    try:
        p.plot()
    finally:
        # Close the PNG device even when rendering fails.
        grdevices.dev_off()
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Write a PDF boxplot of 'abundance' by 'genotype' to ``plot_file_path``.

    The boxplot is overlaid with horizontally jittered points and with the
    un-jittered points, and labelled with ``title``, ``xlabel`` and
    ``ylabel``.
    """
    grdevices.pdf(file=plot_file_path)
    try:
        pp = (
            ggplot2.ggplot(data)
            + ggplot2.aes_string(x='genotype', y='abundance')
            + ggplot2.geom_boxplot()
            + ggplot2.ggtitle(title)
            + ggplot2.labs(x=xlabel, y=ylabel)
            + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1))
            + ggplot2.geom_point()
        )
        pp.plot()
    finally:
        # Close the PDF device even when building or rendering the plot fails.
        grdevices.dev_off()
def ridge_cv_plot(val_err, lam_range):
    """Show a boxplot of validation error for each lambda value.

    ``val_err`` is turned into a data frame whose columns are labelled with
    ``lam_range``, melted to long form, and drawn with one box per column on
    the current graphics device.

    Source: http://rpy.sourceforge.net/rpy2/doc-2.3/html/graphics.html
    """
    wide = pd.DataFrame(val_err, columns = lam_range)
    long_df = pd.melt(wide)
    df_r = com.convert_to_r_dataframe(long_df)

    # Create boxplot
    pp = (
        ggplot2.ggplot(df_r)
        + ggplot2.aes_string(x='factor(variable)', y='value')
        + ggplot2.geom_boxplot()
        + ggplot2.ggtitle("Validation Error by Lambda")
    )
    pp.plot()
def BoxPlot_All(self):
    """Show boxplots for every metabolite, four per X11 window in a 2x2 grid.

    Column ``i`` (1-based, back-tick quoted) of ``self.raw_data`` is plotted
    against ``self.metadata`` for each entry of ``self.metabolite_list``.
    """
    # Close every graphics device that is still open before drawing.
    r('graphics.off()')

    def show_page(page):
        # One new X11 window per page of up to four plots.
        grDevices.X11()
        gridExtra.grid_arrange(*page, nrow=2, ncol=2)

    page = []
    for i in range(1, len(self.metabolite_list) + 1):
        page.append(
            ggplot2.ggplot(self.raw_data)
            + ggplot2.aes_string(x=self.metadata, y='`' + str(i) + '`')
            + ggplot2.geom_boxplot()
        )
        # Flush once the page is full; flushing before the append, as the
        # code used to, left the first page with only three plots.
        if len(page) == 4:
            show_page(page)
            page = []

    # Show the remainder, if any; nothing is opened for an empty list.
    if page:
        show_page(page)
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Creates boxplots of the thresholds used with each feature.

    Only thresholds greater than 1 are kept.  The filtered table is written
    to ``count_thresh.txt`` (tab-separated) in ``out_dir`` and the plot is
    saved there as ``count_thresh_bar.pdf`` and ``count_thresh_bar.png``.
    """
    table = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    table = table[table['thresh'] > 1]
    table.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep='\t', index=False)

    fsize = 10
    r_df = com.convert_to_r_dataframe(table)
    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(motif)', y='thresh')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Threshold counts', limits=ro.IntVector([0, 70]))
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.coord_flip()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize),
            'axis.text.y': ggplot2.element_text(size=fsize, hjust=1),
            'strip.text.x': ggplot2.element_text(size=fsize + 1)
        })
    )

    # Save the same figure once per output format.
    for ext in ['.pdf', '.png']:
        target = os.path.join(out_dir, 'count_thresh_bar' + ext)
        ro.r.ggsave(filename=target, plot=gp, width=width, height=300, unit='mm')
def plot_cv_r2(pandas_df, outfile, fsize=10, height=120, max_width=50, xlab='Parameters'):
    """Makes boxplots of cross-validation results for different parameter settings

    ``pandas_df`` must provide the columns 'title' (x axis, as a factor) and
    'r2' (y axis).  The figure is saved to ``outfile`` in millimetres; its
    width is 5 mm per distinct title but never less than ``max_width``.
    """
    n_titles = len(set(pandas_df['title']))
    r_df = com.convert_to_r_dataframe(pandas_df)

    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(title)', y='r2')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('R-squared')
        + ggplot2.scale_x_discrete(xlab)
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65, vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize)
        })
    )

    plot_width = max(5 * n_titles, max_width)
    ro.r.ggsave(filename=outfile, plot=gp, width=plot_width, height=height, unit='mm')
def compare_mean_boxplot(locus_table, interval_table, intervals, loci, names, rows):
    """Build a boxplot of mean phylogenetic informativeness per interval, filled by database.

    The data comes from ``get_r_data_by_top`` and is read back from the R
    global ``data``.  When more than one interval is given, the ``interval``
    factor is first rebuilt with the ordering from ``order_intervals``.
    ``loci`` is accepted but not used.  The plot object is returned
    without being rendered.
    """
    frame = get_r_data_by_top(locus_table, interval_table, intervals, names, rows)
    if len(intervals) > 1:
        levels = order_intervals(frame[1])
        robjects.r('''data$interval <- factor(data$interval, {})'''.format(levels))

    base = ggplot2.ggplot(robjects.r('''data'''))
    # Dotted R argument names cannot be Python keywords, hence the dict.
    box_options = {
        'outlier.size': 3,
        'outlier.colour': '#767676',
        'outlier.alpha': 0.3,
        'alpha': 0.6
    }
    return (
        base
        + ggplot2.aes_string(x = 'interval', y = 'pi')
        + ggplot2.geom_boxplot(ggplot2.aes_string(fill = 'factor(db)'), **box_options)
        + ggplot2.scale_y_continuous('mean phylogenetic informativeness')
        + ggplot2.scale_x_discrete('interval (years ago)')
        + ggplot2.scale_fill_brewer("database", palette='Blues')
    )
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Creates boxplots of the thresholds used with each feature.

    Only thresholds greater than 1 are kept.  The filtered table is written
    to ``count_thresh.txt`` (tab-separated) in ``out_dir`` and the plot is
    saved there as ``count_thresh_bar.pdf`` and ``count_thresh_bar.png``.
    """
    table = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    table = table[table['thresh'] > 1]
    table.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep='\t', index=False)

    fsize = 10
    r_df = com.convert_to_r_dataframe(table)
    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(motif)', y='thresh')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Threshold counts', limits=ro.IntVector([0, 70]))
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.coord_flip()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize),
            'axis.text.y': ggplot2.element_text(size=fsize, hjust=1),
            'strip.text.x': ggplot2.element_text(size=fsize + 1)
        })
    )

    # Save the same figure once per output format.
    for ext in ['.pdf', '.png']:
        target = os.path.join(out_dir, 'count_thresh_bar' + ext)
        ro.r.ggsave(filename=target, plot=gp, width=width, height=300, unit='mm')
def plot_coef(feat_mat_dir, model_dir, expt_names, pref, outfile = None, height = 120, fsize = 12):
    """Boxplot classification and regression coefficients per feature.

    For every experiment name ``ex`` the model ``<pref><ex>_model.pkl`` is
    read from ``model_dir`` and the feature matrix ``<ex>_feat_mat.npz`` from
    ``feat_mat_dir``.  Coefficients of all experiments are joined column-wise
    and drawn as one boxplot per feature, faceted by coefficient kind.  The
    plot is shown when ``outfile`` is None and saved with ``ggsave``
    otherwise.  Returns the wide (un-melted) pandas data frame.
    """
    for expt_idx, ex in enumerate(expt_names):
        model = read_model(os.path.join(model_dir, pref + ex + '_model.pkl'))
        _, _, expt_feat_names, _ = read_feat_mat(
            os.path.join(feat_mat_dir, ex + '_feat_mat.npz'))
        if expt_idx == 0:
            feat_names = expt_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            # Every experiment must list its features in the same order.
            assert all(a == b for a, b in zip(feat_names, expt_feat_names))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)
    nexpt = expt_idx + 1

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below reads the data row-first, matching np.repeat.
    coef_df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size,)),
        'Regression': np.reshape(reg_coef, (reg_coef.size,))
    })
    long_df = pd.melt(coef_df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(long_df)

    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(feature)', y='value')
        + ggplot2.facet_wrap('fun', scales='free_y')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Importance')
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65, vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize),
            'strip.text.x': ggplot2.element_text(size=fsize + 1)
        })
    )

    # 22 mm per experiment, never narrower than 80 mm.
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename=outfile, plot=gp, width=w, height=height, unit='mm')
    return coef_df
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals, image_file_type):
    """Write a jittered boxplot comparing two sample sets to ``plotName``.

    The values are labelled "set1" and "set2" on the x axis.  A PDF is
    produced when ``image_file_type`` is "pdf"; any other value yields a
    512x512 PNG.  The two value sequences are joined with ``+``, so they
    must be of a type that concatenates that way (e.g. lists).
    """
    labels = ["set1"] * len(samp_set1_vals) + ["set2"] * len(samp_set2_vals)
    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(labels),
        "value": robjects.FloatVector(samp_set1_vals + samp_set2_vals)
    })

    pp = (
        ggplot2.ggplot(dframe)
        + ggplot2.aes_string(x="sample", y='value')
        + ggplot2.geom_boxplot()
        + ggplot2.geom_jitter()
        + ggplot2.theme_bw()
    )

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Close the device even when rendering fails, so it is not leaked.
        grdevices.dev_off()
pp.plot(vp=vp) #-- ggplot2geomhexbin-end grdevices.dev_off() grdevices.png('../../_static/graphics_ggplot2geomboxplot.png', width=612, height=612, antialias="subpixel", type="cairo") #-- ggplot2geomboxplot-begin gp = ggplot2.ggplot(mtcars) pp = gp + \ ggplot2.aes_string(x='factor(cyl)', y='mpg') + \ ggplot2.geom_boxplot() pp.plot() #-- ggplot2geomboxplot-end grdevices.dev_off() #-- ggplot2geomhistogram-begin gp = ggplot2.ggplot(mtcars) pp = gp + \ ggplot2.aes_string(x='wt') + \ ggplot2.geom_histogram() #pp.plot() #-- ggplot2geomhistogram-end
pp.plot(vp = vp) #-- ggplot2geomhexbin-end grdevices.dev_off() grdevices.png('../../_static/graphics_ggplot2geomboxplot.png', width = 612, height = 612, antialias="subpixel", type="cairo") #-- ggplot2geomboxplot-begin gp = ggplot2.ggplot(mtcars) pp = gp + \ ggplot2.aes_string(x='factor(cyl)', y='mpg') + \ ggplot2.geom_boxplot() pp.plot() #-- ggplot2geomboxplot-end grdevices.dev_off() #-- ggplot2geomhistogram-begin gp = ggplot2.ggplot(mtcars) pp = gp + \ ggplot2.aes_string(x='wt') + \ ggplot2.geom_histogram() #pp.plot() #-- ggplot2geomhistogram-end
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
    """Render ``self.df`` to the PNG file ``fn`` as a configurable ggplot.

    ``x``/``y`` are aesthetic expressions; ``col`` and ``group`` optionally
    add colour and grouping aesthetics.  Layers are added in this order when
    their flag is set: ``boxplot`` (behind the points), ``point`` (jittered
    when ``jitter``), ``boxplot2`` (over the points), ``smooth`` (pass 'lm'
    for a linear fit; ``se`` toggles the confidence band), ``density`` and
    ``line``.  ``title`` defaults to the last path component of ``fn``;
    ``w``/``h`` give the image size and ``flip`` swaps the axes.
    """
    df = self.df
    grdevices = importr('grDevices')

    if not title:
        title = fn.split("/")[-1]

    grdevices.png(file=fn, width=w, height=h)
    try:
        pp = ggplot2.ggplot(df)

        # Base aesthetics: x and y always, colour/group only when given.
        aes_args = {'x': x, 'y': y}
        if col:
            aes_args['col'] = col
        if group:
            aes_args['group'] = group
        pp += ggplot2.aes_string(**aes_args)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            point_args = {'size': size}
            if jitter:
                point_args['position'] = 'jitter'
            if col:
                pp += ggplot2.geom_point(ggplot2.aes_string(fill=col, col=col), **point_args)
            else:
                pp += ggplot2.geom_point(**point_args)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue', outlier_colour="NA")
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            smooth_args = {'size': 1, 'se': se}
            if smooth == 'lm':
                smooth_args['method'] = 'lm'
            if col:
                pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col), **smooth_args)
            else:
                pp += ggplot2.stat_smooth(col='blue', **smooth_args)

        if density:
            pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        # NOTE(review): opts/theme_text appear to be the older ggplot2 theming
        # API; other code here uses theme/element_text - confirm before changing.
        pp += ggplot2.opts(**{'title': title,
                              'axis.text.x': ggplot2.theme_text(size=24),
                              'axis.text.y': ggplot2.theme_text(size=24, hjust=1)})
        #pp+=ggplot2.scale_colour_brewer(palette="Set1")
        pp += ggplot2.scale_colour_hue()
        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
    finally:
        # Close the PNG device even when building or rendering the plot fails.
        grdevices.dev_off()
    # Parenthesised single argument: prints the same on Python 2 and 3.
    print(">> saved: " + fn)
# print str(a) try: if dsumFC.has_key(drug): dsumFC[drug]['Fold_Change'].append(math.log10(float(val))) dsumY[drug]['Year'].append(yr) else: dsumFC[drug]= {'Fold_Change': [math.log10(float(val)),]} dsumY[drug]= {'Year': [yr,]} except: print "FAILURE: dsumFC="+str(dsumFC)+"\n\ndsumY="+str(dsumY) sys.exit() drugs = dsumFC.keys() for x in drugs: od = rlc.OrdDict([('Fold_Change',robjects.FloatVector(dsumFC[x]['Fold_Change'])),('Year',robjects.FactorVector(dsumY[x]['Year'])),('Drug',robjects.FactorVector(x))]) grdevices.pdf(file="drugs.pdf",width=7,height=7) dataf = robjects.DataFrame(od) gp3 = ggplot2.ggplot(dataf) pp3 = gp3 + ggplot2.scale_fill_brewer(palette='BrBG',name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') + ggplot2.geom_boxplot() + ggplot2.opts(title = x+" Yearly Trend") # pp3 = gp3 + ggplot2.scale_colour_hue(h=base.c(180,270),name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') + ggplot2.geom_boxplot() + ggplot2.opts(title = x+" Yearly Trend") #+ ggplot2.scale_y_log10() pp3.plot() grdevices.dev_off() f.close() print "\nfinished\n"
def rest():
    """Exploratory statistics and plots for per-population methylation quartiles.

    NOTE(review): this reads like pasted notebook code.  It depends on names
    that are not defined inside the function (q1_median_q3_rep_wide,
    q1_median_q3_rep_long, beta_values, gg, ut, rpy2_utils,
    mh_rpy2_styling, ...), contains bare expression statements whose values
    are discarded, and returns nothing.
    """
    df = q1_median_q3_rep_wide

    pops = ["pdc", "dc-cd11b", "dc-cd8a"]

    stats_l = []
    # NOTE(review): the extent of this loop body was reconstructed from
    # flattened source - confirm against the original file.
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)
        # The loop variables are overwritten with fixed values and the test
        # below runs on hard-coded samples, not on `df`.
        popa = "hsc"
        popb = "pdc"
        stat = "median"
        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )
        pvalue
        stats_l.append([stat, popa, popb, mw_u, pvalue])
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)

    # Pivot to one row per population and one column per replicate.
    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )

    import scikit_posthocs

    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )

    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # NOTE(review): a bare `import statsmodels` may not load the
    # `statsmodels.stats.multicomp` submodule used below - confirm.
    import statsmodels

    df = kruskal_format_means.stack().reset_index()

    kruskal_format_means

    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)
    res.pvalues
    res.summary()

    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)

    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]

    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"

    q1_median_q3_rep_wide

    # Within each (Population, stat) group, number the rows 1..n in
    # descending order of value; that number is the plotting group.
    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))

    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))
    a = 3

    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))

    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )

    q1_median_q3_rep_wide

    # Boxplot drawn from precomputed statistics (stat="identity"), with one
    # box per Population+Replicate "sample".
    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                        gg.aes_string(
                            x="Population",
                            fill="Population",
                            group="sample",
                            lower="Q1",
                            upper="Q3",
                            middle="median",
                            ymin="min1",
                            ymax="max99",
                            # position=gg.position_dodge(width=0.5),
                        ),
                        stat="identity",
                    )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    a = 3
    # This call is given the same png_path as the save_and_display call earlier in this function.
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )

    # image_png2(g, (ut.cm(12), ut.cm(12)))

    beta_values.loc[:, ("hsc", "1")]
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
    """Render ``self.df`` to the PNG file ``fn`` as a configurable ggplot.

    ``x``/``y`` are aesthetic expressions; ``col`` and ``group`` optionally
    add colour and grouping aesthetics.  Layers are added in this order when
    their flag is set: ``boxplot`` (behind the points), ``point`` (jittered
    when ``jitter``), ``boxplot2`` (over the points), ``smooth`` (pass 'lm'
    for a linear fit; ``se`` toggles the confidence band), ``density`` and
    ``line``.  ``title`` defaults to the last path component of ``fn``;
    ``w``/``h`` give the image size and ``flip`` swaps the axes.
    """
    df = self.df
    grdevices = importr('grDevices')

    if not title:
        title = fn.split("/")[-1]

    grdevices.png(file=fn, width=w, height=h)
    try:
        pp = ggplot2.ggplot(df)

        # Base aesthetics: x and y always, colour/group only when given.
        aes_args = {'x': x, 'y': y}
        if col:
            aes_args['col'] = col
        if group:
            aes_args['group'] = group
        pp += ggplot2.aes_string(**aes_args)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            point_args = {'size': size}
            if jitter:
                point_args['position'] = 'jitter'
            if col:
                pp += ggplot2.geom_point(ggplot2.aes_string(fill=col, col=col), **point_args)
            else:
                pp += ggplot2.geom_point(**point_args)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue', outlier_colour="NA")
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            smooth_args = {'size': 1, 'se': se}
            if smooth == 'lm':
                smooth_args['method'] = 'lm'
            if col:
                pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col), **smooth_args)
            else:
                pp += ggplot2.stat_smooth(col='blue', **smooth_args)

        if density:
            pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        # NOTE(review): opts/theme_text appear to be the older ggplot2 theming
        # API; other code here uses theme/element_text - confirm before changing.
        pp += ggplot2.opts(**{'title': title,
                              'axis.text.x': ggplot2.theme_text(size=24),
                              'axis.text.y': ggplot2.theme_text(size=24, hjust=1)})
        #pp+=ggplot2.scale_colour_brewer(palette="Set1")
        pp += ggplot2.scale_colour_hue()
        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
    finally:
        # Close the PNG device even when building or rendering the plot fails.
        grdevices.dev_off()
    # Parenthesised single argument: prints the same on Python 2 and 3.
    print(">> saved: " + fn)