def _plt_distr(dat, col, title='', splitBy_pfill=True, pfill='label',
               independentpdf=False, fname='xdistr.pdf'):
    """Plot the distribution of column `col` as counts and as density.

    Rows of `dat` whose `pfill` value equals the string 'NA' are dropped
    first.  Two ggplot2 figures are then drawn, in this order:

    1. a histogram of counts;
    2. a density curve with a histogram overlaid, y mapped to
       '..density..'.

    Both figures are titled '<title> [Total = <number of kept rows>]'.

    Args:
        dat: table supporting `dat[column]` and boolean-mask selection
            (presumably a pandas DataFrame -- confirm against callers).
        col: name of the column to plot; its values are converted to an R
            FloatVector.  'distance' uses 1000-wide bins starting at -500
            and x limits (-1000, 51000); 'correlation' uses default bins
            and x limits (-1.1, 1.1); any other name uses default bins and
            no explicit limits.
        title: text placed at the start of the plot title.
        splitBy_pfill: when true, map `pfill` to the fill aesthetic.
        pfill: name of the column used for filtering and, optionally, fill.
        independentpdf: when true, open a PDF device on `fname`, draw both
            figures into it and close it; otherwise draw on whatever device
            is currently active.
        fname: PDF path used when `independentpdf` is true.

    Returns:
        None.
    """
    valid = dat[dat[pfill] != 'NA']  ## remove invalid pairs
    n = len(valid)
    rdf = robjects.DataFrame({
        col: robjects.FloatVector(list(valid[col])),
        pfill: robjects.StrVector(list(valid[pfill])),
    })

    pp = ggplot2.ggplot(rdf) + \
        ggplot2.ggtitle('%s [Total = %s]' % (title, n))

    ## Aesthetics shared by both figures; the density figure adds y.
    aes_args = {'x': col}
    if splitBy_pfill:
        aes_args['fill'] = pfill

    ## Plot1: counts
    p1 = pp + ggplot2.aes_string(**aes_args)
    ## Plot2: density (the density layer is added before the histogram)
    p2 = pp + ggplot2.aes_string(y='..density..', **aes_args)
    p2 = p2 + ggplot2.geom_density(alpha=.5, origin=-500)

    ## Column-specific binning and x range.
    if col == 'distance':
        hist_args = {'binwidth': 1000, 'origin': -500}
        limits = (-1000, 51000)
    elif col == 'correlation':
        hist_args = {}
        limits = (-1.1, 1.1)
    else:
        hist_args = {}
        limits = None

    p1 = p1 + ggplot2.geom_histogram(alpha=.5, position='identity',
                                     **hist_args)
    p2 = p2 + ggplot2.geom_histogram(alpha=.33, position='identity',
                                     **hist_args)
    if limits is not None:
        p1 = p1 + ggplot2.xlim(*limits)
        p2 = p2 + ggplot2.xlim(*limits)

    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(file=fname)
        try:
            p1.plot()
            p2.plot()
        finally:
            ## Always close the PDF device, even when plotting fails, so
            ## later drawing does not land in a half-written file.
            grdevices.dev_off()
    else:
        p1.plot()
        p2.plot()
    return
def makeDistancePlot(alldata, figurename, feature="distance"):
    """Write a PNG density plot of `feature`, filled by continent.

    Args:
        alldata: table with `het`, `hom` and `continent` columns.  NOTE: a
            'distance' column (het + hom) is added to it IN PLACE, so the
            caller's object is modified.
        figurename: path of the PNG file to write.
        feature: name of the column mapped to the x axis.  The plot title
            is fixed regardless of this value.

    Returns:
        None.
    """
    alldata["distance"] = alldata.het + alldata.hom
    r_dataframe = com.convert_to_r_dataframe(alldata)

    p = ggplot2.ggplot(r_dataframe) + \
        ggplot2.aes_string(x=feature) + \
        ggplot2.geom_density(ggplot2.aes_string(fill="factor(continent)")) + \
        ggplot2.ggtitle("Distance from Reference by Continent") + \
        ggplot2.theme(**mytheme)

    grdevices.png(figurename)
    try:
        p.plot()
    finally:
        # Close the PNG device even if rendering fails; otherwise it stays
        # open and captures whatever is drawn next.
        grdevices.dev_off()
# Draw two ggplot2 figures side by side on one grid page.
# Start a new page and push a viewport with a 1-row, 2-column layout.
grid.newpage()
grid.viewport(layout=grid.layout(1, 2)).push()
gp = ggplot2.ggplot(dataf_rnorm)
# Left cell (row 1, column 1): frequency polygons coloured by factor(mean).
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
pp = gp + \
    ggplot2.aes_string(x='value', col='factor(mean)') + \
    ggplot2.geom_freqpoly()
pp.plot(vp=vp)
# Right cell (row 1, column 2): semi-transparent densities filled by
# factor(mean).
vp = grid.viewport(**{'layout.pos.col': 2, 'layout.pos.row': 1})
# NOTE(review): the '#-- ...-begin' / '#-- ...-end' lines look like snippet
# markers read by external tooling -- keep them verbatim; confirm.
#-- ggplot2geomfreqpolyfillcyl-begin
pp = gp + \
    ggplot2.aes_string(x='value', fill='factor(mean)') + \
    ggplot2.geom_density(alpha = 0.5)
#-- ggplot2geomfreqpolyfillcyl-end
pp.plot(vp=vp)
# Close the current graphics device (it was opened before this excerpt).
grdevices.dev_off()
# Open a 612x612 PNG device for the next figure.
grdevices.png('../../_static/graphics_ggplot2geompointandrug.png',
              width=612, height=612, antialias="subpixel", type="cairo")
#-- ggplot2geompointandrug-begin
gp = ggplot2.ggplot(mtcars)
# NOTE(review): the expression below is cut off in this excerpt; its
# remaining terms are not visible here.
pp = gp + \
    ggplot2.aes_string(x='wt', y='mpg') + \
# Draw two ggplot2 figures side by side on one grid page.
# Start a new page and push a viewport with a 1-row, 2-column layout.
grid.newpage()
grid.viewport(layout=grid.layout(1, 2)).push()
gp = ggplot2.ggplot(dataf_rnorm)
# Left cell (row 1, column 1): frequency polygons coloured by factor(mean).
vp = grid.viewport(**{'layout.pos.col':1, 'layout.pos.row': 1})
pp = gp + \
    ggplot2.aes_string(x='value', col='factor(mean)') + \
    ggplot2.geom_freqpoly()
pp.plot(vp = vp)
# Right cell (row 1, column 2): semi-transparent densities filled by
# factor(mean).
vp = grid.viewport(**{'layout.pos.col':2, 'layout.pos.row': 1})
# NOTE(review): the '#-- ...-begin' / '#-- ...-end' lines look like snippet
# markers read by external tooling -- keep them verbatim; confirm.
#-- ggplot2geomfreqpolyfillcyl-begin
pp = gp + \
    ggplot2.aes_string(x='value', fill='factor(mean)') + \
    ggplot2.geom_density(alpha = 0.5)
#-- ggplot2geomfreqpolyfillcyl-end
pp.plot(vp = vp)
# Close the current graphics device (it was opened before this excerpt).
grdevices.dev_off()
# Open a 612x612 PNG device for the next figure.
grdevices.png('../../_static/graphics_ggplot2geompointandrug.png',
              width = 612, height = 612, antialias="subpixel", type="cairo")
#-- ggplot2geompointandrug-begin
gp = ggplot2.ggplot(mtcars)
# NOTE(review): the expression below is cut off in this excerpt; its
# remaining terms are not visible here.
pp = gp + \
    ggplot2.aes_string(x='wt', y='mpg') + \
    ggplot2.geom_point() + \
def _plt_percountr(dat, independentpdf=False, fname='xpercount.pdf'):
    """Plot how many miRNA each TSS has and how many TSS each miRNA has.

    Rows whose 'label' equals the string 'NA' are dropped first.  Six
    figures are drawn, in this order:

    1. TSS: histogram of counts;
    2. TSS: density curve with histogram overlaid;
    3. miRNA: histogram of counts;
    4. miRNA: density curve with histogram overlaid;
    5. miRNA: histogram of counts, filled by label;
    6. miRNA: density curve with histogram overlaid, filled by label.

    Args:
        dat: table with 'tss', 'mirna' and 'label' columns (presumably a
            pandas DataFrame -- confirm against callers).
        independentpdf: when true, open a PDF device on `fname`, draw all
            figures into it and close it; otherwise draw on whatever
            device is currently active.
        fname: PDF path used when `independentpdf` is true.

    Returns:
        None.
    """
    def _filt_dat(dat, item, getlabel=True):
        """Count rows per distinct `item` value.

        Returns a tuple (table, number of distinct values, largest count).
        With `getlabel`, the table also gets a 'label' column holding the
        label of the first row that carries each value.
        """
        df = pd.DataFrame(dat[item].value_counts())
        df.columns = ['count']
        if getlabel:
            df['label'] = [
                list(dat[dat[item] == i]['label'])[0] for i in df.index
            ]
        return df, len(df), max(df['count'])

    dat = dat[dat['label'] != 'NA']

    ## NUMBER OF MIRNA PER TSS
    counts, n, mx = _filt_dat(dat, 'tss', False)
    rdf = robjects.DataFrame({'count': robjects.IntVector(counts['count'])})

    pt = ggplot2.ggplot(rdf) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5,
                               position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.aes_string(x='count') + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    pt_den = ggplot2.ggplot(rdf) + \
        ggplot2.aes_string(x='count', y='..density..') + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity',
                               origin=-.5) + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    ## NUMBER OF TSS PER MIRNA
    counts, n, mx = _filt_dat(dat, 'mirna')
    rdf = robjects.DataFrame({
        'count': robjects.IntVector(counts['count']),
        'label': robjects.StrVector(counts['label']),
    })

    ## Bases without aesthetics; the plain and per-label variants below
    ## differ only in the mapping added on top.
    _pm = ggplot2.ggplot(rdf) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5,
                               position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    _pm_den = ggplot2.ggplot(rdf) + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity',
                               origin=-.5) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    ## not split by label
    pm = _pm + ggplot2.aes_string(x='count')
    pm_den = _pm_den + ggplot2.aes_string(x='count', y='..density..')

    ## split by label
    pms = _pm + ggplot2.aes_string(x='count', fill='label')
    pm_dens = _pm_den + ggplot2.aes_string(
        x='count', fill='label', y='..density..')

    ## add xlabelling (need to be added after aes_string)
    _xlab = ggplot2.labs(x='Number of TSS per miRNA (max = %s)' % mx)
    pm = pm + _xlab
    pm_den = pm_den + _xlab
    pms = pms + _xlab
    pm_dens = pm_dens + _xlab

    figures = (pt, pt_den, pm, pm_den, pms, pm_dens)
    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(fname)
        try:
            for fig in figures:
                fig.plot()
        finally:
            ## Always close the PDF device, even when a figure fails to
            ## render, so the device is not left open.
            grdevices.dev_off()
    else:
        for fig in figures:
            fig.plot()
    return
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800,
         size=2, smooth=True, point=True, jitter=False, boxplot=False,
         boxplot2=False, title=False, flip=False, se=False, density=False,
         line=False):
    """Render `self.df` with ggplot2 into the PNG file `fn`.

    Layers are added in a fixed order: boxplot, points, boxplot2, smoother,
    density, line; then theme options, the hue colour scale and, last, the
    optional coordinate flip.

    Args:
        fn: path of the PNG to write; its last '/'-separated component is
            used as the title when `title` is falsy.
        x, y: names of the columns mapped to the x and y axes.
        col: optional column mapped to colour (and to fill for points and
            boxplots).
        group: optional column mapped to the group aesthetic.
        w, h: width and height passed to the PNG device.
        size: point size.
        smooth: truthy adds a stat_smooth layer; the string 'lm' selects
            method='lm'.
        point: draw a point layer.
        jitter: draw the points with position='jitter'.
        boxplot: draw a boxplot layer underneath the points.
        boxplot2: draw a boxplot layer on top of the points; with `col`
            it is created with outlier_colour="NA".
        title: plot title; falsy means derive it from `fn`.
        flip: add coord_flip().
        se: passed through to stat_smooth.
        density: add a geom_density layer with y mapped to '..count..'.
        line: add a geom_line layer with position='jitter'.

    Returns:
        None.  Prints '>> saved: <fn>' once the figure has been written.
    """
    df = self.df
    grdevices = importr('grDevices')
    if not title:
        title = fn.split("/")[-1]

    grdevices.png(file=fn, width=w, height=h)
    try:
        pp = ggplot2.ggplot(df)

        # Base mapping: x/y always, colour and group only when supplied.
        aes_args = {'x': x, 'y': y}
        if col:
            aes_args['col'] = col
        if group:
            aes_args['group'] = group
        pp += ggplot2.aes_string(**aes_args)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                           color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            point_args = {'size': size}
            if jitter:
                point_args['position'] = 'jitter'
            if col:
                pp += ggplot2.geom_point(
                    ggplot2.aes_string(fill=col, col=col), **point_args)
            else:
                pp += ggplot2.geom_point(**point_args)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                           color='blue',
                                           outlier_colour="NA")
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            smooth_args = {'size': 1, 'se': se}
            if smooth == 'lm':
                smooth_args['method'] = 'lm'
            if col:
                pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                          **smooth_args)
            else:
                pp += ggplot2.stat_smooth(col='blue', **smooth_args)

        if density:
            pp += ggplot2.geom_density(
                ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        # NOTE(review): opts()/theme_text() look like the older ggplot2
        # theming API -- confirm the installed ggplot2 still provides them.
        pp += ggplot2.opts(**{
            'title': title,
            'axis.text.x': ggplot2.theme_text(size=24),
            'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
        })
        pp += ggplot2.scale_colour_hue()

        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
    finally:
        # The PNG device is opened before the plot is assembled; close it
        # even when a layer or the rendering fails so it is never leaked.
        grdevices.dev_off()

    # Single-argument parenthesised form prints the same text on Python 2
    # and is also valid Python 3.
    print(">> saved: " + fn)
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800,
         size=2, smooth=True, point=True, jitter=False, boxplot=False,
         boxplot2=False, title=False, flip=False, se=False, density=False,
         line=False):
    """Render `self.df` with ggplot2 into the PNG file `fn`.

    Layers are added in a fixed order: boxplot, points, boxplot2, smoother,
    density, line; then theme options, the hue colour scale and, last, the
    optional coordinate flip.

    Args:
        fn: path of the PNG to write; its last '/'-separated component is
            used as the title when `title` is falsy.
        x, y: names of the columns mapped to the x and y axes.
        col: optional column mapped to colour (and to fill for points and
            boxplots).
        group: optional column mapped to the group aesthetic.
        w, h: width and height passed to the PNG device.
        size: point size.
        smooth: truthy adds a stat_smooth layer; the string 'lm' selects
            method='lm'.
        point: draw a point layer.
        jitter: draw the points with position='jitter'.
        boxplot: draw a boxplot layer underneath the points.
        boxplot2: draw a boxplot layer on top of the points; with `col`
            it is created with outlier_colour="NA".
        title: plot title; falsy means derive it from `fn`.
        flip: add coord_flip().
        se: passed through to stat_smooth.
        density: add a geom_density layer with y mapped to '..count..'.
        line: add a geom_line layer with position='jitter'.

    Returns:
        None.  Prints '>> saved: <fn>' once the figure has been written.
    """
    df = self.df
    grdevices = importr('grDevices')
    if not title:
        title = fn.split("/")[-1]

    grdevices.png(file=fn, width=w, height=h)
    try:
        pp = ggplot2.ggplot(df)

        # Base mapping: x/y always, colour and group only when supplied.
        aes_args = {'x': x, 'y': y}
        if col:
            aes_args['col'] = col
        if group:
            aes_args['group'] = group
        pp += ggplot2.aes_string(**aes_args)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                           color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            point_args = {'size': size}
            if jitter:
                point_args['position'] = 'jitter'
            if col:
                pp += ggplot2.geom_point(
                    ggplot2.aes_string(fill=col, col=col), **point_args)
            else:
                pp += ggplot2.geom_point(**point_args)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                           color='blue',
                                           outlier_colour="NA")
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            smooth_args = {'size': 1, 'se': se}
            if smooth == 'lm':
                smooth_args['method'] = 'lm'
            if col:
                pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                          **smooth_args)
            else:
                pp += ggplot2.stat_smooth(col='blue', **smooth_args)

        if density:
            pp += ggplot2.geom_density(
                ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        # NOTE(review): opts()/theme_text() look like the older ggplot2
        # theming API -- confirm the installed ggplot2 still provides them.
        pp += ggplot2.opts(**{
            'title': title,
            'axis.text.x': ggplot2.theme_text(size=24),
            'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
        })
        pp += ggplot2.scale_colour_hue()

        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
    finally:
        # The PNG device is opened before the plot is assembled; close it
        # even when a layer or the rendering fails so it is never leaked.
        grdevices.dev_off()

    # Single-argument parenthesised form prints the same text on Python 2
    # and is also valid Python 3.
    print(">> saved: " + fn)
# re-index to avoid duplicate row.names in Rdf samples.index = npy.arange(len(samples)) samplesgrouped = samples.groupby(['model']) variances = samplesgrouped['Zweighted'].aggregate(npy.var) print variances print variances['BG'] / variances['BS'] print estimatesum(samples) print samplesgrouped['Zweighted'].aggregate(estimatesum) print trueZnsum # grdevices.png(file="sampled-Z.png", width=4, height=3, units="in", res=300) rsamples = com.convert_to_r_dataframe(samples) pp = ggplot2.ggplot(rsamples) + \ ggplot2.aes_string(x='Z', color='factor(model)') + \ ggplot2.scale_colour_discrete(name="model") + \ ggplot2.geom_density() + \ ggplot2.scale_x_log10() # ggplot2.scale_x_continuous(limits=FloatVector((0, 1))) pp.plot() # grdevices.dev_off() def makeestimate(sampler, numsamples, **kwargs): samples = sample(sampler, numsamples, **kwargs) return estimatesum(samples['Zweighted']) def makeestimates(sampler, numsamples, numestimates, **kwargs): estimates = [ makeestimate(sampler, numsamples, **kwargs) for _ in xrange(numestimates)]
emdf = pd.DataFrame({ 'BSdists' : distsbs, 'BGdists' : distsbg, 'truesums' : truesums, 'varratios' : varratios, }) # Plot sampled Z logging.info('Plotting sampled Zn') grdevices.png(file="sampled-Z.png", width=4, height=3, units="in", res=300) rsamples = com.convert_to_r_dataframe(samples) pp = ggplot2.ggplot(rsamples) + \ ggplot2.aes_string(x='Z', color='factor(model)') + \ ggplot2.scale_colour_discrete(name="model") + \ ggplot2.geom_density() + \ ggplot2.scale_x_log10() # ggplot2.scale_x_continuous(limits=FloatVector((0, 1))) pp.plot() grdevices.dev_off() # Plot likelihood ratios logging.info('Plotting likelihood ratios from binding site samples') grdevices.png(file="sampled-ratios.png", width=4, height=3, units="in", res=300) rsamplesbs = com.convert_to_r_dataframe(samples[samples['model'] == 'BS']) pp = ggplot2.ggplot(rsamplesbs) + \ ggplot2.aes_string(x='ir') + \ ggplot2.geom_density() + \ ggplot2.scale_x_log10() pp.plot()