def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot of the read.

    The events are drawn as a step plot of mean signal against time
    (relative to the first event) and split into stacked facets.

    args must provide ``num_facets``, ``theme_bw`` and ``saveas``.
    When ``args.saveas`` is "pdf" or "png" the plot is written to
    ``<basename of filename>.<saveas>`` in the current directory;
    when it is None the plot is shown and the function waits for enter.

    Raises Exception if the plot file already exists or if
    ``args.saveas`` is not a supported format.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - t_0

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets.
    # Floor division keeps the facet id integral on every Python version;
    # with true division each event would land in its own facet.
    num_events = len(r_mean_signals)
    events_per_facet = (num_events // args.num_facets) + 1

    # dummy variable to control faceting
    facet_category = robjects.FloatVector(
        [(i // events_per_facet) + 1 for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times,
         'mean': r_mean_signals,
         'cat': facet_category}
    df = robjects.DataFrame(d)

    title = 'Squiggle plot for read: ' + filename \
        + "\nTotal time (sec): " + str(total_time)
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='mean') \
        + ggplot2.geom_step(size=0.25) \
        + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1,
                             scales="free_x") \
        + ggplot2.scale_x_continuous('Time (seconds)') \
        + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
        + ggplot2.ggtitle(title)
    # theme_bw() is a complete theme: it must be added BEFORE the title
    # tweak, otherwise it replaces the plot.title setting.
    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()
    pp = pp + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists'
                % (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11,
                          units="in", res=300)
        else:
            # No device was opened; plotting would silently go elsewhere.
            raise Exception(
                'Cannot create plot for %s: unsupported format %s'
                % (filename, args.saveas))
        try:
            pp.plot()
        finally:
            # always release the graphics device, even if plotting fails
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
def _plt_distr(dat, col, title='', splitBy_pfill=True, pfill='label',
               independentpdf=False, fname='xdistr.pdf'):
    """
    Plot the distribution of ``dat[col]`` twice: as counts and as density.

    Rows whose ``pfill`` column equals the string 'NA' are dropped first.
    When ``splitBy_pfill`` is true the bars are filled by the ``pfill``
    column. When ``independentpdf`` is true both plots are written to the
    PDF ``fname``; otherwise they are drawn on the current device.
    """
    df = dat[dat[pfill] != 'NA']  # remove invalid pairs
    n = len(df)
    df = robjects.DataFrame({
        col: robjects.FloatVector(list(df[col])),
        pfill: robjects.StrVector(list(df[pfill])),
    })

    pp = ggplot2.ggplot(df) + \
        ggplot2.ggtitle('%s [Total = %s]' % (title, n))

    # Plot1 shows counts, Plot2 shows density
    if splitBy_pfill:
        p1 = pp + ggplot2.aes_string(x=col, fill=pfill)
        p2 = pp + ggplot2.aes_string(x=col, fill=pfill, y='..density..')
    else:
        p1 = pp + ggplot2.aes_string(x=col)
        p2 = pp + ggplot2.aes_string(x=col, y='..density..')
    p2 = p2 + ggplot2.geom_density(alpha=.5, origin=-500)

    if col == 'distance':
        # fixed 1000-wide bins and a fixed x range for distances
        p1 = p1 + \
            ggplot2.geom_histogram(binwidth=1000, alpha=.5,
                                   position='identity', origin=-500) + \
            ggplot2.xlim(-1000, 51000)
        p2 = p2 + \
            ggplot2.geom_histogram(binwidth=1000, alpha=.33,
                                   position='identity', origin=-500) + \
            ggplot2.xlim(-1000, 51000)
    else:
        p1 = p1 + ggplot2.geom_histogram(alpha=.5, position='identity')
        p2 = p2 + ggplot2.geom_histogram(alpha=.33, position='identity')

    if col == 'correlation':
        p1 = p1 + ggplot2.xlim(-1.1, 1.1)
        p2 = p2 + ggplot2.xlim(-1.1, 1.1)

    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(file=fname)
        try:
            p1.plot()
            p2.plot()
        finally:
            # close the PDF device even when a plot call raises
            grdevices.dev_off()
    else:
        p1.plot()
        p2.plot()
def plotStats(data, outFolder, tiles, prop="qual", prefix="", high="yellow",
              low="blue", pdf=False, detail=True):
    """
    Save an overview scatter plot of ``prop`` over x/y, faceted by tile,
    and optionally one detail plot per tile.

    The overview PNG is written into ``outFolder``; detail plots go into
    ``outFolder/detailPlots``. Both directories are created when missing.
    ``tiles`` is iterated and each element is formatted with ``%i``.
    When ``pdf`` is true every detail plot is additionally saved as PDF.
    """
    def _tile_plot(frame, point_size, title):
        # layer stack shared by the overview and the detail plots
        return ggplot.ggplot(frame) \
            + ggplot.aes_string(x="x", y="y", col=prop) \
            + ggplot.geom_point(size=point_size) \
            + ggplot.facet_wrap(robjects.Formula("~ tile")) \
            + ggplot.scale_colour_gradient(high=high, low=low) \
            + ggplot.ggtitle(title)

    # overview plot
    if outFolder and not os.path.isdir(outFolder):
        os.makedirs(outFolder)
    p = _tile_plot(data, 0.1, "Overview %s" % (prop))
    if prefix:
        fileName = "%s_overview_%s.png" % (prefix, prop)
    else:
        fileName = "overview_%s.png" % (prop)
    p.save(os.path.join(outFolder, fileName), scale=2)

    # detail plots
    if detail:
        detailFolder = os.path.join(outFolder, "detailPlots")
        # the save calls below fail if the target directory is missing
        if not os.path.isdir(detailFolder):
            os.makedirs(detailFolder)
        for t in tiles:
            subset = data.rx(data.rx2("tile").ro == t, True)
            p = _tile_plot(subset, 1, "%i %s" % (t, prop))
            if prefix:
                fileName = "%s_%i_%s.png" % (prefix, t, prop)
            else:
                fileName = "%i_%s.png" % (t, prop)
            p.save(os.path.join(detailFolder, fileName), scale=2)
            if pdf:
                # NOTE(review): unlike the PNG name there is no "_" between
                # prefix and tile here; kept as-is so existing output names
                # do not change -- confirm whether that is intended.
                fileName = "%s%i_%s.pdf" % (prefix, t, prop)
                p.save(os.path.join(detailFolder, fileName), scale=2)
def makeDistanceBox(alldata, figurename, feature="distance"):
    """
    Write a PNG box plot of ``feature`` grouped by continent.

    Side effect: a "distance" column (het + hom) is written into the
    ``alldata`` frame passed by the caller before it is converted to R.
    The plot is saved to ``figurename``.
    """
    alldata["distance"] = alldata.het + alldata.hom
    r_dataframe = com.convert_to_r_dataframe(alldata)
    p = ggplot2.ggplot(r_dataframe) + \
        ggplot2.aes_string(x="factor(continent)", y=feature) + \
        ggplot2.geom_boxplot() + \
        ggplot2.ggtitle("Distance from Reference by Continent") + \
        ggplot2.theme(**mytheme)

    grdevices.png(figurename)
    try:
        p.plot()
    finally:
        # release the PNG device even if rendering fails
        grdevices.dev_off()
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """
    Write ``<directory>/<expt_id>.png``: a box plot with jittered points of
    barcode counts (in millions), grouped by whether the barcode matched.

    The barcode, count and match vectors come from ``get_vectors``.
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes,
                    'count': counts,
                    'matched': matches})
    p = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='factor(matched)', y='count / 1000000') + \
        ggplot2.geom_boxplot(outlier_size=0) + \
        ggplot2.geom_jitter() + \
        ggplot2.ggtitle(label=expt_id) + \
        ggplot2.ggplot2.xlab(label="") + \
        ggplot2.scale_y_continuous(name="Count\n(million reads)")

    filename = "{0}/{1}.png".format(directory, expt_id)
    # `units` is the full argument name of R's png(); the previous `unit`
    # only worked through R's partial argument matching.
    grdevices.png(filename=filename, width=4, height=5, units='in', res=300)
    try:
        p.plot()
    finally:
        # always close the device so the PNG is flushed and not leaked
        grdevices.dev_off()
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel,
                                    ylabel):
    """
    Write a PDF box plot of 'abundance' by 'genotype' to ``plot_file_path``.

    ``data`` is handed to ggplot as-is and is expected to provide the
    'genotype' and 'abundance' columns referenced by the aesthetics.
    """
    # Build the plot before opening the device so a construction error
    # cannot leave an open PDF device behind.
    pp = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='genotype', y='abundance') \
        + ggplot2.geom_boxplot() \
        + ggplot2.ggtitle(title) \
        + ggplot2.labs(x=xlabel, y=ylabel) \
        + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1)) \
        + ggplot2.geom_point()

    grdevices.pdf(file=plot_file_path)
    try:
        pp.plot()
    finally:
        grdevices.dev_off()
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type.

    For every experiment in ``expt_names`` each pair of its columns
    (looked up through ``expt_name_idx``) is scatter-plotted and the
    Pearson correlation recorded. With ``outdir`` set, plots are saved as
    PNG files there and a tab-separated ``cor_summary.txt`` is written;
    otherwise plots are drawn on the current device.

    Returns a pandas DataFrame with columns name1, name2 and cor, indexed
    by "<experiment>-<pair number>".
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front. Doing it only inside the
    # pair loop left it missing (and the summary write failing) whenever no
    # experiment had at least two CEL files.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) for that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0
        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            col_i = expr[:, tmp_idx[i]]
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                col_j = expr[:, tmp_idx[j]]
                plot_idx += 1
                label = ex + '-' + str(plot_idx)
                cor = np.corrcoef(col_i, col_j)[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(label)

                df = ro.DataFrame({'x': ro.FloatVector(col_i),
                                   'y': ro.FloatVector(col_j)})
                gp = ggplot2.ggplot(df) \
                    + ggplot2.aes_string(x='x', y='y') \
                    + ggplot2.geom_point(size=1) \
                    + ggplot2.scale_x_continuous(name1) \
                    + ggplot2.scale_y_continuous(name2) \
                    + ggplot2.theme_bw() \
                    + ggplot2.ggtitle(
                        '{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) \
                    + ggplot2.theme(**{
                        'axis.text.x': ggplot2.element_text(size=fsize),
                        'axis.title.x': ggplot2.element_text(size=8),
                        'axis.text.y': ggplot2.element_text(size=fsize),
                        'axis.title.y': ggplot2.element_text(size=8,
                                                             angle=90),
                        'plot.title': ggplot2.element_text(size=fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, label + '.png')
                    # `units` is ggsave's full argument name
                    ro.r.ggsave(filename=outfile, plot=gp, width=85,
                                height=85, units='mm')

    df = pd.DataFrame({'name1': names_1, 'name2': names_2, 'cor': cors},
                      index=titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
def ridge_cv_plot(val_err, lam_range):
    """
    Draw a box plot of validation error for each lambda value.

    ``val_err`` is turned into a DataFrame whose columns are labelled by
    ``lam_range``, melted to long form and plotted on the current device.

    Source: http://rpy.sourceforge.net/rpy2/doc-2.3/html/graphics.html
    """
    df = pd.melt(pd.DataFrame(val_err, columns=lam_range))
    df_r = com.convert_to_r_dataframe(df)

    # Create boxplot: one box per lambda (the melted 'variable' column)
    pp = ggplot2.ggplot(df_r) + \
        ggplot2.aes_string(x='factor(variable)', y='value') + \
        ggplot2.geom_boxplot() + \
        ggplot2.ggtitle("Validation Error by Lambda")
    pp.plot()
# Histogram of the 'Length' column of `dataf`, saved as a PDF.
scales = importr('scales')
gp = ggplot2.ggplot(dataf)

# `bins` only feeds the x-axis label below.
bins = 10
# rotate the x tick labels by 90 degrees (built directly in R)
teest3 = robjects.r('theme(axis.text.x=element_text(angle=90))')

x_axis = ggplot2.scale_x_continuous(
    name="fragment lengths, bin=" + str(bins),
    breaks=scales.pretty_breaks(20))
y_axis = ggplot2.scale_y_continuous(
    labels=scales.comma,
    name="Count",
    breaks=scales.pretty_breaks(10))

pp = (gp
      + ggplot2.aes_string(x='Length')
      + ggplot2.geom_histogram()
      + ggplot2.ggtitle("Found IS fragment lengths")
      + x_axis
      + y_axis
      + teest3)
pp.plot()
robjects.r.ggsave("/Users/security/science/dna_subj_hist.pdf")
def show1():
    """Draw a bar chart of time per project from the day1 CSV export."""
    open1()
    r.source(
        'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',
        encoding="utf-8")
    frame = DataFrame.from_csvfile(
        'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')

    # one bar per project, filled by project, x labels tilted by 45 degrees
    layers = [
        ggplot2.aes_string(x='project', y='time', fill='project'),
        ggplot2.geom_bar(stat='identity'),
        ggplot2.ggtitle("今日项目时间分布图"),
        ggplot2.labs(x='项目', y='时间 (min)'),
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)}),
    ]
    chart = ggplot2.ggplot(frame)
    for layer in layers:
        chart = chart + layer
    chart.plot()
# Evaluate ECR for every building orientation from 0 to 360 in steps of 10
# and keep element [2][0] of each result as the heat demand.
heat_demand = np.zeros(37)
Bdim = robjects.FloatVector([12, 6])
for i, BO in enumerate(range(0, 361, 10)):
    res = ECR(Building_Orientation=BO, Building_Dim=Bdim)
    heat_demand[i] = res[2][0]

# Transform to R data types
hd = robjects.FloatVector(list(heat_demand))
bo = robjects.FloatVector(list(range(0, 361, 10)))

# Create a python dictionary
p_datadic = {'Heat_Demand': hd,
             'Building_Orientation': bo}

# Create R data.frame
r_dataf = robjects.DataFrame(p_datadic)

# plot with ggplot2: red line on polar coordinates, ticks every 15 units
gp = ggplot2.ggplot(r_dataf)
pp = (gp
      + ggplot2.aes_string(y='Heat_Demand', x='Building_Orientation')
      + ggplot2.geom_line(colour="red", size=1)
      + ggplot2.coord_polar(direction=-1, start=-pi/2)
      + ggplot2.ggtitle("Heat demand for all possible buildimg orientations")
      + ggplot2.scale_x_continuous(
          breaks=robjects.FloatVector(range(0, 360, 15))))
pp.plot()
grdevices.dev_off()
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run.

    Plots the cumulative yield (read count or base pairs, chosen by
    ``args.plot_type``) against time in hours since the first read.

    args must provide ``plot_type`` ('reads' or 'basepairs'), ``savedf``,
    ``extrapolate``, ``theme_bw`` and ``saveas`` (a .pdf/.png path or None).

    Raises ValueError for an unknown ``args.plot_type``; exits with
    status 1 when ``args.saveas`` has an unrecognized extension.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0, in hours; the tiny offset keeps
    # the first time point strictly positive
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) + 0.00000001 for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)
    else:
        # previously fell through to a NameError further down
        raise ValueError("Unrecognized plot type: %s" % (args.plot_type,))

    # make a data frame of the lists
    d = {'start': r_start_times,
         'lengths': r_read_lengths,
         'cumul': cumulative}
    df = robjects.DataFrame(d)

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_step(size=2) \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    # extrapolation
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        # real booleans instead of the strings 'TRUE'/'FALSE'
        pp = pp + ggplot2.stat_smooth(fullrange=True, method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se=False, start=start) \
            + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # non-zero status: this is a failure, not a clean exit
            sys.exit(1)
        try:
            pp.plot()
        finally:
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
def main():
    '''
    Run DESeq2 on a count matrix and write QC plots and result tables.

    Reads the count matrix and the sample/formula table named on the
    command line, fits DESeq2 with design "~ condition" (or
    "~ batch + condition" when the formula table has a "batch" column),
    writes a multi-page QC PDF (PCA, MA plots, p-value QQ plot, p-value
    histogram, dispersion estimates) and two TSV result tables into the
    output directory.
    '''
    # Command Line Stuff...
    myCommandLine = CommandLine()

    outdir = myCommandLine.args['outDir']
    group1 = myCommandLine.args['group1']
    group2 = myCommandLine.args['group2']
    matrix = myCommandLine.args['matrix']
    prefix = myCommandLine.args['prefix']
    formula = myCommandLine.args['formula']

    print("running DESEQ2 %s" % prefix, file=sys.stderr)

    # make the quant DF
    quantDF = pd.read_table(matrix, header=0, sep='\t', index_col=0)
    df = pandas2ri.py2ri(quantDF)

    # import formula
    formulaDF = pd.read_csv(formula, header=0, sep="\t", index_col=0)
    sampleTable = pandas2ri.py2ri(formulaDF)

    has_batch = "batch" in list(formulaDF)
    if has_batch:
        design = Formula("~ batch + condition")
    else:
        design = Formula("~ condition")

    # import DESeq2 (and the R packages providing the plot functions)
    from rpy2.robjects.packages import importr
    import rpy2.robjects.lib.ggplot2 as ggplot2
    importr('methods')
    importr('DESeq2')
    grdevices = importr('grDevices')
    importr('qqman')

    ### RUN DESEQ2 ###
    R.assign('df', df)
    R.assign('sampleTable', sampleTable)
    R.assign('design', design)
    R('dds <- DESeqDataSetFromMatrix(countData = df, colData = sampleTable, design = design)')
    R('dds <- DESeq(dds)')
    R('name <- grep("condition", resultsNames(dds), value=TRUE)')

    # Get Results and shrinkage values
    res = R('results(dds, name=name)')
    resLFC = R('lfcShrink(dds, coef=name)')
    vsd = R('vst(dds,blind=FALSE)')
    resdf = robjects.r['as.data.frame'](res)
    reslfc = robjects.r['as.data.frame'](resLFC)
    dds = R('dds')

    ### Plotting section ###
    # plot MA and PC stats for the user
    plotMA = robjects.r['plotMA']
    plotDisp = robjects.r['plotDispEsts']
    plotPCA = robjects.r['plotPCA']
    plotQQ = robjects.r['qq']

    # get pca data
    if has_batch:
        intgroup = robjects.StrVector(("condition", "batch"))
    else:
        print(vsd)
        intgroup = "condition"
    pcaData = plotPCA(vsd, intgroup=intgroup, returnData=robjects.r['T'])
    percentVar = robjects.r['attr'](pcaData, "percentVar")

    # arrange
    data_folder = os.path.join(os.getcwd(), outdir)
    qcOut = os.path.join(data_folder,
                         "%s_QCplots_%s_v_%s.pdf" % (prefix, group1, group2))

    # "%%" is only collapsed to "%" inside the formatted string; the old
    # code appended it after formatting and so printed a literal "%%".
    x = "PC1: %d%% variance" % int(percentVar[0] * 100)
    y = "PC2: %d%% variance" % int(percentVar[1] * 100)

    aes_args = {'x': "PC1", 'y': "PC2", 'color': "condition"}
    if has_batch:
        aes_args['shape'] = "batch"

    grdevices.pdf(file=qcOut)
    try:
        pp = ggplot2.ggplot(pcaData) + \
            ggplot2.aes_string(**aes_args) + \
            ggplot2.geom_point(size=3) + \
            robjects.r['xlab'](x) + \
            robjects.r['ylab'](y) + \
            ggplot2.theme_classic() + \
            ggplot2.coord_fixed()
        pp.plot()

        plotMA(res, ylim=robjects.IntVector((-3, 3)),
               main="MA-plot results")
        plotMA(resLFC, ylim=robjects.IntVector((-3, 3)),
               main="MA-plot LFCSrhinkage")
        plotQQ(reslfc.rx2('pvalue'), main="LFCSrhinkage pvalue QQ")

        hh = ggplot2.ggplot(resdf) + \
            ggplot2.aes_string(x="pvalue") + \
            ggplot2.geom_histogram() + \
            ggplot2.theme_classic() + \
            ggplot2.ggtitle("pvalue distribution")
        hh.plot()

        plotDisp(dds, main="Dispersion Estimates")
    finally:
        # close the PDF even if one of the QC plots fails
        grdevices.dev_off()

    lfcOut = os.path.join(
        data_folder,
        "%s_%s_v_%s_deseq2_results_shrinkage.tsv" % (prefix, group1, group2))
    resOut = os.path.join(
        data_folder,
        "%s_%s_v_%s_deseq2_results.tsv" % (prefix, group1, group2))

    robjects.r['write.table'](reslfc, file=lfcOut, quote=False, sep="\t")
    robjects.r['write.table'](resdf, file=resOut, quote=False, sep="\t")
# Compute cv() for every row of `dataf` (values from the third column on),
# attach it as a 'CV' column, export the table and plot CV per peak.
number_of_peaks = len(dataf[0])
cvI = []
newRow = []
i = 1
while i <= number_of_peaks:
    row = dataf.rx(i, True)
    rowA = np.array(row)
    values = rowA[2:]
    newRow.append(values)
    cvI.append(cv(values))
    i += 1
# leave `i` at the last visited row, as a for-loop would
i -= 1 if number_of_peaks >= 1 else 0

cv_r = robjects.conversion.py2ri(cvI)
dataf_cv = robjects.DataFrame({'CV': cv_r})
df_cv = {'CV': cv_r}
dtf_cv = robjects.r.melt(dataf_cv)

# append the melted value column to the original frame and rename it
d = dataf.cbind(dtf_cv.rx(2))
value_position = tuple(d.colnames).index('value')
d.names[value_position] = 'CV'
utilis.write_csv(d, options.csv_output)

peak_numbers = robjects.IntVector(range(1, number_of_peaks + 1))
dc = dtf_cv.cbind(n_peak=peak_numbers)

gp = ggplot2.ggplot(dc)
pp = (gp
      + ggplot2.aes_string(x='n_peak', y='value')
      + ggplot2.geom_point()
      + ggplot2.theme_bw()
      + ggplot2.ggtitle('Coefficient of Variation')
      + ggplot2.scale_x_continuous("Number of Peaks")
      + ggplot2.scale_y_continuous("CV"))
r.X11()
pp.plot()
def _plot_with_rpy2(self, regions, filename):
    """
    Write one PDF page per region to ``filename + '.pdf'``.

    Each page stacks three line plots over the bins around the region's
    middle bin: intensity (log2 y scale), -log10(p-value) and
    -log10(q-value) with a dashed line at -log10(self.threshold).
    Only the first ``self.num_regs`` regions are plotted; regions without
    any data are skipped with a warning.

    Exits with status 1 if a region has fewer bins than
    ``self.num_bins`` on each side of its middle.
    """
    from rpy2 import robjects
    import rpy2.robjects.lib.ggplot2 as ggplot2
    from rpy2.robjects.lib import grid
    from rpy2.robjects.packages import importr
    grdevices = importr('grDevices')
    base = importr('base')

    grdevices.pdf(file=filename + '.pdf')

    t = list(range(-self.num_bins, self.num_bins + 1))
    no_x_axis = {'axis.title.x': ggplot2.element_blank(),
                 'axis.text.x': ggplot2.element_blank(),
                 'legend.position': 'none'}
    try:
        for region in regions[:self.num_regs]:
            if not np.any(region.weighted):
                logger.warning(
                    "Warning: No data for region located on bin " +
                    str(region.bin) + ". Not plotting this one.")
                continue
            # floor division: `middle` is used as a slice index below and
            # must stay an integer under true division as well
            middle = (len(region.weighted[0]) - 1) // 2
            if middle < self.num_bins:
                logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                sys.exit(1)
            lo = middle - self.num_bins
            hi = middle + self.num_bins + 1
            n_maps = len(region.weighted)

            d = {
                # map index repeated once per plotted bin
                'map': robjects.StrVector(
                    [str(m) for m in range(n_maps) for _ in t]),
                't': robjects.FloatVector(t * n_maps),
                'e': robjects.FloatVector(
                    [i for sublist in region.weighted
                     for i in sublist[lo:hi]]),
                'p': robjects.FloatVector(
                    [-np.log10(x) for sublist in region.pvalues
                     for x in sublist[lo:hi]]),
                'c': robjects.FloatVector(
                    [-np.log10(x) for sublist in region.corrected_pvalues
                     for x in sublist[lo:hi]]),
            }
            dataf = robjects.DataFrame(d)
            gp = ggplot2.ggplot(dataf)  # first yellow second red

            p1_theme = dict(no_x_axis)
            p1_theme['axis.text.y'] = ggplot2.element_text(angle=45)
            p1 = gp + ggplot2.geom_line(
                mapping=ggplot2.aes_string(x='t', y='e', group='map',
                                           colour='map'),
                alpha=0.8) \
                + ggplot2.scale_y_continuous(trans='log2') \
                + ggplot2.ggtitle("\n".join(wrap(
                    "Bin " + str(region.bin) + " : " +
                    str(region.positions)))) \
                + ggplot2.labs(y="log Intensity") \
                + ggplot2.theme_classic() \
                + ggplot2.theme(**p1_theme) \
                + ggplot2.scale_colour_brewer(palette="Set1")

            p2 = gp + ggplot2.geom_line(
                mapping=ggplot2.aes_string(x='t', y='p', group='map',
                                           colour='map'),
                alpha=0.8) \
                + ggplot2.labs(y="-log10(p-value)") \
                + ggplot2.theme_classic() \
                + ggplot2.theme(**no_x_axis) \
                + ggplot2.scale_colour_brewer(palette="Set1")

            p3 = gp + ggplot2.geom_line(
                mapping=ggplot2.aes_string(x='t', y='c', group='map',
                                           colour='map'),
                alpha=0.8) \
                + ggplot2.labs(y="-log10(q-value)",
                               x='bins (' + str(self.bin_res) +
                                 ' bp each)') \
                + ggplot2.geom_hline(
                    mapping=ggplot2.aes_string(
                        yintercept=str(-np.log10(self.threshold))),
                    colour='black', alpha=0.8, linetype='dashed') \
                + ggplot2.theme_classic() \
                + ggplot2.theme(**{'legend.position': 'none'}) \
                + ggplot2.scale_colour_brewer(palette="Set1")

            g1 = ggplot2.ggplot2.ggplotGrob(p1)
            g2 = ggplot2.ggplot2.ggplotGrob(p2)
            g3 = ggplot2.ggplot2.ggplotGrob(p3)

            # stack the three grobs vertically and draw them on one page
            robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
            robjects.r("grid::grid.draw(g)")
            grid.newpage()
            logger.debug('Plotted region ' + str(region.bin))
    finally:
        # close the PDF on normal completion, on sys.exit and on errors
        grdevices.dev_off()
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run.

    Plots the cumulative yield (read count or base pairs, chosen by
    ``args.plot_type``) against time in hours since the first read, as
    points joined by a line.

    args must provide ``plot_type`` ('reads' or 'basepairs') and
    ``saveas`` (a .pdf/.png path, or None to show the plot).

    Raises ValueError for an unknown ``args.plot_type``; exits with
    status 1 when ``args.saveas`` has an unrecognized extension.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0, in hours
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)
    else:
        # previously fell through to a NameError further down
        raise ValueError("Unrecognized plot type: %s" % (args.plot_type,))

    # make a data frame of the lists
    d = {'start': r_start_times,
         'lengths': r_read_lengths,
         'cumul': cumulative}
    df = robjects.DataFrame(d)

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_point() \
        + ggplot2.geom_line() \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5,
                          units="in", res=300)
        else:
            # write() works regardless of whether print is a statement
            sys.stderr.write(
                "Unrecognized extension for %s!\n" % (plot_file))
            # non-zero status: this is a failure, not a clean exit
            sys.exit(1)
        try:
            pp.plot()
        finally:
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
# Derive the bin figure shown in the x-axis label from `ma`.
roughbin = round(ma[0] / 100)
bins = round(roughbin / 100) * 100

scales = importr('scales')
gp = ggplot2.ggplot(dataf)

# rotate the x tick labels by 90 degrees (built directly in R)
theme = robjects.r('theme(axis.text.x=element_text(angle=90))')

x_axis = ggplot2.scale_x_continuous(
    name="fragment lengths, bin=" + str(bins),
    breaks=scales.pretty_breaks(20))
y_axis = ggplot2.scale_y_continuous(
    labels=scales.comma,
    name="Count",
    breaks=scales.pretty_breaks(10))

# Histogram of the 'Length' column; the later ggtitle call is the one
# added last to the plot.
pp = (gp
      + ggplot2.aes_string(x='Length')
      + ggplot2.geom_histogram()
      + ggplot2.ggtitle("Found IS fragment lengths")
      + x_axis
      + y_axis
      + ggplot2.ggtitle(args.title)
      + theme)
pp.plot()
robjects.r.ggsave(args.out)
def show4():
    """Draw a dodged bar chart comparing time per day across projects."""
    open4()
    r.source(
        'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',
        encoding="utf-8")
    frame = DataFrame.from_csvfile(
        'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')

    # bars side by side per day, filled by project, x labels tilted 45 deg
    layers = [
        ggplot2.aes_string(x='day', y='time', fill='factor(project)'),
        ggplot2.geom_bar(stat='identity', position='dodge'),
        ggplot2.ggtitle("两项目时间对比图"),
        ggplot2.labs(x='日期', y='时间 (min)'),
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)}),
    ]
    chart = ggplot2.ggplot(frame)
    for layer in layers:
        chart = chart + layer
    chart.plot()
def _plt_percountr(dat, independentpdf=False, fname='xpercount.pdf'):
    """
    Plot how many miRNA occur per TSS and how many TSS per miRNA.

    Rows whose 'label' equals the string 'NA' are dropped. Six plots are
    drawn: counts and density for TSS, then counts and density for miRNA,
    each of the latter both unsplit and filled by 'label'. With
    ``independentpdf`` the plots go to the PDF ``fname``; otherwise they
    are drawn on the current device.
    """
    def _filt_dat(dat, item, getlabel=True):
        # occurrences of each distinct value of dat[item]
        df = pd.DataFrame(dat[item].value_counts())
        df.columns = ['count']
        if getlabel:
            # label of the first row carrying each value
            df['label'] = [
                list(dat[dat[item] == i]['label'])[0] for i in df.index
            ]
        n = len(df)
        mx = max(df['count'])
        return df, n, mx

    dat = dat[dat['label'] != 'NA']

    ## NUMBER OF MIRNA PER TSS
    df, n, mx = _filt_dat(dat, 'tss', False)
    df = robjects.DataFrame({'count': robjects.IntVector(df['count'])})

    pt = ggplot2.ggplot(df) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5,
                               position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.aes_string(x='count') + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    pt_den = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='count', y='..density..') + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity',
                               origin=-.5) + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    ## NUMBER OF TSS PER MIRNA
    df, n, mx = _filt_dat(dat, 'mirna')
    df = robjects.DataFrame({
        'count': robjects.IntVector(df['count']),
        'label': robjects.StrVector(df['label']),
    })

    _pm = ggplot2.ggplot(df) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5,
                               position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    _pm_den = ggplot2.ggplot(df) + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity',
                               origin=-.5) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    ## add xlabelling (need to be added after aes_string)
    _xlab = ggplot2.labs(x='Number of TSS per miRNA (max = %s)' % mx)

    ## not split by label
    pm = _pm + ggplot2.aes_string(x='count') + _xlab
    pm_den = _pm_den + ggplot2.aes_string(x='count', y='..density..') \
        + _xlab

    ## split by label
    pms = _pm + ggplot2.aes_string(x='count', fill='label') + _xlab
    pm_dens = _pm_den + ggplot2.aes_string(
        x='count', fill='label', y='..density..') + _xlab

    plots = (pt, pt_den, pm, pm_den, pms, pm_dens)
    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(fname)
        try:
            for plot in plots:
                plot.plot()
        finally:
            # close the PDF device even when a plot call raises
            grdevices.dev_off()
    else:
        for plot in plots:
            plot.plot()
#-- ggplot2mtcars-end grdevices.dev_off() grdevices.png('../../_static/graphics_ggplot2geombin2d.png', width = 1000, height = 350, antialias="subpixel", type="cairo") grid.newpage() grid.viewport(layout=grid.layout(1, 3)).push() vp = grid.viewport(**{'layout.pos.col':1, 'layout.pos.row': 1}) #-- ggplot2geombin2d-begin gp = ggplot2.ggplot(dataf_rnorm) pp = gp + \ ggplot2.aes_string(x='value', y='other_value') + \ ggplot2.geom_bin2d() + \ ggplot2.ggtitle('geom_bin2d') pp.plot(vp = vp) #-- ggplot2geombin2d-end vp = grid.viewport(**{'layout.pos.col':2, 'layout.pos.row': 1}) #-- ggplot2geomdensity2d-begin gp = ggplot2.ggplot(dataf_rnorm) pp = gp + \ ggplot2.aes_string(x='value', y='other_value') + \ ggplot2.geom_density2d() + \ ggplot2.ggtitle('geom_density2d') pp.plot(vp = vp) #-- ggplot2geomdensity2d-end vp = grid.viewport(**{'layout.pos.col':3, 'layout.pos.row': 1})
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run.

    Plots the cumulative yield (read count or base pairs, chosen by
    ``args.plot_type``) against time in hours since the first read. Only
    every ``args.skip``-th data point is kept for the data frame.

    args must provide ``plot_type`` ('reads' or 'basepairs'), ``skip``,
    ``savedf``, ``extrapolate``, ``theme_bw`` and ``saveas`` (a .pdf/.png
    path or None).

    Raises ValueError for an unknown ``args.plot_type``; exits with
    status 1 when ``args.saveas`` has an unrecognized extension.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0, in hours; the tiny offset keeps
    # the first time point strictly positive
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) + 0.00000001 for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)
    else:
        # previously fell through to a NameError further down
        raise ValueError("Unrecognized plot type: %s" % (args.plot_type,))

    step = args.skip

    def _thin(vector):
        # keep every `step`-th element, starting with the first
        return [vector[n] for n in xrange(0, len(vector), step)]

    # make a data frame of the (thinned) lists
    d = {'start': robjects.FloatVector(_thin(r_start_times)),
         'lengths': robjects.IntVector(_thin(r_read_lengths)),
         'cumul': robjects.IntVector(_thin(cumulative))}
    df = robjects.DataFrame(d)

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title (totals are over all reads, not just the thinned ones)
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_step(size=2) \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    # extrapolation
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        # real booleans instead of the strings 'TRUE'/'FALSE'
        pp = pp + ggplot2.stat_smooth(fullrange=True, method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se=False, start=start) \
            + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # non-zero status: this is a failure, not a clean exit
            sys.exit(1)
        try:
            pp.plot()
        finally:
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type.

    For every experiment in ``expt_names`` each pair of its columns
    (looked up through ``expt_name_idx``) is scatter-plotted and the
    Pearson correlation recorded. With ``outdir`` set, plots are saved as
    PNG files there and a tab-separated ``cor_summary.txt`` is written;
    otherwise plots are drawn on the current device.

    Returns a pandas DataFrame with columns name1, name2 and cor, indexed
    by "<experiment>-<pair number>".
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front. Doing it only inside the
    # pair loop left it missing (and the summary write failing) whenever no
    # experiment had at least two CEL files.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) for that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0
        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            col_i = expr[:, tmp_idx[i]]
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                col_j = expr[:, tmp_idx[j]]
                plot_idx += 1
                label = ex + '-' + str(plot_idx)
                cor = np.corrcoef(col_i, col_j)[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(label)

                df = ro.DataFrame({
                    'x': ro.FloatVector(col_i),
                    'y': ro.FloatVector(col_j)
                })
                gp = ggplot2.ggplot(df) \
                    + ggplot2.aes_string(x='x', y='y') \
                    + ggplot2.geom_point(size=1) \
                    + ggplot2.scale_x_continuous(name1) \
                    + ggplot2.scale_y_continuous(name2) \
                    + ggplot2.theme_bw() \
                    + ggplot2.ggtitle(
                        '{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) \
                    + ggplot2.theme(**{
                        'axis.text.x': ggplot2.element_text(size=fsize),
                        'axis.title.x': ggplot2.element_text(size=8),
                        'axis.text.y': ggplot2.element_text(size=fsize),
                        'axis.title.y': ggplot2.element_text(size=8,
                                                             angle=90),
                        'plot.title': ggplot2.element_text(size=fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, label + '.png')
                    # `units` is ggsave's full argument name
                    ro.r.ggsave(filename=outfile, plot=gp, width=85,
                                height=85, units='mm')

    df = pd.DataFrame({
        'name1': names_1,
        'name2': names_2,
        'cor': cors
    }, index=titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
#print onlysurf #colours2 = grdevices.topo_colors(10) colours2 = grdevices.cm_colors(10) #colours2 = grdevices.rainbow(20) #print colours2 #colours = ggplot2.rainbow(54) #bins=10 gp = ggplot2.ggplot(onlysurf) #gp = ggplot2.ggplot(onlyfilts) gp=gp+ggplot2.aes_string(x="Lon", y="Lat", col="Temp",label="Station") gp=gp+ggplot2.scale_colour_gradientn(colours=colours2) gp=gp+ggplot2.geom_text(col="black",offset = 10) gp=gp+ggplot2.geom_point(position="jitter") gp=gp+ggplot2.ggtitle(graphtitle) robjects.r('library(ggmap)') robjects.r('library(mapproj)') robjects.r('map <- get_map(location = "Europe", zoom = 4)') robjects.r('ggmap(map)') #robjects.r('library(maps)') #robjects.r('map("world", interior = FALSE)') #robjects.r('map("state", boundary = FALSE, col="gray", add = TRUE)') #gp.plot() ''' pp = gp + \
# Axis labels and text annotations are built as R expressions; the plot
# overlays two data sets (blue: dat_frame, red: dat_frame2), each with a
# linear fit, on log-log axes.
r_sq_lab = "R^{2}~" + r_sq
y_lab = r("expression(Discharge (m^{3}/s))")
x_lab = r("expression(Area (km^{2}))")

# x positions of the two annotation columns, as text for the R snippets
x_annual = str(max(areas) - 30)
x_lgm = str(max(areas) - 150)

annotate1 = r('annotate("text", x = %s, y = 0.5, color = "red", label = "Mean Annual", parse=FALSE)' % x_annual)
annotate2 = r('annotate("text", x = %s, y = 0.42, label = "%s", color = "red", parse=TRUE)' % (x_annual, r_sq_lab))
annotate3 = r('annotate("text", x = %s, y = 0.34, label = "slope~%s", color = "red", parse=TRUE)' % (x_annual, sl))
annotate4 = r('annotate("text", x = %s, y = 0.7, color = "blue", label = "LGM", parse=FALSE)' % x_lgm)
annotate5 = r('annotate("text", x = %s, y = 0.6, color = "blue", label = "%s", parse=TRUE)' % (x_lgm, r_sq_lab_lgm))
annotate6 = r('annotate("text", x = %s, y = 0.5, color = "blue", label = "slope~%s", parse=TRUE)' % (x_lgm, sl_lgm))

pp = (ggplot2.ggplot(dat_frame)
      + ggplot2.aes_string(y='discharge', x='areas')
      + ggplot2.ggtitle('Area vs. Sediment Flux')
      + ggplot2.scale_x_log10(x_lab)
      + ggplot2.theme_bw()
      + ggplot2.stat_smooth(method="lm", formula='y ~ x')
      + ggplot2.scale_y_log10(y_lab)
      + annotate1
      + annotate2
      + annotate3
      + annotate4
      + annotate5
      + annotate6
      + ggplot2.geom_point(color='blue')
      + ggplot2.geom_errorbar(ggplot2.aes_string(ymin='min', ymax='max'),
                              data=dat_frame, width=.02, alpha=.3)
      + ggplot2.geom_point(data=dat_frame2, color='red',
                           show_guide='FALSE')
      + ggplot2.stat_smooth(data=dat_frame2, method="lm",
                            formula='y ~ x', color='red'))
# Open a wide PNG and split the page into a 1 x 3 grid layout; each example
# below draws into one cell of that layout through its viewport `vp`.
# NOTE(review): the "#-- name-begin/end" comments look like markers for
# extracting the enclosed lines into documentation -- keep them intact.
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width=1000, height=350, antialias="subpixel", type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# first cell of the layout
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
     ggplot2.aes_string(x='value', y='other_value') + \
     ggplot2.geom_bin2d() + \
     ggplot2.ggtitle('geom_bin2d')
pp.plot(vp=vp)
#-- ggplot2geombin2d-end

# second cell of the layout
vp = grid.viewport(**{'layout.pos.col': 2, 'layout.pos.row': 1})
#-- ggplot2geomdensity2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
     ggplot2.aes_string(x='value', y='other_value') + \
     ggplot2.geom_density2d() + \
     ggplot2.ggtitle('geom_density2d')
pp.plot(vp=vp)
#-- ggplot2geomdensity2d-end

# third cell of the layout, used by the code that follows
vp = grid.viewport(**{'layout.pos.col': 3, 'layout.pos.row': 1})
def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot of the read.

    The mean signal of each event is drawn as a step plot against the
    event start time (relative to the first event), split into stacked
    facets so that a long read stays legible.

    Args:
        args: options object; the attributes read here are
            ``num_facets`` (number of facets to split the events over),
            ``theme_bw`` (truthy to use ggplot2's black-and-white theme)
            and ``saveas`` (``None`` for an interactive plot, otherwise
            the file extension, with "pdf" and "png" opening a matching
            graphics device).
        filename: name of the read's file; used in the plot title and,
            when saving, as the base name of the output file.
        start_times: sequence of event start times; must be non-empty.
        mean_signals: sequence of event mean signals, one per start time.

    Raises:
        IndexError: if ``start_times`` is empty.
        Exception: if the output plot file already exists.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - t_0

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets.
    # Floor division is required: with true division every event would get
    # its own fractional category and therefore its own facet.
    num_events = len(r_mean_signals)
    events_per_facet = (num_events // args.num_facets) + 1

    # dummy variable to control faceting: consecutive runs of
    # events_per_facet events share the same 1-based category.
    facet_category = robjects.FloatVector(
        [(i // events_per_facet) + 1 for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times,
         'mean': r_mean_signals,
         'cat': facet_category}
    df = robjects.DataFrame(d)

    title = ('Squiggle plot for read: ' + filename +
             "\nTotal time (sec): " + str(total_time))
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='mean') \
        + ggplot2.geom_step(size=0.25) \
        + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1,
                             scales="free_x") \
        + ggplot2.scale_x_continuous('Time (seconds)') \
        + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
        + ggplot2.ggtitle(title)

    # A complete theme such as theme_bw() replaces earlier theme settings,
    # so it has to be added before the title-size tweak, not after it.
    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()
    pp = pp + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists'
                % (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11,
                          units="in", res=300)
        try:
            pp.plot()
        finally:
            # always close the device so the file is flushed and the
            # device is not leaked if plotting fails
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()