def _plot_with_rpy2(self, regions, filename):
    """Write one PDF page of stacked line plots per region via rpy2/ggplot2.

    For each of the first ``self.num_regs`` regions three panels are drawn
    over the ``2 * self.num_bins + 1`` bins around the middle bin of the
    region's tracks: the weighted values (log2 y axis), ``-log10`` of the
    p-values and ``-log10`` of the corrected p-values, the last one with a
    dashed line at ``-log10(self.threshold)``.  A region whose ``weighted``
    data has no non-zero entry is skipped with a warning.

    Args:
        regions: Sliceable sequence of region objects; the attributes
            ``weighted``, ``pvalues``, ``corrected_pvalues``, ``bin`` and
            ``positions`` are read from each one.
        filename: Output path without extension; ``'.pdf'`` is appended.

    Raises:
        SystemExit: With status 1 when a region holds fewer bins than
            ``self.num_bins`` on each side of its middle bin.
    """
    from rpy2 import robjects
    import rpy2.robjects.lib.ggplot2 as ggplot2
    from rpy2.robjects.lib import grid
    from rpy2.robjects.packages import importr

    grdevices = importr('grDevices')
    base = importr('base')
    grdevices.pdf(file=filename + '.pdf')
    # The device is closed in ``finally`` so the PDF is finalised even when
    # plotting raises or the bin check below exits.
    try:
        t = list(range(-self.num_bins, self.num_bins + 1))
        for region in regions[:self.num_regs]:
            if not np.any(region.weighted):
                logger.warning(
                    "Warning: No data for region located on bin "
                    + str(region.bin) + ". Not plotting this one.")
                continue
            # Floor division keeps ``middle`` an int: it is used as a slice
            # index, which a float (true division on Python 3) would break.
            middle = (len(region.weighted[0]) - 1) // 2
            if middle < self.num_bins:
                logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                sys.exit(1)
            lo = middle - self.num_bins
            hi = middle + self.num_bins + 1
            n_tracks = len(region.weighted)
            d = {
                # One label per track, repeated once for every plotted bin.
                'map': robjects.StrVector(
                    [str(track) for track in range(n_tracks) for _ in t]),
                't': robjects.FloatVector(t * n_tracks),
                'e': robjects.FloatVector(
                    [v for track in region.weighted for v in track[lo:hi]]),
                'p': robjects.FloatVector(
                    [-np.log10(v) for track in region.pvalues
                     for v in track[lo:hi]]),
                'c': robjects.FloatVector(
                    [-np.log10(v) for track in region.corrected_pvalues
                     for v in track[lo:hi]]),
            }
            dataf = robjects.DataFrame(d)
            gp = ggplot2.ggplot(dataf)
            # first yellow second red
            p1 = (
                gp
                + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='e', group='map', colour='map'),
                    alpha=0.8)
                + ggplot2.scale_y_continuous(trans='log2')
                + ggplot2.ggtitle(
                    "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions))))
                + ggplot2.labs(y="log Intensity")
                + ggplot2.theme_classic()
                + ggplot2.theme(**{
                    'axis.title.x': ggplot2.element_blank(),
                    'axis.text.y': ggplot2.element_text(angle=45),
                    'axis.text.x': ggplot2.element_blank(),
                    'legend.position': 'none'})
                + ggplot2.scale_colour_brewer(palette="Set1")
            )
            p2 = (
                gp
                + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='p', group='map', colour='map'),
                    alpha=0.8)
                + ggplot2.labs(y="-log10(p-value)")
                + ggplot2.theme_classic()
                + ggplot2.theme(**{
                    'axis.title.x': ggplot2.element_blank(),
                    'axis.text.x': ggplot2.element_blank(),
                    'legend.position': 'none'})
                + ggplot2.scale_colour_brewer(palette="Set1")
            )
            p3 = (
                gp
                + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='c', group='map', colour='map'),
                    alpha=0.8)
                + ggplot2.labs(y="-log10(q-value)",
                               x='bins (' + str(self.bin_res) + ' bp each)')
                + ggplot2.geom_hline(
                    mapping=ggplot2.aes_string(
                        yintercept=str(-np.log10(self.threshold))),
                    colour='black', alpha=0.8, linetype='dashed')
                + ggplot2.theme_classic()
                + ggplot2.theme(**{'legend.position': 'none'})
                + ggplot2.scale_colour_brewer(palette="Set1")
            )
            g1 = ggplot2.ggplot2.ggplotGrob(p1)
            g2 = ggplot2.ggplot2.ggplotGrob(p2)
            g3 = ggplot2.ggplot2.ggplotGrob(p3)
            # Stack the three panels vertically and draw them on one page.
            robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
            robjects.r("grid::grid.draw(g)")
            grid.newpage()
            logger.debug('Plotted region ' + str(region.bin))
    finally:
        grdevices.dev_off()
def line_plot(self, data, title, ylabel, img_file, x='date', y='size', c='type', clabel=''):
    """Draw a coloured line-and-point chart and save it under ``PLOTDIR``.

    The plotting backend is selected by the module-level ``PLOTLIB``
    (``'ggplot'`` or ``'rpy2.ggplot2'``).

    Args:
        data: Data frame holding the columns named by ``x``, ``y`` and ``c``.
            With the rpy2 backend the ``y`` column is converted to float
            in place.
        title: Plot title.
        ylabel: Label of the y axis.
        img_file: File name of the image, relative to ``PLOTDIR``.
        x: Column mapped to the x axis.
        y: Column mapped to the y axis.
        c: Column mapped to the colour.
        clabel: Legend title for the colour (rpy2 backend only).

    Returns:
        The plot object, after it has been saved.

    Raises:
        ValueError: If ``PLOTLIB`` names an unsupported backend.
    """
    if PLOTLIB == 'ggplot':
        date_label = "%Y\n%W"  # year + week number
        p = ggplot(data, aes(x=x, y=y, color=c)) \
            + ggtitle(title) \
            + ylab(ylabel) \
            + xlab(' ') \
            + scale_x_date(breaks=date_breaks('3 months'), labels=date_label) \
            + geom_line() + geom_point()
    elif PLOTLIB == 'rpy2.ggplot2':
        # convert y axis to float because R uses 32-bit signed integers,
        # values > 2 bln. (2^31) will overflow
        data[y] = data[y].astype(float)
        p = ggplot2.ggplot(data) \
            + ggplot2.aes_string(x=x, y=y, color=c) \
            + ggplot2.geom_line() + ggplot2.geom_point() \
            + GGPLOT2_THEME \
            + ggplot2.labs(title=title, x='', y=ylabel, color=clabel)
    else:
        # Previously this fell through to an UnboundLocalError on ``p``.
        raise ValueError('Unsupported plotting library: {!r}'.format(PLOTLIB))
    img_path = os.path.join(PLOTDIR, img_file)
    p.save(img_path)
    return p
def render_plot(gp, args):
    """Render a plot using ggplot

    :gp: A base ggplot2 object
    :args: Plot settings, wrapped in ``util.Namespace``; the attributes
        ``x`` and ``y`` (value expressions), ``type`` (``'points'``,
        ``'lines'`` or ``'boxplot'``) and ``facets`` (facet formula string
        or ``None``) are read.

    Faceting and the final draw stay best-effort: a failure there is logged
    instead of raised.  An unknown ``type`` raises ``NotImplementedError``.
    """
    import logging

    import rpy2.robjects.lib.ggplot2 as ggplot2

    log = logging.getLogger(__name__)
    args = util.Namespace(args)

    pp = gp + ggplot2.aes_string(x=args.x, y=args.y)
    if args.type == 'points':
        pp += ggplot2.geom_point()
    elif args.type == 'lines':
        pp += ggplot2.geom_line()
    elif args.type == 'boxplot':
        pp += ggplot2.geom_boxplot()
    else:
        # NotImplementedError is an Exception subclass, so callers that
        # catch ``Exception`` keep working.
        raise NotImplementedError("{0} not implemented".format(args.type))

    if args.facets is not None:
        try:
            pp += ggplot2.facet_grid(ro.Formula(args.facets))
        except Exception:
            # Best-effort: draw without facets, but leave a trace.
            log.warning("Could not apply facets %r", args.facets, exc_info=True)
    try:
        pp.plot()
    except Exception:
        log.warning("Could not render plot", exc_info=True)
def plot(data, x, y, ylabel, color, filename):
    """Draw column ``y`` against ``x`` as one line and save it to ``filename``.

    The line is drawn in ``color``, the x tick labels are rotated by 90
    degrees and ``ylabel`` is passed to the continuous y scale.
    """
    canvas = ggplot2.ggplot(data=data)
    line_layer = ggplot2.geom_line(ggplot2.aes_string(x=x, y=y), color=color)
    text_theme = ggplot2.theme(**{
        'axis.text.x': ggplot2.element_text(angle=90, hjust=1),
        'strip.text.y': ggplot2.element_text(size=6, angle=90),
    })
    y_scale = ggplot2.scale_y_continuous(ylabel)
    figure = canvas + line_layer + text_theme + y_scale
    ggplot2.ggplot2.ggsave(filename, figure)
def plot_ROC(self, path):
    """Plot ``tpr`` against ``fpr`` from ``self.df`` on a PDF device at ``path``.

    The frame is printed to stdout before plotting.
    """
    # NOTE(review): the PDF device opened here is not closed in this method;
    # presumably the caller calls dev.off() -- confirm.
    open_pdf = robjects.r['pdf']
    open_pdf(path, width=14, height=8)
    roc_frame = self.df
    print(roc_frame)
    r_frame = convert_to_r_dataframe(roc_frame, strings_as_factors=True)
    curve = (
        ggplot2.ggplot(r_frame)
        + ggplot2.aes_string(x='fpr', y='tpr')
        + ggplot2.geom_line(color='blue')
        + ggplot2.geom_point(size=2)
    )
    curve.plot()
def plot_ROC(self, path):
    """Draw the ROC curve (``fpr`` vs. ``tpr`` of ``self.df``) into a PDF at ``path``."""
    # NOTE(review): no dev.off() follows the plot here; presumably the
    # caller closes the device -- confirm.
    robjects.r["pdf"](path, width=14, height=8)
    chart = ggplot2.ggplot(
        convert_to_r_dataframe(self.df, strings_as_factors=True))
    layers = (
        ggplot2.aes_string(x="fpr", y="tpr"),
        ggplot2.geom_line(color="blue"),
        ggplot2.geom_point(size=2),
    )
    for layer in layers:
        chart = chart + layer
    chart.plot()
def plot_all_errors(self, path):
    """Plot every column of ``self.df`` against ``iteration`` on a PDF device.

    ``self.df`` is melted to long form with ``iteration`` as the id column;
    each original column becomes one coloured point-and-line series.
    """
    # NOTE(review): the PDF device opened here is not closed in this method;
    # presumably the caller does that -- confirm.
    robjects.r["pdf"](path, width=14, height=8)
    x_col = "iteration"
    long_frame = pandas.melt(self.df, id_vars="iteration")
    r_frame = convert_to_r_dataframe(long_frame, strings_as_factors=True)
    chart = (
        ggplot2.ggplot(r_frame)
        + ggplot2.aes_string(x=x_col, y="value", color="variable")
        + ggplot2.geom_point(size=2)
        + ggplot2.geom_line()
    )
    chart.plot()
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean, prefix=''):
    """Write the per-subgroup LRR counts and the acc20 line chart to ``OUTPUT_PATH``.

    Args:
        subgroups_to_lrrs_acc20mean: Mapping ``subgroup -> (acc20means,
            acc20_count)``; ``acc20means`` is plotted against its 1-based
            position and ``acc20_count`` is written to the count file.
        prefix: String prepended to both output file names.

    Outputs ``<prefix>step3_5_lrr_count.txt`` and
    ``<prefix>step3_5_lrr_acc20_line.png``.
    """
    pandas2ri.activate()
    subgroups_to_lrr_count = {}
    columns_to_data = {'subgroup': [], 'pos': [], 'acc20': []}
    for subgroup, (acc20means, acc20_count) in subgroups_to_lrrs_acc20mean.items():
        subgroups_to_lrr_count[subgroup] = acc20_count
        for pos, acc20mean in enumerate(acc20means, start=1):
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data['pos'].append(pos)
            columns_to_data['acc20'].append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    count_file_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt")
    with open(count_file_path, 'w') as f:
        for subgroup, lrr_count in subgroups_to_lrr_count.items():
            f.write("{}: {}\n".format(subgroup, lrr_count))

    # Generate the line chart file
    df = ro.DataFrame({
        'subgroup': ro.StrVector(columns_to_data['subgroup']),
        'pos': ro.IntVector(columns_to_data['pos']),
        'acc20': ro.FloatVector(columns_to_data['acc20']),
    })
    line_chart_file_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_acc20_line.png")
    # Lazy %-args: the data frame is only rendered when DEBUG is enabled.
    logging.debug("The Data Frame for file %s: \n%s", line_chart_file_path, df)

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    # Close the device even if plotting fails, so it is not leaked.
    try:
        pp = ggplot2.ggplot(df) + \
            ggplot2.theme_bw() + \
            ggplot2.theme_classic() + \
            ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
            ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
            ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup') + \
            ggplot2.geom_point(size=4, shape=20) + \
            ggplot2.geom_line(size=3) + \
            ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
            ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
            ggplot2.scale_x_continuous(
                breaks=ro.IntVector(range(1, 25)),
                # One tick label per position 1..24.
                labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))
        pp.plot()
        logging.info("Output step3 file %s", line_chart_file_path)
    finally:
        grdevices.dev_off()
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname, line_chart_file_path):
    """Plot the mean acc20 per site for every type as a PNG line chart.

    Args:
        ts_to_acc20s: Input passed to ``calc_acc20mean_by_types``; its result
            is iterated as ``type -> sequence of acc20 means``.
        tname: Name of the type column; also used as group and colour.
        line_chart_file_path: Path of the PNG file to write.

    Each sequence of means is plotted against sites starting at -5; the x
    axis is labelled ``-5 .. -1, N, 1 .. 5``.
    """
    # Lazy %-args: the input is only rendered when DEBUG is enabled.
    logging.debug("Begin to generate %s, data %s", line_chart_file_path, ts_to_acc20s)
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)

    columns_to_data = {tname: [], 'site': [], 'acc20': []}
    for ss, acc20means in ts_to_acc20mean.items():
        for site, acc20mean in enumerate(acc20means, start=-5):
            columns_to_data[tname].append(ss)
            columns_to_data['site'].append(site)
            columns_to_data['acc20'].append(acc20mean)

    # Generate the line chart file
    df = ro.DataFrame({
        tname: ro.StrVector(columns_to_data[tname]),
        'site': ro.IntVector(columns_to_data['site']),
        'acc20': ro.FloatVector(columns_to_data['acc20']),
    })
    logging.debug("The Data Frame for file %s: \n%s", line_chart_file_path, df)

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    # Close the device even if plotting fails, so it is not leaked.
    try:
        pp = ggplot2.ggplot(df) + \
            ggplot2.theme_bw() + \
            ggplot2.theme_classic() + \
            ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
            ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
            ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname) + \
            ggplot2.geom_point(size=4, shape=20) + \
            ggplot2.geom_line(size=3) + \
            ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
            ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
            ggplot2.scale_x_continuous(
                breaks=ro.IntVector(list(range(-5, 6))),
                labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N',
                                     '1', '2', '3', '4', '5']))
        pp.plot()
        logging.info("Output step3 file %s", line_chart_file_path)
    finally:
        grdevices.dev_off()
def plot_domain_cumul(self, crawl):
    """Plot cumulative URLs against cumulative domains for one crawl.

    Rows of ``self.histogr`` with ``type == 'domain'``, the given ``crawl``
    and ``type_counted == 'url'`` are sorted by ``count`` (descending) and
    cumulated.  The table is printed and the chart is saved to
    ``crawler/histogr_domain_cumul.png`` below ``PLOTDIR`` with both axes
    on a log10 scale.

    Args:
        crawl: Value matched against the ``crawl`` column.

    Returns:
        The ggplot2 plot object, after it has been saved.
    """
    # -- coverage (cumulative pages) per domain
    data = self.histogr
    data = data[data['type'].isin(['domain'])]
    data = data[data['crawl'] == crawl]
    # Work on a copy: adding a column to a filtered slice triggers pandas'
    # SettingWithCopyWarning and may not write through.
    data = data[data['type_counted'].isin(['url'])].copy()
    data['urls'] = data['count'] * data['frequency']
    print(data)
    data = data[['urls', 'count', 'frequency']]
    data = data.sort_values(['count'], ascending=False)
    data['cum_domains'] = data['frequency'].cumsum()
    data['cum_urls'] = data['urls'].cumsum()
    # Per-column share in percent, one decimal.
    data_perc = data.apply(lambda x: round(100.0 * x / float(x.sum()), 1))
    data['%domains'] = data_perc['frequency']
    data['%urls'] = data_perc['urls']
    # Totals are hoisted so they are not recomputed for every row.
    total_domains = float(data['frequency'].sum())
    total_urls = float(data['urls'].sum())
    data['%cum_domains'] = data['cum_domains'].apply(
        lambda x: round(100.0 * x / total_domains, 1))
    data['%cum_urls'] = data['cum_urls'].apply(
        lambda x: round(100.0 * x / total_urls, 1))
    with pandas.option_context('display.max_rows', None,
                               'display.max_columns', None,
                               'display.width', 200):
        print(data)
    img_path = os.path.join(PLOTDIR, 'crawler/histogr_domain_cumul.png')
    title = 'Cumulative URLs for Top Domains'
    p = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
        + ggplot2.geom_line() + ggplot2.geom_point() \
        + GGPLOT2_THEME \
        + ggplot2.labs(title=title, x='domains cumulative', y='URLs cumulative') \
        + ggplot2.scale_y_log10() \
        + ggplot2.scale_x_log10()
    p.save(img_path)
    return p
def plot_domain_cumul(self, crawl):
    """Plot cumulative URLs against cumulative domains for one crawl.

    Rows of ``self.histogr`` with ``type == 'domain'``, the given ``crawl``
    and ``type_counted == 'url'`` are sorted by ``count`` (descending) and
    cumulated.  The table is printed and the chart is saved to
    ``crawler/histogr_domain_cumul.png`` below ``PLOTDIR`` with both axes
    on a log10 scale.

    Args:
        crawl: Value matched against the ``crawl`` column.

    Returns:
        The ggplot2 plot object, after it has been saved.
    """
    # -- coverage (cumulative pages) per domain
    data = self.histogr
    data = data[data['type'].isin(['domain'])]
    data = data[data['crawl'] == crawl]
    # Work on a copy: adding a column to a filtered slice triggers pandas'
    # SettingWithCopyWarning and may not write through.
    data = data[data['type_counted'].isin(['url'])].copy()
    data['urls'] = data['count'] * data['frequency']
    print(data)
    data = data[['urls', 'count', 'frequency']]
    data = data.sort_values(['count'], ascending=False)
    data['cum_domains'] = data['frequency'].cumsum()
    data['cum_urls'] = data['urls'].cumsum()
    # Per-column share in percent, one decimal.
    data_perc = data.apply(lambda x: round(100.0 * x / float(x.sum()), 1))
    data['%domains'] = data_perc['frequency']
    data['%urls'] = data_perc['urls']
    # Totals are hoisted so they are not recomputed for every row.
    total_domains = float(data['frequency'].sum())
    total_urls = float(data['urls'].sum())
    data['%cum_domains'] = data['cum_domains'].apply(
        lambda x: round(100.0 * x / total_domains, 1))
    data['%cum_urls'] = data['cum_urls'].apply(
        lambda x: round(100.0 * x / total_urls, 1))
    with pandas.option_context('display.max_rows', None,
                               'display.max_columns', None,
                               'display.width', 200):
        print(data)
    img_path = os.path.join(PLOTDIR, 'crawler/histogr_domain_cumul.png')
    title = 'Cumulative URLs for Top Domains'
    p = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
        + ggplot2.geom_line() + ggplot2.geom_point() \
        + GGPLOT2_THEME \
        + ggplot2.labs(title=title, x='domains cumulative', y='URLs cumulative') \
        + ggplot2.scale_y_log10() \
        + ggplot2.scale_x_log10()
    p.save(img_path)
    return p
'axis.title.x': element_text(size=12, color=robjects.r.color_axis_title, vjust=0), #'panel.grid.major':element_line(color=robjects.r.color_grid_major,size=.25), 'axis.title.y': element_text(size=12, color=robjects.r.color_axis_title, angle=90) }) pandas2ri.activate() #set up basic, repetitive plot features base_plot = ggplot2.aes_string(x='mos_since_start', y='value', group='variable', colour='variable', shape='variable', linetype='variable') line = ggplot2.geom_line() point = ggplot2.geom_point() vert_line_onset = ggplot2.geom_vline(xintercept=-1.5, linetype=2, colour="#999999") vert_line_exhaust = ggplot2.geom_vline(xintercept=5.5, linetype=2, colour="#999999") vert_line_exhaust_FL = ggplot2.geom_vline(xintercept=3.5, linetype=2, colour="#999999") colors = ggplot2.scale_colour_manual(values=robjects.r.palette_lines) hollow = ggplot2.scale_shape_manual( values=robjects.r('c(16,17,15,18,6,7,9,3)')) xlab = ggplot2.labs(x="Months Since First UI Check") loc_default = robjects.r('c(1,0)')
# Fit a straight line through (corr_nci60, corr_sec) and evaluate it on 100
# evenly spaced points spanning the range of x.
xmin, xmax = np.min(x), np.max(x)
xs = np.linspace(xmin, xmax, num=100).reshape(100, 1)

# The training data for scikit models must be in matrix
# form, i.e. columns == features, rows == observations.
# For this we need to reshape the 1-dimensional arrays.
X = corr_nci60.reshape(len(x), 1)
y = corr_sec
lm = LinearRegression()
lm.fit(X, y)
y_pred = lm.predict(xs)

# Plot the data using the R-bridge rpy and ggplot: the observations as
# points, the fitted line in red on top.
observed = pd.DataFrame({'r_nci60': corr_nci60, 'r_sec': corr_sec})
fitted = pd.DataFrame({'x': xs.reshape(-1), 'y': y_pred})
p = (
    gg.ggplot(pd.DataFrame())
    + gg.geom_point(gg.aes_string(x='r_nci60', y='r_sec'), data=observed)
    + gg.geom_line(gg.aes_string(x='x', y='y'), data=fitted, color='red')
)
p.plot()
# Draw the same performance plot twice, side by side: column 1 with a log10
# y axis, column 2 with a linear one.
for col_i, yscale in enumerate(['log', 'linear']):
    vp = grid.viewport(**{'layout.pos.col': col_i + 1, 'layout.pos.row': 1})
    # ``opts`` is defunct in ggplot2; the title goes through ``ggtitle`` and
    # the remaining settings through ``theme``.
    pp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='Date', y='Performance', color='color',
                           shape='PerfType', linetype='PerfType') + \
        ggplot2.ggtitle('Performance vs. Color') + \
        ggplot2.theme(**{'legend.key.size': ro.r.unit(1.4, "lines")}) + \
        ggplot2.scale_colour_manual("Color",
                                    values=colormap,
                                    breaks=colormap.names,
                                    labels=[elt[1] for elt in colormap_labels]) + \
        ggplot2.geom_point(size=3) + \
        ggplot2.scale_linetype_manual(values=linemap) + \
        ggplot2.geom_line(size=1.5)

    # custom y-axis lines: major lines ("breaks") are every 10^n; 9
    # minor lines ("minor_breaks") between major lines
    if yscale == 'log':
        pp = pp + \
            ggplot2.scale_y_log10(
                breaks=ro.r("10^(%d:%d)" % (gflops_range[0],
                                            gflops_range[1])),
                minor_breaks=ro.r("rep(10^(%d:%d), each=9) * rep(1:9, %d)" %
                                  (gflops_range[0] - 1,
                                   gflops_range[1],
                                   gflops_range[1] - gflops_range[0])))

    pp.plot(vp=vp)
#-- ggplot2perfcolor-end
grdevices.dev_off()
def plot_collectors_curve(args, start_times, read_lengths):
    """Use rpy2 to create a collectors curve of the run.

    Args:
        args: Options object; ``plot_type`` (``'reads'`` or ``'basepairs'``)
            selects the cumulated quantity and ``saveas`` (``None`` or a
            path ending in ``.pdf``/``.png``) selects file output.
        start_times: Read start times; the first entry is used as time
            zero and the differences are divided by 3600 for the
            ``'Time (hours)'`` axis.
        read_lengths: Read lengths, one per start time.

    Raises:
        ValueError: If ``args.plot_type`` is not a supported value.
        SystemExit: With status 1 if ``args.saveas`` has an unrecognized
            extension.

    Without ``args.saveas`` the plot is shown on the default device and the
    function waits for the user to hit enter.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        # Cumulate as doubles: R integers are 32-bit and cumsum() turns into
        # NA once the running total passes 2^31 - 1 base pairs.
        cumulative = r.cumsum(robjects.FloatVector(read_lengths))
    else:
        # Previously this fell through to an UnboundLocalError below.
        raise ValueError("Unsupported plot type: %r" % (args.plot_type,))

    # make a data frame of the lists
    d = {'start': r_start_times,
         'lengths': r_read_lengths,
         'cumul': cumulative}
    df = robjects.DataFrame(d)

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_point() \
        + ggplot2.geom_line() \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5,
                          units="in", res=300)
        else:
            # write() works on Python 2 and 3; exit non-zero because this
            # is an error path.
            sys.stderr.write(
                "Unrecognized extension for %s!\n" % (plot_file))
            sys.exit(1)
        # Close the device even if plotting fails so the file is finalised.
        try:
            pp.plot()
        finally:
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
# Create animated .gif of pickups by hour import imageio file_names = sorted((fn for fn in os.listdir('./plots') if fn.startswith('taxi_pickups'))) file_names = ['plots/' + s for s in file_names] images = [] for filename in file_names: images.append(imageio.imread(filename)) imageio.mimsave('./plots/pickups_movie.gif', images, duration=0.4) # total pickups by date, color p1 = ggplot2.ggplot(pandas2ri.py2ri(date_avgs)) + \ ggplot2.aes_string(x='date', y='total_pickups', color='type') + \ ggplot2.scale_colour_manual(values = robjects.StrVector(['green', 'yellow'])) + \ ggplot2.geom_line() + \ ggplot2.theme(legend_position='bottom') + \ ggplot2.labs(y='Total Pickups', x='Date', title='Total Pickups by Date') p1.save('./plots/pickups_by_date.png', width=6, height=5) # average fare and tip by date, color p2 = ggplot2.ggplot(pandas2ri.py2ri(date_avgs)) + \ ggplot2.aes_string(x='date', y='fare_amount', color='type') + \ ggplot2.scale_colour_manual(values = robjects.StrVector(['green', 'yellow'])) + \ ggplot2.geom_line() + \ ggplot2.theme(legend_position='bottom') + \ ggplot2.labs(y='Average Fare ($)', x='Date', title='Average Fare by Date') p2.save('./plots/fares_by_date.png', width=6, height=5) p3 = ggplot2.ggplot(pandas2ri.py2ri(date_avgs)) + \ ggplot2.aes_string(x='date', y='tip_amount', color='type') + \
# Assemble the benchmark table: Python results (combos / times) followed by
# the R results (combos_r).  ``+`` on rpy2 vectors concatenates them.
d['code'] = StrVector([x[0] for x in combos]) + StrVector([x[0] for x in combos_r])
d['sequence'] = StrVector([x[-2] for x in combos]) + StrVector(
    [x[0] for x in combos_r])
# NOTE(review): the R half of 'time' is read from x[0] of combos_r, the same
# field used for 'code' and 'sequence' above -- confirm this is intended.
d['time'] = FloatVector([x for x in times]) + FloatVector(
    [x[0] for x in combos_r])
d['n_loop'] = IntVector([x[-1] for x in combos]) + IntVector(
    [x[1] for x in combos_r])
# ``range`` works on Python 2 and 3 (``xrange`` is Python 2 only).
d['group'] = StrVector(
    [d['code'][x] + ':' + d['sequence'][x]
     for x in range(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2
# ``opts(title=...)`` is defunct in ggplot2; ``labs(title=...)`` sets the title.
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop",
                                         y="time",
                                         colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop",
                                          y="time",
                                          colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.labs(title="Benchmark (running time)")

from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png',
              width=712, height=512)
p.plot()
grdevices.dev_off()
# ests holds the intercept, the slope and then one value per data point.
intercept = ests[0]
slope = ests[1]
gs = [ests[j + 2] for j in range(len(x))]
# Call form of print: identical output on Python 2, valid on Python 3.
print(gs)

# A point counts as typical when its value reaches the cut, i.e. the smaller
# of 0.5 and the 15th percentile of gs.
cut = min(0.5, np.percentile(gs, 15))
typical = [g >= cut for g in gs]

pdf = ro.DataFrame(
    {
        "x": ro.FloatVector(x),
        "y": ro.FloatVector(y),
        "e": ro.FloatVector(e),
        "ymin": ro.FloatVector(y - e),
        "ymax": ro.FloatVector(y + e),
        "yest": ro.FloatVector(slope * x + intercept),
        "typical": ro.BoolVector(typical),
    }
)
rprint(pdf)

# Points (coloured/shaped by 'typical') with error bars and the fitted line.
gpf = ggplot2.ggplot(pdf)
ppf = (
    gpf
    + ggplot2.geom_point(
        ggplot2.aes_string(x="x", y="y", color="typical", shape="typical"),
        size=5)
    + ggplot2.geom_errorbar(
        ggplot2.aes_string(x="x", ymin="ymin", ymax="ymax"))
    + ggplot2.geom_line(ggplot2.aes_string(x="x", y="yest"))
)

grdevices.png(file="fit.png", width=512, height=512)
print(ppf)
grdevices.dev_off()
'axis.text.x':element_text(size=size,color=robjects.r.color_axis_text), 'axis.text.y':element_text(size=size,color=robjects.r.color_axis_text), 'axis.title.x':element_text(size=size,color=robjects.r.color_axis_title, vjust=0), #'panel.grid.major':element_line(color=robjects.r.color_grid_major,size=.25), 'axis.title.y':element_text(size=size,color=robjects.r.color_axis_title,angle=90)}) #??? efficiently change legend titles #right now it takes two legend calls to make this work #alternatives that tried and failed #base_plot = lambda gr_name = 'variable': ggplot2.aes_string(x='x', y='value',group=gr_name,colour=gr_name, shape = gr_name) #colors = ggplot2.scale_colour_manual(values=robjects.r.palette_lines, name = ltitle) pandas2ri.activate() #set up basic, repetitive plot features base_plot = ggplot2.aes_string(x='x', y='value',group='variable',colour='variable', shape = 'variable') line = ggplot2.geom_line() point = ggplot2.geom_point() bar = ggplot2.geom_bar(stat="identity") vert_line_onset = ggplot2.geom_vline(xintercept=-1, linetype=2, colour="red", alpha=0.25) vert_line_exhaust = ggplot2.geom_vline(xintercept=5, linetype=2, colour="red", alpha=0.25) ltitle = "crazy" ltitle_default = 'Variable' #colors = lambda ltitle = ltitle_default: ggplot2.scale_colour_manual(values=robjects.r.palette_lines, name = ltitle) colors = ggplot2.scale_colour_manual(values=robjects.r.palette_lines) legend_t_c = lambda ltitle = ltitle_default: ggplot2.scale_color_discrete(name = ltitle) legend_t_s = lambda ltitle = ltitle_default: ggplot2.scale_shape_discrete(name = ltitle) loc_default = robjects.r('c(1,0)') legend_f = lambda loc = loc_default: ggplot2.theme(**{'legend.position':loc, 'legend.justification':loc}) ggsave = lambda filename, plot: robjects.r.ggsave(filename=out_path + filename + ".pdf", plot=plot, width = 6, height = 4) colors_alt = ggplot2.scale_colour_manual(values=robjects.r.palette_lines[1])
def as_dataframe(cfg, results, basis):
    """Collect timing results, save them from R and plot the speedup chart.

    For every variation/language/problem/thread combination the mean time,
    half the width of its t-test confidence interval, the memory usage and
    the speedup (with pairwiseCI bounds) are gathered.  Rows where the
    language or the problem is ``'ideal'`` use the thread count as speedup.
    The table is saved to ``performance.Rda``, reshaped wide by variation,
    and the ``expertpar`` speedups are plotted to
    ``speedup-expertpar-all.pdf``.

    Args:
        cfg: Configuration; ``variations``, ``languages``, ``problems`` and
            ``threads`` are read.
        results: Nested mapping indexed as
            ``results[thread][problem][variation][language][0]``.
        basis: Speedup baseline: ``'seq'`` (sequential run), ``'p1'``
            (parallel run with one thread) or ``'fastest'`` (the one of
            the two with the smaller mean).

    Returns:
        None; the outputs are the two files named above.
    """
    r = robjects.r
    varis = []
    langs = []
    probs = []
    times = []
    threads = []
    # speedups, with upper and lower bounds below
    speedups = []
    speedup_lowers = []
    speedup_uppers = []
    ses = []  # standard errors
    mems = []  # memory usage

    langs_ideal = list(cfg.languages)
    langs_ideal.append('ideal')
    probs_ideal = list(cfg.problems)
    probs_ideal.append('ideal')

    for var in cfg.variations:
        for lang in langs_ideal:
            for prob in probs_ideal:
                for thread in cfg.threads:
                    if lang == 'ideal' and prob == 'ideal':
                        continue
                    elif lang == 'ideal' or prob == 'ideal':
                        # Reference row: ideal speedup equals thread count.
                        varis.append(var)
                        langs.append(pretty_langs[lang])
                        probs.append(prob)
                        threads.append(thread)
                        speedups.append(thread)
                        speedup_lowers.append(thread)
                        speedup_uppers.append(thread)
                        times.append(0)
                        ses.append(0)
                        mems.append(0)
                        continue

                    varis.append(var)
                    langs.append(pretty_langs[lang])
                    probs.append(prob)
                    threads.append(thread)

                    # Sequential variations are looked up under the last
                    # configured thread count.
                    if var.find('seq') >= 0:
                        thread = cfg.threads[-1]

                    vals = FloatVector(results[thread][prob][var][lang][0])
                    time = mean(vals)
                    times.append(time)

                    #
                    # time confidence interval
                    #
                    # The key must be exactly "conf.level": with a leading
                    # space R does not match it and the default level is
                    # silently used instead of 0.999.
                    t_result = r['t.test'](
                        FloatVector(vals),
                        **{"conf.level": 0.999}).rx('conf.int')[0]
                    ses.append((t_result[1] - t_result[0]) / 2)

                    #
                    # memory usage
                    #
                    mem_filename = get_mem_output(lang, prob, var)
                    with open(mem_filename, 'r') as mem_file:
                        mem = mem_file.readline()
                    mems.append(float(mem))

                    # we include dummy data for the sequential case to avoid
                    # the speedup calculation below
                    if var.find('seq') >= 0:
                        speedups.append(1)
                        speedup_lowers.append(1)
                        speedup_uppers.append(1)
                        continue

                    #
                    # speedup values and confidence intervals
                    #
                    seq_vals = results[cfg.threads[-1]][prob][
                        var.replace('par', 'seq')][lang][0]

                    # sequential base
                    base = FloatVector(seq_vals)
                    # base with p = 1
                    base_p1 = FloatVector(results[1][prob][var][lang][0])
                    # use fastest sequential program
                    if (basis == 'fastest' and mean(base_p1) < mean(base)) \
                            or basis == 'p1':
                        base = base_p1

                    labels = ['Base'] * r.length(base)[0] \
                        + ['N'] * r.length(vals)[0]
                    df = DataFrame({'Times': base + vals,
                                    'Type': StrVector(labels)})
                    ratio_test = r['pairwiseCI'](
                        r('Times ~ Type'), data=df,
                        control='N',
                        method='Param.ratio',
                        **{'var.equal': False})[0][0]

                    speedups.append(mean(base) / time)
                    speedup_lowers.append(ratio_test[1][0])
                    speedup_uppers.append(ratio_test[2][0])

    df = robjects.DataFrame({
        'Language': StrVector(langs),
        'Problem': StrVector(probs),
        'Variation': StrVector(varis),
        'Threads': IntVector(threads),
        'Time': FloatVector(times),
        'SE': FloatVector(ses),
        'Speedup': FloatVector(speedups),
        'SpeedupLower': FloatVector(speedup_lowers),
        'SpeedupUpper': FloatVector(speedup_uppers),
        'Mem': FloatVector(mems),
    })

    r.assign('df', df)
    r('save (df, file="performance.Rda")')

    # reshape the data to make variation not a column itself, but a part of
    # the other columns describe ie, time, speedup, etc.
    #
    # also, remove the 'ideal' problem as we don't want it in this plot.
    df = r('''
redf = reshape (df,
                timevar="Variation",
                idvar = c("Language","Problem","Threads"),
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

    r.pdf('speedup-expertpar-all.pdf', height=6.5, width=10)

    change_name = 'Language'
    gg = ggplot2.ggplot(df)
    limits = ggplot2.aes(ymax='SpeedupUpper.expertpar',
                         ymin='SpeedupLower.expertpar')

    # ``opts``/``theme_text`` are defunct in ggplot2; ``theme`` and
    # ``element_text`` are their replacements.
    serif = ggplot2.element_text(family='serif', size=10)
    serif_bold = ggplot2.element_text(family='serif', face='bold', size=10)
    pp = gg + \
        ggplot2.geom_line() + ggplot2.geom_point(size=2.5) + \
        robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') + \
        ggplot2.aes_string(x='Threads', y='Speedup.expertpar',
                           group=change_name, color=change_name,
                           shape=change_name) + \
        ggplot2.geom_errorbar(limits, width=0.25) + \
        ggplot2.theme(**{
            'axis.title.x': ggplot2.element_text(
                family='serif', face='bold', size=10, vjust=-0.2),
            'axis.title.y': ggplot2.element_text(
                family='serif', face='bold', size=10, angle=90, vjust=0.2),
            'axis.text.x': serif,
            'axis.text.y': serif,
            'legend.title': serif_bold,
            'legend.text': serif,
            'strip.text.x': serif,
            'aspect.ratio': 1,
        }) + \
        robjects.r('ylab("Speedup")') + \
        robjects.r('xlab("Number of cores")') + \
        ggplot2.facet_wrap('Problem', nrow=2)

    pp.plot()
    r['dev.off']()
from rpy2.robjects.vectors import DataFrame, FloatVector, StrVector, IntVector

# Benchmark table: the Python results (combos / times) followed by the R
# results (combos_r / times_r).  ``+`` on rpy2 vectors concatenates them.
d = {
    'code': StrVector([x[0] for x in combos])
    + StrVector([x[0] for x in combos_r]),
    'sequence': StrVector([x[-2] for x in combos])
    + StrVector([x[0] for x in combos_r]),
    'time': FloatVector([x for x in times]) + FloatVector(times_r),
    'n_loop': IntVector([x[-1] for x in combos])
    + IntVector([x[3] for x in combos_r]),
}
# "<code>:<sequence>" label for every row.
d['group'] = StrVector([d['code'][x] + ':' + d['sequence'][x]
                        for x in range(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2

# Running time against repeat count, one panel per sequence, one colour per
# code variant.
timing_aes = dict(x="n_loop", y="time", colour="code")
p = (
    ggplot2.ggplot(dataf)
    + ggplot2.geom_line(ggplot2.aes_string(**timing_aes))
    + ggplot2.geom_point(ggplot2.aes_string(**timing_aes))
    + ggplot2.facet_wrap(Formula('~sequence'))
    + ggplot2.scale_y_continuous('running time')
    + ggplot2.scale_x_continuous('repeated n times', )
    + ggplot2.xlim(0, max(n_loops))
    + ggplot2.labs(title="Benchmark (running time)")
)

from rpy2.robjects.packages import importr

grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png',
              width=712, height=512)
# Evaluate the model for building orientations 0, 10, ..., 360 and keep one
# heat-demand value (res[2][0]) per orientation.
heat_demand = np.zeros(37)
Bdim = robjects.FloatVector([12, 6])
for i, BO in enumerate(range(0, 361, 10)):
    res = ECR(Building_Orientation=BO, Building_Dim=Bdim)
    heat_demand[i] = res[2][0]

# Transform to R data types
hd = robjects.FloatVector(list(heat_demand))
bo = robjects.FloatVector(list(range(0, 361, 10)))

# Create a python dictionary
p_datadic = {'Heat_Demand': hd,
             'Building_Orientation': bo}

# Create R data.frame
r_dataf = robjects.DataFrame(p_datadic)

# plot with ggplot2: heat demand as a red line in polar coordinates
gp = ggplot2.ggplot(r_dataf)
layers = [
    ggplot2.aes_string(y='Heat_Demand', x='Building_Orientation'),
    ggplot2.geom_line(colour="red", size=1),
    ggplot2.coord_polar(direction=-1, start=-pi / 2),
    ggplot2.ggtitle("Heat demand for all possible buildimg orientations"),
    ggplot2.scale_x_continuous(
        breaks=robjects.FloatVector(range(0, 360, 15))),
]
pp = gp
for layer in layers:
    pp = pp + layer

pp.plot()
grdevices.dev_off()
def as_dataframe(cfg, results, basis):
    """Collect timing results, save them from R and plot the speedup chart.

    For every variation/language/problem/thread combination the mean time,
    half the width of its t-test confidence interval, the memory usage and
    the speedup (with pairwiseCI bounds) are gathered.  Rows where the
    language or the problem is ``'ideal'`` use the thread count as speedup.
    The table is saved to ``performance.Rda``, reshaped wide by variation,
    and the ``expertpar`` speedups are plotted to
    ``speedup-expertpar-all.pdf``.

    Args:
        cfg: Configuration; ``variations``, ``languages``, ``problems`` and
            ``threads`` are read.
        results: Nested mapping indexed as
            ``results[thread][problem][variation][language][0]``.
        basis: Speedup baseline: ``'seq'`` (sequential run), ``'p1'``
            (parallel run with one thread) or ``'fastest'`` (the one of
            the two with the smaller mean).

    Returns:
        None; the outputs are the two files named above.
    """
    r = robjects.r
    varis = []
    langs = []
    probs = []
    times = []
    threads = []
    # speedups, with upper and lower bounds below
    speedups = []
    speedup_lowers = []
    speedup_uppers = []
    ses = []  # standard errors
    mems = []  # memory usage

    langs_ideal = list(cfg.languages)
    langs_ideal.append('ideal')
    probs_ideal = list(cfg.problems)
    probs_ideal.append('ideal')

    for var in cfg.variations:
        for lang in langs_ideal:
            for prob in probs_ideal:
                for thread in cfg.threads:
                    if lang == 'ideal' and prob == 'ideal':
                        continue
                    elif lang == 'ideal' or prob == 'ideal':
                        # Reference row: ideal speedup equals thread count.
                        varis.append(var)
                        langs.append(pretty_langs[lang])
                        probs.append(prob)
                        threads.append(thread)
                        speedups.append(thread)
                        speedup_lowers.append(thread)
                        speedup_uppers.append(thread)
                        times.append(0)
                        ses.append(0)
                        mems.append(0)
                        continue

                    varis.append(var)
                    langs.append(pretty_langs[lang])
                    probs.append(prob)
                    threads.append(thread)

                    # Sequential variations are looked up under the last
                    # configured thread count.
                    if var.find('seq') >= 0:
                        thread = cfg.threads[-1]

                    vals = FloatVector(results[thread][prob][var][lang][0])
                    time = mean(vals)
                    times.append(time)

                    #
                    # time confidence interval
                    #
                    # The key must be exactly "conf.level": with a leading
                    # space R does not match it and the default level is
                    # silently used instead of 0.999.
                    t_result = r['t.test'](
                        FloatVector(vals),
                        **{"conf.level": 0.999}).rx('conf.int')[0]
                    ses.append((t_result[1] - t_result[0]) / 2)

                    #
                    # memory usage
                    #
                    mem_filename = get_mem_output(lang, prob, var)
                    with open(mem_filename, 'r') as mem_file:
                        mem = mem_file.readline()
                    mems.append(float(mem))

                    # we include dummy data for the sequential case to avoid
                    # the speedup calculation below
                    if var.find('seq') >= 0:
                        speedups.append(1)
                        speedup_lowers.append(1)
                        speedup_uppers.append(1)
                        continue

                    #
                    # speedup values and confidence intervals
                    #
                    seq_vals = results[cfg.threads[-1]][prob][
                        var.replace('par', 'seq')][lang][0]

                    # sequential base
                    base = FloatVector(seq_vals)
                    # base with p = 1
                    base_p1 = FloatVector(results[1][prob][var][lang][0])
                    # use fastest sequential program
                    if (basis == 'fastest' and mean(base_p1) < mean(base)) \
                            or basis == 'p1':
                        base = base_p1

                    labels = ['Base'] * r.length(base)[0] \
                        + ['N'] * r.length(vals)[0]
                    df = DataFrame({'Times': base + vals,
                                    'Type': StrVector(labels)})
                    ratio_test = r['pairwiseCI'](
                        r('Times ~ Type'), data=df,
                        control='N',
                        method='Param.ratio',
                        **{'var.equal': False})[0][0]

                    speedups.append(mean(base) / time)
                    speedup_lowers.append(ratio_test[1][0])
                    speedup_uppers.append(ratio_test[2][0])

    df = robjects.DataFrame({
        'Language': StrVector(langs),
        'Problem': StrVector(probs),
        'Variation': StrVector(varis),
        'Threads': IntVector(threads),
        'Time': FloatVector(times),
        'SE': FloatVector(ses),
        'Speedup': FloatVector(speedups),
        'SpeedupLower': FloatVector(speedup_lowers),
        'SpeedupUpper': FloatVector(speedup_uppers),
        'Mem': FloatVector(mems),
    })

    r.assign('df', df)
    r('save (df, file="performance.Rda")')

    # reshape the data to make variation not a column itself, but a part of
    # the other columns describe ie, time, speedup, etc.
    #
    # also, remove the 'ideal' problem as we don't want it in this plot.
    df = r('''
redf = reshape (df,
                timevar="Variation",
                idvar = c("Language","Problem","Threads"),
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

    r.pdf('speedup-expertpar-all.pdf', height=6.5, width=10)

    change_name = 'Language'
    gg = ggplot2.ggplot(df)
    limits = ggplot2.aes(ymax='SpeedupUpper.expertpar',
                         ymin='SpeedupLower.expertpar')

    # ``opts``/``theme_text`` are defunct in ggplot2; ``theme`` and
    # ``element_text`` are their replacements.
    serif = ggplot2.element_text(family='serif', size=10)
    serif_bold = ggplot2.element_text(family='serif', face='bold', size=10)
    pp = gg + \
        ggplot2.geom_line() + ggplot2.geom_point(size=2.5) + \
        robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') + \
        ggplot2.aes_string(x='Threads', y='Speedup.expertpar',
                           group=change_name, color=change_name,
                           shape=change_name) + \
        ggplot2.geom_errorbar(limits, width=0.25) + \
        ggplot2.theme(**{
            'axis.title.x': ggplot2.element_text(
                family='serif', face='bold', size=10, vjust=-0.2),
            'axis.title.y': ggplot2.element_text(
                family='serif', face='bold', size=10, angle=90, vjust=0.2),
            'axis.text.x': serif,
            'axis.text.y': serif,
            'legend.title': serif_bold,
            'legend.text': serif,
            'strip.text.x': serif,
            'aspect.ratio': 1,
        }) + \
        robjects.r('ylab("Speedup")') + \
        robjects.r('xlab("Number of cores")') + \
        ggplot2.facet_wrap('Problem', nrow=2)

    pp.plot()
    r['dev.off']()
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
    """Draw ``self.df`` with ggplot2 (through rpy2) and save it as a PNG.

    Layers are added in a fixed order: boxplot, points, second boxplot,
    smoother, density, line. The R graphics device is always closed, even
    when building or rendering the plot fails.

    Args:
        fn: Output path handed to R's ``png`` device.
        x: Column mapped to the x aesthetic.
        y: Column mapped to the y aesthetic.
        col: Optional column mapped to colour (and to fill for boxes/points).
        group: Optional column mapped to the group aesthetic.
        w: Device width passed to ``png``.
        h: Device height passed to ``png``.
        size: Point size.
        smooth: Falsy for no smoother, ``'lm'`` for a linear-model smoother,
            any other truthy value for ``stat_smooth``'s default method.
        point: Add a point layer.
        jitter: Jitter the point layer.
        boxplot: Add a boxplot layer underneath the points.
        boxplot2: Add a boxplot layer on top of the points.
        title: Plot title; when falsy, the last ``/``-separated component of
            ``fn`` is used.
        flip: Swap the axes with ``coord_flip``.
        se: Forwarded to ``stat_smooth`` as its ``se`` argument.
        density: Add a density layer with ``y='..count..'``.
        line: Add a jittered line layer.
    """
    df = self.df
    grdevices = importr('grDevices')
    if not title:
        title = fn.split("/")[-1]

    grdevices.png(file=fn, width=w, height=h)
    try:
        pp = ggplot2.ggplot(df)

        # Base aesthetics shared by every layer.
        if col and group:
            pp += ggplot2.aes_string(x=x, y=y, col=col, group=group)
        elif col:
            pp += ggplot2.aes_string(x=x, y=y, col=col)
        elif group:
            pp += ggplot2.aes_string(x=x, y=y, group=group)
        else:
            pp += ggplot2.aes_string(x=x, y=y)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            point_kwargs = {'size': size}
            if jitter:
                point_kwargs['position'] = 'jitter'
            if col:
                pp += ggplot2.geom_point(ggplot2.aes_string(fill=col, col=col), **point_kwargs)
            else:
                pp += ggplot2.geom_point(**point_kwargs)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue', outlier_colour="NA")
            else:
                # NOTE(review): unlike the coloured branch, outliers are not
                # hidden here -- confirm whether that is intentional.
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            smooth_kwargs = {'size': 1, 'se': se}
            if smooth == 'lm':
                smooth_kwargs['method'] = 'lm'
            if col:
                pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col), **smooth_kwargs)
            else:
                pp += ggplot2.stat_smooth(col='blue', **smooth_kwargs)

        if density:
            pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        # NOTE(review): opts/theme_text look like the pre-theme() ggplot2
        # API -- confirm the installed rpy2/ggplot2 still provides them.
        pp += ggplot2.opts(**{
            'title': title,
            'axis.text.x': ggplot2.theme_text(size=24),
            'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
        })
        pp += ggplot2.scale_colour_hue()

        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
    finally:
        # Close the PNG device even on failure so it is not left open.
        grdevices.dev_off()

    # Single-argument call form prints identically on Python 2 and 3.
    print(">> saved: " + fn)
def line_plot(cfg, var, control, change_name, changing, selector, base_selector, basis):
    """Plot speedup against thread count, one line per entry of ``changing``.

    An extra "ideal" series (speedup equal to the thread count) is always
    included. The plot is drawn on the current R graphics device, which is
    then closed with ``dev.off``.

    Args:
        cfg: Object whose ``threads`` attribute lists the thread counts.
        var: Not used by this function.
        control: Not used by this function.
        change_name: Name of the column that distinguishes the series. When
            it is ``'Language'`` the entries are mapped through
            ``pretty_langs``.
        changing: The series keys to plot.
        selector: ``selector(c)`` returns a callable that maps a thread count
            to the timings for series ``c``.
        base_selector: ``base_selector(c)`` returns the sequential timings
            for series ``c``.
        basis: ``'seq'`` uses the sequential timings as baseline, ``'p1'``
            uses the one-thread timings, ``'fastest'`` uses whichever of the
            two has the smaller mean. Any other value behaves like ``'seq'``.
    """
    speedups = []
    thrds = []
    changes = []
    lowers = []
    uppers = []

    # Reference series: perfect scaling with a zero-width interval.
    for n in cfg.threads:
        # NOTE(review): probs and langs are not defined in this function;
        # presumably module-level lists -- confirm they exist and that
        # mutating them here is intended.
        probs.append('ideal')
        langs.append('ideal')
        speedups.append(n)
        thrds.append(n)
        changes.append('ideal')
        lowers.append(n)
        uppers.append(n)

    for c in changing:
        sel = selector(c)
        base = FloatVector(base_selector(c))  # sequential timings
        base_p1 = FloatVector(sel(1))         # timings with one thread

        if basis == 'p1' or (basis == 'fastest' and mean(base_p1) < mean(base)):
            base = base_p1

        # The baseline does not depend on the thread count; compute it once.
        base_mean = mean(base)
        label = pretty_langs[c] if change_name == 'Language' else c

        for n in cfg.threads:
            ntimes = FloatVector(sel(n))

            # Confidence interval for the Base/N ratio of mean times.
            # NOTE(review): `base + ntimes` is expected to concatenate the
            # two vectors (the labels are built to that length) -- confirm.
            labels = ['Base'] * r.length(base)[0] + ['N'] * r.length(ntimes)[0]
            df = DataFrame({'Times': base + ntimes, 'Type': StrVector(labels)})
            ratio_test = r['pairwiseCI'](
                r('Times ~ Type'),
                data=df,
                control='N',
                method='Param.ratio',
                **{'var.equal': False, 'conf.level': 0.999}
            )[0][0]

            lowers.append(ratio_test[1][0])
            uppers.append(ratio_test[2][0])
            speedups.append(base_mean / mean(ntimes))
            thrds.append(n)
            changes.append(label)

    df = DataFrame({
        'Speedup': FloatVector(speedups),
        'Threads': IntVector(thrds),
        change_name: StrVector(changes),
        'Lower': FloatVector(lowers),
        'Upper': FloatVector(uppers),
    })

    # One point shape per series, with the "ideal" series first.
    ideal_changing = ['ideal']
    if change_name == 'Language':
        ideal_changing.extend([pretty_langs[c] for c in changing])
    else:
        ideal_changing.extend(changing)
    legendVec = IntVector(range(len(ideal_changing)))
    legendVec.names = StrVector(ideal_changing)

    limits = ggplot2.aes(ymax='Upper', ymin='Lower')
    pp = (
        ggplot2.ggplot(df)
        + ggplot2.geom_line()
        + ggplot2.geom_point(size=3)
        + ggplot2.aes_string(x='Threads', y='Speedup', group=change_name,
                             color=change_name, shape=change_name)
        + ggplot2.scale_shape_manual(values=legendVec)
        + ggplot2.geom_errorbar(limits, width=0.25)
        + ggplot2_options()
        + ggplot2_colors()
        + ggplot2.opts(**{'axis.title.x': ggplot2.theme_text(
            family='serif', face='bold', size=15, vjust=-0.2)})
        + robjects.r('ylab("Speedup")')
        + robjects.r('xlab("Cores")')
    )

    pp.plot()
    # Closes the current device; this function does not open one itself.
    r['dev.off']()
def line_plot(cfg, var, control, change_name, changing, selector, base_selector, basis):
    """Plot speedup against thread count, one line per entry of ``changing``.

    An extra "ideal" series (speedup equal to the thread count) is always
    included. The plot is drawn on the current R graphics device, which is
    then closed with ``dev.off``.

    Args:
        cfg: Object whose ``threads`` attribute lists the thread counts.
        var: Not used by this function.
        control: Not used by this function.
        change_name: Name of the column that distinguishes the series. When
            it is ``'Language'`` the entries are mapped through
            ``pretty_langs``.
        changing: The series keys to plot.
        selector: ``selector(c)`` returns a callable that maps a thread count
            to the timings for series ``c``.
        base_selector: ``base_selector(c)`` returns the sequential timings
            for series ``c``.
        basis: ``'seq'`` uses the sequential timings as baseline, ``'p1'``
            uses the one-thread timings, ``'fastest'`` uses whichever of the
            two has the smaller mean. Any other value behaves like ``'seq'``.
    """
    speedups = []
    thrds = []
    changes = []
    lowers = []
    uppers = []

    # Reference series: perfect scaling with a zero-width interval.
    for n in cfg.threads:
        # NOTE(review): probs and langs are not defined in this function;
        # presumably module-level lists -- confirm they exist and that
        # mutating them here is intended.
        probs.append('ideal')
        langs.append('ideal')
        speedups.append(n)
        thrds.append(n)
        changes.append('ideal')
        lowers.append(n)
        uppers.append(n)

    for c in changing:
        sel = selector(c)
        base = FloatVector(base_selector(c))  # sequential timings
        base_p1 = FloatVector(sel(1))         # timings with one thread

        if basis == 'p1' or (basis == 'fastest' and mean(base_p1) < mean(base)):
            base = base_p1

        # The baseline does not depend on the thread count; compute it once.
        base_mean = mean(base)
        label = pretty_langs[c] if change_name == 'Language' else c

        for n in cfg.threads:
            ntimes = FloatVector(sel(n))

            # Confidence interval for the Base/N ratio of mean times.
            # NOTE(review): `base + ntimes` is expected to concatenate the
            # two vectors (the labels are built to that length) -- confirm.
            labels = ['Base'] * r.length(base)[0] + ['N'] * r.length(ntimes)[0]
            df = DataFrame({'Times': base + ntimes, 'Type': StrVector(labels)})
            ratio_test = r['pairwiseCI'](
                r('Times ~ Type'),
                data=df,
                control='N',
                method='Param.ratio',
                **{'var.equal': False, 'conf.level': 0.999}
            )[0][0]

            lowers.append(ratio_test[1][0])
            uppers.append(ratio_test[2][0])
            speedups.append(base_mean / mean(ntimes))
            thrds.append(n)
            changes.append(label)

    df = DataFrame({
        'Speedup': FloatVector(speedups),
        'Threads': IntVector(thrds),
        change_name: StrVector(changes),
        'Lower': FloatVector(lowers),
        'Upper': FloatVector(uppers),
    })

    # One point shape per series, with the "ideal" series first.
    ideal_changing = ['ideal']
    if change_name == 'Language':
        ideal_changing.extend([pretty_langs[c] for c in changing])
    else:
        ideal_changing.extend(changing)
    legendVec = IntVector(range(len(ideal_changing)))
    legendVec.names = StrVector(ideal_changing)

    limits = ggplot2.aes(ymax='Upper', ymin='Lower')
    pp = (
        ggplot2.ggplot(df)
        + ggplot2.geom_line()
        + ggplot2.geom_point(size=3)
        + ggplot2.aes_string(x='Threads', y='Speedup', group=change_name,
                             color=change_name, shape=change_name)
        + ggplot2.scale_shape_manual(values=legendVec)
        + ggplot2.geom_errorbar(limits, width=0.25)
        + ggplot2_options()
        + ggplot2_colors()
        + ggplot2.opts(**{'axis.title.x': ggplot2.theme_text(
            family='serif', face='bold', size=15, vjust=-0.2)})
        + robjects.r('ylab("Speedup")')
        + robjects.r('xlab("Cores")')
    )

    pp.plot()
    # Closes the current device; this function does not open one itself.
    r['dev.off']()
## note that the order matters when we add another layer in ggplot (here IL_railroads): first aes, then data, that's different from R ## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349) ## note that we use dictionary to set the opts to be able to set options as keywords, for example legend.key.size p_map = ggplot2.ggplot(IL_final) + \ ggplot2.geom_polygon(ggplot2.aes(x = 'long', y = 'lat', group = 'group', color = 'ObamaShare', fill = 'ObamaShare')) + \ ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \ ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \ ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \ ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \ 'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \ 'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \ 'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \ 'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \ 'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \ 'axis.text.y': ggplot2.theme_blank()} ) + \ ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \ ggplot2.coord_equal() p_map.plot() ## add the scatterplot ## define layout of subplot with viewports vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4) p_sub = ggplot2.ggplot(RR_distance) + \ ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \ ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \ ggplot2.stat_smooth(color="black") + \ ggplot2.opts(**{'legend.position': 'none'}) + \ ggplot2.scale_x_continuous("Obama Vote Share") + \
## note that the order matters when we add another layer in ggplot (here IL_railroads): first aes, then data, that's different from R ## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349) ## note that we use dictionary to set the opts to be able to set options as keywords, for example legend.key.size p_map = ggplot2.ggplot(IL_final) + \ ggplot2.geom_polygon(ggplot2.aes(x = 'long', y = 'lat', group = 'group', color = 'ObamaShare', fill = 'ObamaShare')) + \ ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \ ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \ ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \ ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \ 'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \ 'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \ 'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \ 'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \ 'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \ 'axis.text.y': ggplot2.theme_blank()} ) + \ ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \ ggplot2.coord_equal() p_map.plot() ## add the scatterplot ## define layout of subplot with viewports vp_sub = grid.viewport(x = 0.19, y = 0.2, width = 0.32, height = 0.4) p_sub = ggplot2.ggplot(RR_distance) + \ ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \ ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \ ggplot2.stat_smooth(color="black") + \ ggplot2.opts(**{'legend.position': 'none'}) + \ ggplot2.scale_x_continuous("Obama Vote 
Share") + \
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
    """Draw ``self.df`` with ggplot2 (through rpy2) and save it as a PNG.

    Layers are added in a fixed order: boxplot, points, second boxplot,
    smoother, density, line. The R graphics device is always closed, even
    when building or rendering the plot fails.

    Args:
        fn: Output path handed to R's ``png`` device.
        x: Column mapped to the x aesthetic.
        y: Column mapped to the y aesthetic.
        col: Optional column mapped to colour (and to fill for boxes/points).
        group: Optional column mapped to the group aesthetic.
        w: Device width passed to ``png``.
        h: Device height passed to ``png``.
        size: Point size.
        smooth: Falsy for no smoother, ``'lm'`` for a linear-model smoother,
            any other truthy value for ``stat_smooth``'s default method.
        point: Add a point layer.
        jitter: Jitter the point layer.
        boxplot: Add a boxplot layer underneath the points.
        boxplot2: Add a boxplot layer on top of the points.
        title: Plot title; when falsy, the last ``/``-separated component of
            ``fn`` is used.
        flip: Swap the axes with ``coord_flip``.
        se: Forwarded to ``stat_smooth`` as its ``se`` argument.
        density: Add a density layer with ``y='..count..'``.
        line: Add a jittered line layer.
    """
    df = self.df
    grdevices = importr('grDevices')
    if not title:
        title = fn.split("/")[-1]

    grdevices.png(file=fn, width=w, height=h)
    try:
        pp = ggplot2.ggplot(df)

        # Base aesthetics shared by every layer.
        if col and group:
            pp += ggplot2.aes_string(x=x, y=y, col=col, group=group)
        elif col:
            pp += ggplot2.aes_string(x=x, y=y, col=col)
        elif group:
            pp += ggplot2.aes_string(x=x, y=y, group=group)
        else:
            pp += ggplot2.aes_string(x=x, y=y)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            point_kwargs = {'size': size}
            if jitter:
                point_kwargs['position'] = 'jitter'
            if col:
                pp += ggplot2.geom_point(ggplot2.aes_string(fill=col, col=col), **point_kwargs)
            else:
                pp += ggplot2.geom_point(**point_kwargs)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue', outlier_colour="NA")
            else:
                # NOTE(review): unlike the coloured branch, outliers are not
                # hidden here -- confirm whether that is intentional.
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            smooth_kwargs = {'size': 1, 'se': se}
            if smooth == 'lm':
                smooth_kwargs['method'] = 'lm'
            if col:
                pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col), **smooth_kwargs)
            else:
                pp += ggplot2.stat_smooth(col='blue', **smooth_kwargs)

        if density:
            pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        # NOTE(review): opts/theme_text look like the pre-theme() ggplot2
        # API -- confirm the installed rpy2/ggplot2 still provides them.
        pp += ggplot2.opts(**{
            'title': title,
            'axis.text.x': ggplot2.theme_text(size=24),
            'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
        })
        pp += ggplot2.scale_colour_hue()

        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
    finally:
        # Close the PNG device even on failure so it is not left open.
        grdevices.dev_off()

    # Single-argument call form prints identically on Python 2 and 3.
    print(">> saved: " + fn)
# Build the "Performance vs. Color" plot once per y-axis scale, each with its
# own viewport column on row 1. scale_linetype_manual ties the performance
# types to line types.
for col_i, yscale in enumerate(['log', 'linear']):
    vp = grid.viewport(**{'layout.pos.col': col_i + 1,
                          'layout.pos.row': 1})

    pp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x='variable', y='Performance', color='color',
                             shape='PerfType', linetype='PerfType')
        + ggplot2.ggtitle('Performance vs. Color')
        + ggplot2.theme(**{'legend.key.size': ro.r.unit(1.4, "lines")})
        + ggplot2.scale_colour_manual(
            "Color",
            values=colormap,
            breaks=colormap.names,
            labels=[elt[1] for elt in colormap_labels])
        + ggplot2.geom_point(size=3)
        + ggplot2.scale_linetype_manual(values=linemap)
        + ggplot2.geom_line(size=1.5)
    )

    if yscale == 'log':
        # Log axis only: a major grid line at every power of ten, with nine
        # minor lines between consecutive major lines.
        major_breaks = ro.r("10^(%d:%d)" % (gflops_range[0], gflops_range[1]))
        minor_breaks = ro.r(
            "rep(10^(%d:%d), each=9) * rep(1:9, %d)"
            % (gflops_range[0] - 1,
               gflops_range[1],
               gflops_range[1] - gflops_range[0]))
        pp = pp + ggplot2.scale_y_log10(breaks=major_breaks,
                                        minor_breaks=minor_breaks)

    # NOTE(review): the draw call is disabled, so vp and pp are built but
    # nothing is rendered -- confirm this is intended.
    #pp.plot(vp = vp)
#-- ggplot2perfcolor-end
grdevices.dev_off()
sampler.run_mcmc(starting_guesses, nsteps)
sample = sampler.chain  # shape = (nwalkers, nsteps, ndim)

# Point estimate per parameter: the mean over every walker and step in the
# chain.
ests = [np.mean(sample[:, :, j]) for j in range(ndim)]
intercept = ests[0]
slope = ests[1]
# The estimates after the first two are taken one per data point.
gs = [ests[j + 2] for j in range(len(x))]
# Single-argument call form prints identically on Python 2 and 3.
print(gs)

# A point counts as "typical" when its estimate reaches the cut, which is
# the 15th percentile of the estimates, capped at 0.5.
cut = min(0.5, np.percentile(gs, 15))
typical = [g >= cut for g in gs]

pdf = ro.DataFrame({
    'x': ro.FloatVector(x),
    'y': ro.FloatVector(y),
    'e': ro.FloatVector(e),
    'ymin': ro.FloatVector(y - e),
    'ymax': ro.FloatVector(y + e),
    'yest': ro.FloatVector(slope * x + intercept),
    'typical': ro.BoolVector(typical),
})
rprint(pdf)

gpf = ggplot2.ggplot(pdf)
ppf = (
    gpf
    + ggplot2.geom_point(
        ggplot2.aes_string(x='x', y='y', color='typical', shape='typical'),
        size=5)
    + ggplot2.geom_errorbar(
        ggplot2.aes_string(x='x', ymin='ymin', ymax='ymax'))
    + ggplot2.geom_line(ggplot2.aes_string(x='x', y='yest'))
)

grdevices.png(file="fit.png", width=512, height=512)
try:
    # NOTE(review): printing the rpy2 object presumably runs R's print
    # method, which draws on the open device -- confirm; ppf.plot() would be
    # the explicit form.
    print(ppf)
finally:
    # Close the PNG device even if rendering fails so it is not left open.
    grdevices.dev_off()