def ggplot2_options():
    """Build the shared ggplot2 theme options for the plots.

    Every text element uses a 15pt serif font. The y axis title and the
    legend title are bold, the y axis title is also rotated by 90 degrees,
    and the x axis title is blanked out.

    Returns:
        The object produced by ``ggplot2.opts`` for these settings.
    """
    def serif(**extra):
        # All text elements share family and size; callers pass only what
        # differs (face, angle, vjust).
        return ggplot2.theme_text(family='serif', size=15, **extra)

    settings = {}
    settings['axis.title.x'] = ggplot2.theme_blank()
    settings['axis.title.y'] = serif(face='bold', angle=90, vjust=0.2)
    settings['axis.text.x'] = serif()
    settings['axis.text.y'] = serif()
    settings['legend.title'] = serif(face='bold')
    settings['legend.text'] = serif()
    # Height/width ratio of the panel (numerically 1 / golden ratio).
    settings['aspect.ratio'] = 0.6180339888
    settings['strip.text.x'] = serif()
    return ggplot2.opts(**settings)
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Plot phylogenetic informativeness per time interval.

    The data is fetched on the R side into the R variable ``data`` through
    ``dbGetQuery(con, ...)``, using the query built by
    ``get_interval_query``.

    Args:
        locus_table: passed through to ``get_interval_query``.
        interval_table: passed through to ``get_interval_query``.
        intervals: passed through to ``get_interval_query``.
        loci: passed through to ``get_interval_query``.
        boxplot: when true, draw one boxplot per interval with jittered
            per-locus points on top; otherwise draw bars faceted by locus.

    Returns:
        The assembled ggplot object (not yet rendered).
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))

    # 'interval' is a factor on the R side, so its levels are re-declared
    # explicitly in the order given by order_intervals() (called on the
    # second element of the fetched frame) instead of sorting inside R.
    level_spec = order_intervals(fetched[1])
    robjects.r(
        '''data$interval <- factor(data$interval, {})'''.format(level_spec))

    figure = ggplot2.ggplot(robjects.r('''data'''))
    if boxplot:
        figure = figure + ggplot2.aes_string(x='interval', y='pi')
        figure = figure + ggplot2.geom_boxplot(
            **{'outlier.size': 0, 'alpha': 0.3})
        figure = figure + ggplot2.geom_jitter(
            ggplot2.aes_string(color='locus'),
            size=3,
            alpha=0.6,
            position=ggplot2.position_jitter(width=0.25))
    else:
        figure = figure + ggplot2.aes_string(
            x='interval', y='pi', fill='locus')
        figure = figure + ggplot2.geom_bar()
        figure = figure + ggplot2.facet_wrap(robjects.Formula('~ locus'))
        figure = figure + ggplot2.opts(**{
            'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
            'legend.position': 'none',
        })

    # Both variants end with the same axis labels, added in the same order.
    figure = figure + ggplot2.scale_y_continuous(
        'phylogenetic informativeness')
    figure = figure + ggplot2.scale_x_discrete('interval (years ago)')
    return figure
def single_locus_net_informativeness(locus_table, net_pi_table, locus):
    """Scatter-plot net phylogenetic informativeness over time for one locus.

    Args:
        locus_table: name of the table that provides the ``locus`` column.
        net_pi_table: name of the table joined to ``locus_table`` on ``id``.
        locus: locus name to select; also used as the plot title.

    Returns:
        The assembled ggplot object (not yet rendered).
    """
    # The query is wrapped in double quotes because it is pasted into R
    # source as a string literal.
    # NOTE(review): table names and locus are interpolated verbatim --
    # confirm callers never pass untrusted values.
    sql_template = '''"SELECT {0}.locus, time, pi FROM {0}, {1} WHERE {0}.id = {1}.id AND locus = '{2}'"'''
    r_query = sql_template.format(locus_table, net_pi_table, locus)
    rows = robjects.r('''dbGetQuery(con, {})'''.format(r_query))

    figure = ggplot2.ggplot(rows)
    figure = figure + ggplot2.aes_string(x='time', y='pi')
    figure = figure + ggplot2.geom_point(size=3, alpha=0.4)
    figure = figure + ggplot2.scale_x_reverse('years ago')
    figure = figure + ggplot2.scale_y_continuous(
        'phylogenetic informativeness')
    figure = figure + ggplot2.opts(title=locus)
    return figure
def runBoruta():
    """Run the Boruta relevance R script and plot the selected z-scores.

    Loads ``Rcode/zscores.RData``, sources the ``borutaRelevance.R`` script,
    calls the R function ``findRelevant`` on the ``massallfeatures`` and
    ``nonmassallfeatures`` objects from the R global environment, and
    renders a boxplot of the ``masszscore_selected`` element of its result
    on the current graphics device.

    Returns:
        None.
    """
    base.load("Rcode/zscores.RData")
    base.source('Z:/Cristina/MassNonmass/codeProject/codeBase/trainClassifier/Rcode/borutaRelevance.R')

    find_relevant = globalenv['findRelevant']
    outputBoruta = find_relevant(globalenv['massallfeatures'],
                                 globalenv['nonmassallfeatures'])

    # generate boxplot comparison of relevant mass features vs. the same
    # non-mass feature
    selected = outputBoruta.rx2("masszscore_selected")
    plotgp = ggplot2.ggplot(selected)
    plotgp = plotgp + ggplot2.aes_string(
        x='MorN', y='zscores', fill='factor(MorN)')
    plotgp = plotgp + ggplot2.geom_boxplot()
    plotgp = plotgp + ggplot2.opts(
        title="Comparison of Z-scores for Mass confirmed features",
        y="Z-scores")
    plotgp.plot()
def plot(data, filename, title, ggplotter, xid="N", yid="RunTime", factorid="Step"):
    """Render *data* to a PDF file with the geometry from *ggplotter*.

    Args:
        data: raw data, converted with ``make_dataframe``.
        filename: path of the PDF file to write (10 x 6 device).
        title: plot title.
        ggplotter: zero-argument callable returning the ggplot2 layer to
            draw (for example a geom).
        xid: column mapped to the x axis.
        yid: column mapped to the y axis.
        factorid: column used, wrapped in ``factor(...)``, for both the
            colour and the fill aesthetics.

    Returns:
        None. The plot is written to *filename*.
    """
    df = make_dataframe(data, xid, yid, factorid)
    factor_expr = 'factor(%s)' % factorid

    grdevices.pdf(file=filename, width=10, height=6)
    try:
        pp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x=xid, y=yid) + \
            ggplot2.aes_string(size=.5) + \
            ggplotter() + \
            ggplot2.aes_string(colour=factor_expr) + \
            ggplot2.aes_string(fill=factor_expr) + \
            ggplot2.opts(title=title) + \
            ggplot2.scale_fill_brewer(palette="Set2") + \
            ggplot2.scale_colour_brewer(palette="Set2")
        pp.plot()
    finally:
        # Always close the device: otherwise a failure while building or
        # rendering the plot leaves the PDF device open and later plots
        # would be drawn into this file.
        grdevices.dev_off()
def _locus_in_clause(loci):
    """Render *loci* as a parenthesised, single-quoted SQL IN list."""
    # Built by hand instead of via repr(tuple(loci)): a one-element tuple
    # renders as ('x',) (trailing comma, invalid SQL) and unicode strings
    # render with a u'' prefix on Python 2. Embedded single quotes are
    # doubled, which is the standard SQL escape.
    quoted = ["'" + locus.replace("'", "''") + "'" for locus in loci]
    return "(" + ", ".join(quoted) + ")"


def multiple_locus_net_informativeness_facet(locus_table, net_pi_table, loci):
    """Plot net phylogenetic informativeness over time, one facet per locus.

    Args:
        locus_table: name of the table that provides the ``locus`` column.
        net_pi_table: name of the table joined to ``locus_table`` on ``id``.
        loci: non-empty sequence of locus names. If the first entry is
            ``'all'`` (case-insensitive) no locus filter is applied.

    Returns:
        The assembled ggplot object (not yet rendered).

    Raises:
        IndexError: if *loci* is empty.
    """
    sql = "SELECT {0}.locus, time, pi FROM {0}, {1} WHERE {0}.id = {1}.id".format(
        locus_table, net_pi_table)
    if loci[0].lower() != 'all':
        sql += " and locus in " + _locus_in_clause(loci)
    # The query is pasted into R source, hence the surrounding double quotes.
    qry = '"' + sql + '"'

    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x='time', y='pi') + \
        ggplot2.geom_point(ggplot2.aes_string(colour='locus'), size=3,
                           alpha=0.4) + \
        ggplot2.scale_x_reverse('years ago') + \
        ggplot2.facet_wrap(robjects.Formula('~ locus')) + \
        ggplot2.opts(**{'legend.position': 'none'}) + \
        ggplot2.scale_y_continuous('phylogenetic informativeness')
    return plot
def ggplot2_options():
    """Return the common ggplot2 theme options (15pt serif text).

    Axis and legend text are plain; the legend title and the y axis title
    are bold, the latter rotated by 90 degrees. The x axis title is blank.

    Returns:
        The object produced by ``ggplot2.opts`` for these settings.
    """
    # Keyword sets for the three text variants; each theme element below
    # gets its own freshly built theme_text object.
    plain = {'family': 'serif', 'size': 15}
    bold = dict(plain, face='bold')
    rotated = dict(bold, angle=90, vjust=0.2)
    text = ggplot2.theme_text

    return ggplot2.opts(**{
        'axis.title.x': ggplot2.theme_blank(),
        'axis.title.y': text(**rotated),
        'axis.text.x': text(**plain),
        'axis.text.y': text(**plain),
        'legend.title': text(**bold),
        'legend.text': text(**plain),
        'aspect.ratio': 0.6180339888,
        'strip.text.x': text(**plain),
    })
def ggplot2_options():
    """Return the common ggplot2 theme options (15pt serif text).

    The x axis title is blanked; the y axis title is bold and rotated by
    90 degrees; the legend title is bold; axis and legend text are plain.

    Returns:
        The object produced by ``ggplot2.opts`` for these settings.
    """
    def plain_text():
        return ggplot2.theme_text(family='serif', size=15)

    def bold_text():
        return ggplot2.theme_text(family='serif', face='bold', size=15)

    def rotated_title():
        return ggplot2.theme_text(
            family='serif', face='bold', size=15, angle=90, vjust=0.2)

    theme = {}
    theme['axis.title.x'] = ggplot2.theme_blank()
    theme['axis.title.y'] = rotated_title()
    theme['axis.text.x'] = plain_text()
    theme['axis.text.y'] = plain_text()
    theme['legend.title'] = bold_text()
    theme['legend.text'] = plain_text()
    theme['aspect.ratio'] = 0.6180339888
    return ggplot2.opts(**theme)
def retrieve(request):
    """Fetch intraday quotes for the posted stock, store them and plot.

    Reads ``stockname`` from ``request.POST``, downloads the quote with
    ``GoogleIntradayQuote(stockname, 300, 1)``, saves a ``Stock`` row with
    one tick per quote entry, writes a demo plot to
    ``static/stocks/images/tmp.png`` and renders the result page.

    Args:
        request: the HTTP request; must carry ``stockname`` in its POST data.

    Returns:
        The rendered ``stocks/index.html`` page with an ``error_message``
        when the quote has no data, otherwise the rendered
        ``stocks/retrieve.html`` page with ``stockname`` and ``history``
        (the CSV lines of the quote).
    """
    stockname = request.POST['stockname']
    q = GoogleIntradayQuote(stockname, 300, 1)

    # Test the stripped text itself: "".split("\n") is [''], which is
    # truthy, so checking the resulting list could never detect an empty
    # quote.
    csv_text = q.to_csv().strip()
    if not csv_text:
        # A single string, not a tuple of fragments.
        e = "Couldn't find " + stockname + " data"
        return render(request, 'stocks/index.html', {'error_message': e})
    qlines = csv_text.split("\n")

    s = Stock(ticker=q.symbol,
              start_date=timezone.now() - datetime.timedelta(days=1))
    s.save()
    for tick in q.get_ticks():
        dt = parse_datetime(tick[0])
        p = tick[1]
        # Debug output; one parenthesised string prints the same text on
        # Python 2 and Python 3.
        print("tick:  %s  price:  %s" % (tick, p))
        s.tick_set.create(time=dt, price=p)
    s.save()

    # tells the R plotting device to write the plot to a file
    f = "static/stocks/images/tmp.png"
    grdevices.png(file=f, width=512, height=512)
    try:
        # make a random plot
        rnorm = stats.rnorm
        df = {'value': rnorm(300, mean=0) + rnorm(100, mean=3),
              'other_value': rnorm(300, mean=0) + rnorm(100, mean=3),
              'mean': IntVector([0, ] * 300 + [3, ] * 100)}
        dataf_rnorm = robjects.DataFrame(df)
        gp = ggplot2.ggplot(dataf_rnorm)
        pp = gp + \
            ggplot2.aes_string(x='value', y='other_value') + \
            ggplot2.geom_bin2d() + \
            ggplot2.opts(title='geom_bin2d')
        pp.plot()
    finally:
        # Close the device even when plotting fails, so that later requests
        # do not draw into a stale device.
        grdevices.dev_off()

    context = {'stockname': stockname, 'history': qlines}
    return render(request, 'stocks/retrieve.html', context)
def mem_usage_graph(cfg):
    """Draw the memory-usage bar graph into ``bargraph-memusage.pdf``.

    For every (variation, language, problem) combination in *cfg*, the
    first line of the file named by ``get_mem_output`` is read as a float.
    The values are plotted as dodged bars per problem, filled by language
    and faceted by variation.

    Args:
        cfg: configuration object providing ``variations``, ``languages``
            and ``problems``.

    Returns:
        None. The graph is written to the PDF device.
    """
    r = robjects.r

    variation_col = []
    language_col = []
    problem_col = []
    memory_col = []
    for var in cfg.variations:
        for lang in cfg.languages:
            for prob in cfg.problems:
                with open(get_mem_output(lang, prob, var), 'r') as handle:
                    memory_col.append(float(handle.readline()))
                variation_col.append(pretty_varis[var])
                language_col.append(pretty_langs[lang])
                problem_col.append(prob)

    # memory usage is a simple histogram with all information in one graph.
    r.pdf('bargraph-memusage.pdf', height=pdf_height(), width=pdf_width())

    table = robjects.DataFrame({
        'Language': StrVector(language_col),
        'Problem': StrVector(problem_col),
        'Variation': StrVector(variation_col),
        'Mem': FloatVector(memory_col),
    })

    chart = ggplot2.ggplot(table)
    # we rotate the x labels to make sure they don't overlap
    chart = chart + ggplot2.opts(
        **{'axis.text.x': ggplot2.theme_text(angle=90, hjust=1)})
    chart = chart + ggplot2.aes_string(x='Problem', y='Mem', fill='Language')
    chart = chart + ggplot2.geom_bar(position='dodge', stat='identity')
    chart = chart + ggplot2.facet_wrap('Variation')
    chart = chart + ggplot2_options()
    chart = chart + ggplot2_colors()
    chart = chart + robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))')
    chart = chart + robjects.r('ylab("Memory usage (in bytes)")')
    chart.plot()

    r['dev.off']()
def mem_usage_graph(cfg):
    """Plot memory usage per problem and language as a faceted bar graph.

    One measurement is read (first line, parsed as float) from the file
    returned by ``get_mem_output`` for each combination of variation,
    language and problem in *cfg*. The graph goes to
    ``bargraph-memusage.pdf``.

    Args:
        cfg: configuration object providing ``variations``, ``languages``
            and ``problems``.

    Returns:
        None.
    """
    r = robjects.r

    # One (variation, language, problem, memory) record per measurement.
    records = []
    for var in cfg.variations:
        for lang in cfg.languages:
            for prob in cfg.problems:
                mem_path = get_mem_output(lang, prob, var)
                with open(mem_path, 'r') as mem_file:
                    usage = float(mem_file.readline())
                records.append(
                    (pretty_varis[var], pretty_langs[lang], prob, usage))

    # memory usage is a simple histogram with all information in one graph.
    r.pdf('bargraph-memusage.pdf', height=pdf_height(), width=pdf_width())

    columns = {
        'Language': StrVector([rec[1] for rec in records]),
        'Problem': StrVector([rec[2] for rec in records]),
        'Variation': StrVector([rec[0] for rec in records]),
        'Mem': FloatVector([rec[3] for rec in records]),
    }
    figure = ggplot2.ggplot(robjects.DataFrame(columns))

    # we rotate the x labels to make sure they don't overlap
    figure += ggplot2.opts(
        **{'axis.text.x': ggplot2.theme_text(angle=90, hjust=1)})
    figure += ggplot2.aes_string(x='Problem', y='Mem', fill='Language')
    figure += ggplot2.geom_bar(position='dodge', stat='identity')
    figure += ggplot2.facet_wrap('Variation')
    figure += ggplot2_options()
    figure += ggplot2_colors()
    figure += robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))')
    figure += robjects.r('ylab("Memory usage (in bytes)")')
    figure.plot()

    r['dev.off']()
def line_plot(cfg, var, control, change_name, changing, selector, base_selector, basis):
    """Plot speedup against thread count, one line per entry of *changing*.

    For each entry the speedup at every thread count in ``cfg.threads`` is
    the mean of the base timings divided by the mean of the timings at that
    thread count. Error bars are taken from the R ``pairwiseCI`` ratio test
    at confidence level 0.999. An extra series labelled ``'ideal'`` has
    speedup equal to the thread count. The plot is drawn on the current R
    graphics device, which is then closed.

    Args:
        cfg: configuration object providing ``threads``.
        var: not used by this function.
        control: not used by this function.
        change_name: name of the data-frame column that distinguishes the
            lines; when it is ``'Language'`` entries are translated through
            ``pretty_langs``.
        changing: the entries to draw one line for.
        selector: ``selector(entry)`` returns a callable mapping a thread
            count to a sequence of timings.
        base_selector: ``base_selector(entry)`` returns the sequential base
            timings.
        basis: ``'seq'`` keeps the sequential base, ``'p1'`` uses the
            single-thread timings, ``'fastest'`` uses whichever of the two
            has the smaller mean.

    Returns:
        None.
    """
    speedups, thrds, changes, lowers, uppers = [], [], [], [], []

    # Seed the 'ideal' reference series: speedup equals the thread count.
    for count in cfg.threads:
        # NOTE(review): probs and langs are not defined in this function;
        # presumably module-level lists -- confirm, otherwise this raises
        # NameError.
        probs.append('ideal')
        langs.append('ideal')
        speedups.append(count)
        thrds.append(count)
        changes.append('ideal')
        lowers.append(count)
        uppers.append(count)

    by_language = change_name == 'Language'

    for entry in changing:
        timings_for = selector(entry)
        # sequential base
        seq_base = FloatVector(base_selector(entry))
        # base with p = 1
        p1_base = FloatVector(timings_for(1))
        # use fastest sequential program
        use_p1 = basis == 'p1' or (
            basis == 'fastest' and mean(p1_base) < mean(seq_base))
        reference = p1_base if use_p1 else seq_base

        for count in cfg.threads:
            timings = FloatVector(timings_for(count))

            # ratio confidence interval
            # NOTE(review): r is not bound in this function; presumably a
            # module-level alias of robjects.r -- confirm.
            groups = (['Base'] * r.length(reference)[0]
                      + ['N'] * r.length(timings)[0])
            sample = DataFrame({'Times': reference + timings,
                                'Type': StrVector(groups)})
            ci_options = {'var.equal': False, 'conf.level': 0.999}
            estimate = r['pairwiseCI'](r('Times ~ Type'),
                                       data=sample,
                                       control='N',
                                       method='Param.ratio',
                                       **ci_options)[0][0]
            lowers.append(estimate[1][0])
            uppers.append(estimate[2][0])

            timed_mean = mean(timings)
            speedups.append(mean(reference) / timed_mean)
            thrds.append(count)
            if by_language:
                changes.append(pretty_langs[entry])
            else:
                changes.append(entry)

    df = DataFrame({'Speedup': FloatVector(speedups),
                    'Threads': IntVector(thrds),
                    change_name: StrVector(changes),
                    'Lower': FloatVector(lowers),
                    'Upper': FloatVector(uppers)})

    # Legend entries: 'ideal' first, then one per entry of changing.
    legend_labels = ['ideal']
    if by_language:
        legend_labels.extend([pretty_langs[entry] for entry in changing])
    else:
        legend_labels.extend(changing)
    legendVec = IntVector(range(len(legend_labels)))
    legendVec.names = StrVector(legend_labels)

    figure = ggplot2.ggplot(df)
    limits = ggplot2.aes(ymax='Upper', ymin='Lower')
    # The dodge position is created but not used by the plot below.
    dodge = ggplot2.position_dodge(width=0.9)

    figure = figure + ggplot2.geom_line()
    figure = figure + ggplot2.geom_point(size=3)
    figure = figure + ggplot2.aes_string(
        x='Threads', y='Speedup', group=change_name, color=change_name,
        shape=change_name)
    figure = figure + ggplot2.scale_shape_manual(values=legendVec)
    figure = figure + ggplot2.geom_errorbar(limits, width=0.25)
    figure = figure + ggplot2_options()
    figure = figure + ggplot2_colors()
    figure = figure + ggplot2.opts(**{
        'axis.title.x': ggplot2.theme_text(
            family='serif', face='bold', size=15, vjust=-0.2)})
    figure = figure + robjects.r('ylab("Speedup")')
    figure = figure + robjects.r('xlab("Cores")')
    figure.plot()

    r['dev.off']()
def as_dataframe(cfg, results, basis):
    """Collect the benchmark results into an R data frame and plot speedups.

    One row is produced per (variation, language, problem, thread count).
    Rows where exactly one of language or problem is the synthetic
    ``'ideal'`` entry carry a speedup equal to the thread count and zero
    time, SE and memory. The frame is saved to ``performance.Rda``,
    reshaped to wide form on ``Variation``, and the ``expertpar`` speedups
    are plotted per problem into ``speedup-expertpar-all.pdf``.

    Args:
        cfg: configuration object providing ``variations``, ``languages``,
            ``problems`` and ``threads`` (an indexable sequence).
        results: nested mapping indexed as
            ``results[thread][prob][var][lang][0]``, giving a sequence of
            timings.
        basis: ``'seq'`` uses the sequential timings as the speedup base,
            ``'p1'`` uses the single-thread timings, ``'fastest'`` uses
            whichever of the two has the smaller mean.

    Returns:
        None. The output is the saved ``.Rda`` file and the PDF.
    """
    r = robjects.r

    varis = []
    langs = []
    probs = []
    times = []
    threads = []
    # speedups, with upper and lower bounds below
    speedups = []
    speedup_lowers = []
    speedup_uppers = []
    ses = []   # half-width of the t-test confidence interval on the time
    mems = []  # memory usage

    langs_ideal = list(cfg.languages)
    langs_ideal.append('ideal')
    probs_ideal = list(cfg.problems)
    probs_ideal.append('ideal')

    for var in cfg.variations:
        is_seq = var.find('seq') >= 0
        for lang in langs_ideal:
            for prob in probs_ideal:
                for thread in cfg.threads:
                    if lang == 'ideal' and prob == 'ideal':
                        continue
                    if lang == 'ideal' or prob == 'ideal':
                        # Synthetic reference row: linear speedup.
                        varis.append(var)
                        langs.append(pretty_langs[lang])
                        probs.append(prob)
                        threads.append(thread)
                        speedups.append(thread)
                        speedup_lowers.append(thread)
                        speedup_uppers.append(thread)
                        times.append(0)
                        ses.append(0)
                        mems.append(0)
                        continue

                    varis.append(var)
                    langs.append(pretty_langs[lang])
                    probs.append(prob)
                    threads.append(thread)

                    # Sequential variations are looked up under the last
                    # thread count, whatever row is being filled.
                    lookup_thread = cfg.threads[-1] if is_seq else thread
                    vals = FloatVector(
                        results[lookup_thread][prob][var][lang][0])
                    mean_time = mean(vals)
                    times.append(mean_time)

                    #
                    # time confidence interval
                    #
                    # The key must be exactly "conf.level": with a leading
                    # space it matches no t.test argument, so the 0.999
                    # level never reaches R.
                    t_result = r['t.test'](
                        FloatVector(vals),
                        **{"conf.level": 0.999}).rx('conf.int')[0]
                    ses.append((t_result[1] - t_result[0]) / 2)

                    #
                    # memory usage
                    #
                    mem_filename = get_mem_output(lang, prob, var)
                    with open(mem_filename, 'r') as mem_file:
                        mems.append(float(mem_file.readline()))

                    # we include dummy data for the sequential case to
                    # avoid the speedup calculation below
                    if is_seq:
                        speedups.append(1)
                        speedup_lowers.append(1)
                        speedup_uppers.append(1)
                        continue

                    #
                    # speedup values and confidence intervals
                    #
                    seq_vals = results[cfg.threads[-1]][prob][
                        var.replace('par', 'seq')][lang][0]
                    # sequential base
                    base = FloatVector(seq_vals)
                    # base with p = 1
                    base_p1 = FloatVector(results[1][prob][var][lang][0])
                    # use fastest sequential program
                    if basis == 'fastest' and mean(base_p1) < mean(base):
                        base = base_p1
                    elif basis == 'p1':
                        base = base_p1

                    labels = (['Base'] * r.length(base)[0]
                              + ['N'] * r.length(vals)[0])
                    df = DataFrame({'Times': base + vals,
                                    'Type': StrVector(labels)})
                    ratio_test = r['pairwiseCI'](
                        r('Times ~ Type'), data=df,
                        control='N',
                        method='Param.ratio',
                        **{'var.equal': False})[0][0]

                    speedups.append(mean(base) / mean_time)
                    speedup_lowers.append(ratio_test[1][0])
                    speedup_uppers.append(ratio_test[2][0])

    df = robjects.DataFrame({'Language': StrVector(langs),
                             'Problem': StrVector(probs),
                             'Variation': StrVector(varis),
                             'Threads': IntVector(threads),
                             'Time': FloatVector(times),
                             'SE': FloatVector(ses),
                             'Speedup': FloatVector(speedups),
                             'SpeedupLower': FloatVector(speedup_lowers),
                             'SpeedupUpper': FloatVector(speedup_uppers),
                             'Mem': FloatVector(mems)})

    r.assign('df', df)
    r('save (df, file="performance.Rda")')

    # reshape the data to make variation not a column itself, but a part of
    # the other columns describe ie, time, speedup, etc.
    #
    # also, remove the 'ideal' problem as we don't want it in this plot.
    df = r('''
redf = reshape (df,
                timevar="Variation",
                idvar = c("Language","Problem","Threads"),
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

    r.pdf('speedup-expertpar-all.pdf', height=6.5, width=10)
    try:
        change_name = 'Language'
        limits = ggplot2.aes(ymax='SpeedupUpper.expertpar',
                             ymin='SpeedupLower.expertpar')

        pp = ggplot2.ggplot(df) + \
            ggplot2.geom_line() + ggplot2.geom_point(size=2.5) + \
            robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') + \
            ggplot2.aes_string(x='Threads', y='Speedup.expertpar',
                               group=change_name, color=change_name,
                               shape=change_name) + \
            ggplot2.geom_errorbar(limits, width=0.25) + \
            ggplot2.opts(**{
                'axis.title.x': ggplot2.theme_text(family='serif', face='bold', size=10, vjust=-0.2),
                'axis.title.y': ggplot2.theme_text(family='serif', face='bold', size=10, angle=90, vjust=0.2),
                'axis.text.x': ggplot2.theme_text(family='serif', size=10),
                'axis.text.y': ggplot2.theme_text(family='serif', size=10),
                'legend.title': ggplot2.theme_text(family='serif', face='bold', size=10),
                'legend.text': ggplot2.theme_text(family='serif', size=10),
                'strip.text.x': ggplot2.theme_text(family='serif', size=10),
                'aspect.ratio': 1,
            }) + \
            robjects.r('ylab("Speedup")') + \
            robjects.r('xlab("Number of cores")') + \
            ggplot2.facet_wrap('Problem', nrow=2)
        pp.plot()
    finally:
        # Close the PDF device even if building or rendering the plot fails.
        r['dev.off']()
#-- ggplot2mtcars-end
grdevices.dev_off()

# Open a wide PNG device and split the page into one row of three
# viewports; each example below is drawn into its own cell.
grdevices.png(
    '../../_static/graphics_ggplot2geombin2d.png',
    width=1000,
    height=350,
    antialias="subpixel",
    type="cairo",
)
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# First cell.
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = (gp
      + ggplot2.aes_string(x='value', y='other_value')
      + ggplot2.geom_bin2d()
      + ggplot2.opts(title='geom_bin2d'))
pp.plot(vp=vp)
#-- ggplot2geombin2d-end

# Second cell.
vp = grid.viewport(**{'layout.pos.col': 2, 'layout.pos.row': 1})
#-- ggplot2geomdensity2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = (gp
      + ggplot2.aes_string(x='value', y='other_value')
      + ggplot2.geom_density2d()
      + ggplot2.opts(title='geom_density2d'))
pp.plot(vp=vp)
#-- ggplot2geomdensity2d-end

# Third cell; the plot for it follows this chunk.
vp = grid.viewport(**{'layout.pos.col': 3, 'layout.pos.row': 1})
from rpy2.robjects.lib import ggplot2

# Running time against the number of repetitions, one line (with points)
# per 'code' value and one panel per 'sequence' value.
p = ggplot2.ggplot(dataf)
p = p + ggplot2.geom_line(
    ggplot2.aes_string(x="n_loop", y="time", colour="code"))
p = p + ggplot2.geom_point(
    ggplot2.aes_string(x="n_loop", y="time", colour="code"))
p = p + ggplot2.facet_wrap(Formula('~sequence'))
p = p + ggplot2.scale_y_continuous('running time')
p = p + ggplot2.scale_x_continuous('repeated n times', )
p = p + ggplot2.xlim(0, max(n_loops))
p = p + ggplot2.opts(title="Benchmark (running time)")

from rpy2.robjects.packages import importr
grdevices = importr('grDevices')

# Render the plot to the PNG file.
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")

# One linear model of time against n_loop per 'group'; na_action is set to
# stats.na_exclude.
fit = nlme.lmList(
    Formula('time ~ n_loop | group'),
    data=dataf,
    na_action=stats.na_exclude,
)
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
    """Render ``self.df`` with ggplot2 into the PNG file *fn*.

    Args:
        fn: output PNG path; its last ``/``-separated component is used as
            the title when *title* is falsy.
        x, y: columns mapped to the axes.
        col: optional column mapped to colour (and to fill on geoms).
        group: optional column mapped to the group aesthetic.
        w, h: device width and height.
        size: point size.
        smooth: add a smoother when truthy; the value ``'lm'`` selects
            ``method='lm'``.
        point: add a point layer.
        jitter: jitter the points.
        boxplot: add a boxplot layer before the points.
        boxplot2: add a boxplot layer after the points.
        title: plot title.
        flip: flip the coordinates.
        se: passed to the smoother as ``se``.
        density: add a density layer with ``y='..count..'``.
        line: add a jittered line layer.

    Returns:
        None. The figure is written to *fn*.
    """
    frame = self.df
    grdevices = importr('grDevices')
    if not title:
        title = fn.split("/")[-1]
    grdevices.png(file=fn, width=w, height=h)

    # Base mapping: x and y always, colour and group only when supplied.
    mapping = {'x': x, 'y': y}
    if col:
        mapping['col'] = col
    if group:
        mapping['group'] = group
    chart = ggplot2.ggplot(frame)
    chart += ggplot2.aes_string(**mapping)

    if boxplot:
        box_args = [ggplot2.aes_string(fill=col)] if col else []
        chart += ggplot2.geom_boxplot(*box_args, color='blue')

    if point:
        point_kwargs = {'size': size}
        if jitter:
            point_kwargs['position'] = 'jitter'
        point_args = [ggplot2.aes_string(fill=col, col=col)] if col else []
        chart += ggplot2.geom_point(*point_args, **point_kwargs)

    if boxplot2:
        if col:
            chart += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                          color='blue',
                                          outlier_colour="NA")
        else:
            chart += ggplot2.geom_boxplot(color='blue')

    if smooth:
        smooth_kwargs = {'size': 1, 'se': se}
        if smooth == 'lm':
            smooth_kwargs['method'] = 'lm'
        if col:
            chart += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                         **smooth_kwargs)
        else:
            chart += ggplot2.stat_smooth(col='blue', **smooth_kwargs)

    if density:
        chart += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

    if line:
        chart += ggplot2.geom_line(position='jitter')

    theme = {
        'title': title,
        'axis.text.x': ggplot2.theme_text(size=24),
        'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
    }
    chart += ggplot2.opts(**theme)
    #chart+=ggplot2.scale_colour_brewer(palette="Set1")
    chart += ggplot2.scale_colour_hue()
    if flip:
        chart += ggplot2.coord_flip()

    chart.plot()
    grdevices.dev_off()
    print(">> saved: " + fn)
def as_dataframe(cfg, results, basis):
    """Collect the benchmark results into an R data frame and plot speedups.

    One row is produced per (variation, language, problem, thread count).
    Rows where exactly one of language or problem is the synthetic
    ``'ideal'`` entry carry a speedup equal to the thread count and zero
    time, SE and memory. The frame is saved to ``performance.Rda``,
    reshaped to wide form on ``Variation``, and the ``expertpar`` speedups
    are plotted per problem into ``speedup-expertpar-all.pdf``.

    Args:
        cfg: configuration object providing ``variations``, ``languages``,
            ``problems`` and ``threads`` (an indexable sequence).
        results: nested mapping indexed as
            ``results[thread][prob][var][lang][0]``, giving a sequence of
            timings.
        basis: ``'seq'`` uses the sequential timings as the speedup base,
            ``'p1'`` uses the single-thread timings, ``'fastest'`` uses
            whichever of the two has the smaller mean.

    Returns:
        None. The output is the saved ``.Rda`` file and the PDF.
    """
    r = robjects.r

    varis = []
    langs = []
    probs = []
    times = []
    threads = []
    # speedups, with upper and lower bounds below
    speedups = []
    speedup_lowers = []
    speedup_uppers = []
    ses = []   # half-width of the t-test confidence interval on the time
    mems = []  # memory usage

    langs_ideal = list(cfg.languages)
    langs_ideal.append('ideal')
    probs_ideal = list(cfg.problems)
    probs_ideal.append('ideal')

    for var in cfg.variations:
        is_seq = var.find('seq') >= 0
        for lang in langs_ideal:
            for prob in probs_ideal:
                for thread in cfg.threads:
                    if lang == 'ideal' and prob == 'ideal':
                        continue
                    if lang == 'ideal' or prob == 'ideal':
                        # Synthetic reference row: linear speedup.
                        varis.append(var)
                        langs.append(pretty_langs[lang])
                        probs.append(prob)
                        threads.append(thread)
                        speedups.append(thread)
                        speedup_lowers.append(thread)
                        speedup_uppers.append(thread)
                        times.append(0)
                        ses.append(0)
                        mems.append(0)
                        continue

                    varis.append(var)
                    langs.append(pretty_langs[lang])
                    probs.append(prob)
                    threads.append(thread)

                    # Sequential variations are looked up under the last
                    # thread count, whatever row is being filled.
                    lookup_thread = cfg.threads[-1] if is_seq else thread
                    vals = FloatVector(
                        results[lookup_thread][prob][var][lang][0])
                    mean_time = mean(vals)
                    times.append(mean_time)

                    #
                    # time confidence interval
                    #
                    # The key must be exactly "conf.level": with a leading
                    # space it matches no t.test argument, so the 0.999
                    # level never reaches R.
                    t_result = r['t.test'](
                        FloatVector(vals),
                        **{"conf.level": 0.999}).rx('conf.int')[0]
                    ses.append((t_result[1] - t_result[0]) / 2)

                    #
                    # memory usage
                    #
                    mem_filename = get_mem_output(lang, prob, var)
                    with open(mem_filename, 'r') as mem_file:
                        mems.append(float(mem_file.readline()))

                    # we include dummy data for the sequential case to
                    # avoid the speedup calculation below
                    if is_seq:
                        speedups.append(1)
                        speedup_lowers.append(1)
                        speedup_uppers.append(1)
                        continue

                    #
                    # speedup values and confidence intervals
                    #
                    seq_vals = results[cfg.threads[-1]][prob][
                        var.replace('par', 'seq')][lang][0]
                    # sequential base
                    base = FloatVector(seq_vals)
                    # base with p = 1
                    base_p1 = FloatVector(results[1][prob][var][lang][0])
                    # use fastest sequential program
                    if basis == 'fastest' and mean(base_p1) < mean(base):
                        base = base_p1
                    elif basis == 'p1':
                        base = base_p1

                    labels = (['Base'] * r.length(base)[0]
                              + ['N'] * r.length(vals)[0])
                    df = DataFrame({'Times': base + vals,
                                    'Type': StrVector(labels)})
                    ratio_test = r['pairwiseCI'](
                        r('Times ~ Type'), data=df,
                        control='N',
                        method='Param.ratio',
                        **{'var.equal': False})[0][0]

                    speedups.append(mean(base) / mean_time)
                    speedup_lowers.append(ratio_test[1][0])
                    speedup_uppers.append(ratio_test[2][0])

    df = robjects.DataFrame({'Language': StrVector(langs),
                             'Problem': StrVector(probs),
                             'Variation': StrVector(varis),
                             'Threads': IntVector(threads),
                             'Time': FloatVector(times),
                             'SE': FloatVector(ses),
                             'Speedup': FloatVector(speedups),
                             'SpeedupLower': FloatVector(speedup_lowers),
                             'SpeedupUpper': FloatVector(speedup_uppers),
                             'Mem': FloatVector(mems)})

    r.assign('df', df)
    r('save (df, file="performance.Rda")')

    # reshape the data to make variation not a column itself, but a part of
    # the other columns describe ie, time, speedup, etc.
    #
    # also, remove the 'ideal' problem as we don't want it in this plot.
    df = r('''
redf = reshape (df,
                timevar="Variation",
                idvar = c("Language","Problem","Threads"),
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

    r.pdf('speedup-expertpar-all.pdf', height=6.5, width=10)
    try:
        change_name = 'Language'
        limits = ggplot2.aes(ymax='SpeedupUpper.expertpar',
                             ymin='SpeedupLower.expertpar')

        pp = ggplot2.ggplot(df) + \
            ggplot2.geom_line() + ggplot2.geom_point(size=2.5) + \
            robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') + \
            ggplot2.aes_string(x='Threads', y='Speedup.expertpar',
                               group=change_name, color=change_name,
                               shape=change_name) + \
            ggplot2.geom_errorbar(limits, width=0.25) + \
            ggplot2.opts(**{
                'axis.title.x': ggplot2.theme_text(family='serif', face='bold', size=10, vjust=-0.2),
                'axis.title.y': ggplot2.theme_text(family='serif', face='bold', size=10, angle=90, vjust=0.2),
                'axis.text.x': ggplot2.theme_text(family='serif', size=10),
                'axis.text.y': ggplot2.theme_text(family='serif', size=10),
                'legend.title': ggplot2.theme_text(family='serif', face='bold', size=10),
                'legend.text': ggplot2.theme_text(family='serif', size=10),
                'strip.text.x': ggplot2.theme_text(family='serif', size=10),
                'aspect.ratio': 1,
            }) + \
            robjects.r('ylab("Speedup")') + \
            robjects.r('xlab("Number of cores")') + \
            ggplot2.facet_wrap('Problem', nrow=2)
        pp.plot()
    finally:
        # Close the PDF device even if building or rendering the plot fails.
        r['dev.off']()
def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
    """Draw ``self.df`` with ggplot2 and save the figure as PNG *fn*.

    Layers are added in a fixed order: optional boxplot, points, optional
    second boxplot, smoother, density, line; then the theme, a hue colour
    scale and an optional coordinate flip.

    Args:
        fn: output PNG path; its last ``/``-separated component is used as
            the title when *title* is falsy.
        x, y: columns mapped to the axes.
        col: optional column mapped to colour (and to fill on geoms).
        group: optional column mapped to the group aesthetic.
        w, h: device width and height.
        size: point size.
        smooth: add a smoother when truthy; the value ``'lm'`` selects
            ``method='lm'``.
        point: add a point layer.
        jitter: jitter the points.
        boxplot: add a boxplot layer before the points.
        boxplot2: add a boxplot layer after the points.
        title: plot title.
        flip: flip the coordinates.
        se: passed to the smoother as ``se``.
        density: add a density layer with ``y='..count..'``.
        line: add a jittered line layer.

    Returns:
        None.
    """
    data = self.df
    grdevices = importr('grDevices')
    if not title:
        title = fn.split("/")[-1]
    grdevices.png(file=fn, width=w, height=h)

    # x/y are always mapped; colour and group only when given.
    base_aes = dict(x=x, y=y)
    if col:
        base_aes['col'] = col
    if group:
        base_aes['group'] = group
    fig = ggplot2.ggplot(data)
    fig += ggplot2.aes_string(**base_aes)

    if boxplot:
        if col:
            fig += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                        color='blue')
        else:
            fig += ggplot2.geom_boxplot(color='blue')

    if point:
        options = dict(size=size)
        if jitter:
            options['position'] = 'jitter'
        if col:
            fig += ggplot2.geom_point(
                ggplot2.aes_string(fill=col, col=col), **options)
        else:
            fig += ggplot2.geom_point(**options)

    if boxplot2:
        if col:
            fig += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                        color='blue',
                                        outlier_colour="NA")
        else:
            fig += ggplot2.geom_boxplot(color='blue')

    if smooth:
        options = dict(size=1, se=se)
        if smooth == 'lm':
            options['method'] = 'lm'
        if col:
            fig += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                       **options)
        else:
            fig += ggplot2.stat_smooth(col='blue', **options)

    if density:
        fig += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

    if line:
        fig += ggplot2.geom_line(position='jitter')

    fig += ggplot2.opts(**{
        'title': title,
        'axis.text.x': ggplot2.theme_text(size=24),
        'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
    })
    #fig+=ggplot2.scale_colour_brewer(palette="Set1")
    fig += ggplot2.scale_colour_hue()
    if flip:
        fig += ggplot2.coord_flip()

    fig.plot()
    grdevices.dev_off()
    print(">> saved: " + fn)
width=1300, height=1000) ## plot the map ## note that the order matters when we add another layer in ggplot (here IL_railroads): first aes, then data, that's different from R ## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349) ## note that we use dictionary to set the opts to be able to set options as keywords, for example legend.key.size p_map = ggplot2.ggplot(IL_final) + \ ggplot2.geom_polygon(ggplot2.aes(x = 'long', y = 'lat', group = 'group', color = 'ObamaShare', fill = 'ObamaShare')) + \ ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \ ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \ ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \ ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \ 'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \ 'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \ 'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \ 'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \ 'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \ 'axis.text.y': ggplot2.theme_blank()} ) + \ ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \ ggplot2.coord_equal() p_map.plot() ## add the scatterplot ## define layout of subplot with viewports vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4) p_sub = ggplot2.ggplot(RR_distance) + \ ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
# print str(a) try: if dsumFC.has_key(drug): dsumFC[drug]['Fold_Change'].append(math.log10(float(val))) dsumY[drug]['Year'].append(yr) else: dsumFC[drug]= {'Fold_Change': [math.log10(float(val)),]} dsumY[drug]= {'Year': [yr,]} except: print "FAILURE: dsumFC="+str(dsumFC)+"\n\ndsumY="+str(dsumY) sys.exit() drugs = dsumFC.keys() for x in drugs: od = rlc.OrdDict([('Fold_Change',robjects.FloatVector(dsumFC[x]['Fold_Change'])),('Year',robjects.FactorVector(dsumY[x]['Year'])),('Drug',robjects.FactorVector(x))]) grdevices.pdf(file="drugs.pdf",width=7,height=7) dataf = robjects.DataFrame(od) gp3 = ggplot2.ggplot(dataf) pp3 = gp3 + ggplot2.scale_fill_brewer(palette='BrBG',name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') + ggplot2.geom_boxplot() + ggplot2.opts(title = x+" Yearly Trend") # pp3 = gp3 + ggplot2.scale_colour_hue(h=base.c(180,270),name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') + ggplot2.geom_boxplot() + ggplot2.opts(title = x+" Yearly Trend") #+ ggplot2.scale_y_log10() pp3.plot() grdevices.dev_off() f.close() print "\nfinished\n"
grdevices = importr('grDevices') grdevices.png(file='/Users/user/Downloads/data/mapplot.png', width=1300, height=1000) ## plot the map ## note that the order matters when we add another layer in ggplot (here IL_railroads): first aes, then data, that's different from R ## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349) ## note that we use dictionary to set the opts to be able to set options as keywords, for example legend.key.size p_map = ggplot2.ggplot(IL_final) + \ ggplot2.geom_polygon(ggplot2.aes(x = 'long', y = 'lat', group = 'group', color = 'ObamaShare', fill = 'ObamaShare')) + \ ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \ ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \ ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \ ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \ 'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \ 'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \ 'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \ 'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \ 'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \ 'axis.text.y': ggplot2.theme_blank()} ) + \ ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \ ggplot2.coord_equal() p_map.plot() ## add the scatterplot ## define layout of subplot with viewports vp_sub = grid.viewport(x = 0.19, y = 0.2, width = 0.32, height = 0.4) p_sub = ggplot2.ggplot(RR_distance) + \ ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
[d['code'][x] + ':' + d['sequence'][x] for x in xrange(len(d['n_loop']))]) dataf = DataFrame(d) from rpy2.robjects.lib import ggplot2 p = ggplot2.ggplot(dataf) + \ ggplot2.geom_line(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \ ggplot2.geom_point(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \ ggplot2.facet_wrap(Formula('~sequence')) + \ ggplot2.scale_y_continuous('running time') + \ ggplot2.scale_x_continuous('repeated n times', ) + \ ggplot2.xlim(0, max(n_loops)) + \ ggplot2.opts(title = "Benchmark (running time)") from rpy2.robjects.packages import importr grdevices = importr('grDevices') grdevices.png('../../_static/benchmark_sum.png', width=712, height=512) p.plot() grdevices.dev_off() #base = importr("base") stats = importr('stats') nlme = importr("nlme") fit = nlme.lmList(Formula('time ~ n_loop | group'), data=dataf, na_action=stats.na_exclude) # scale to R's slope
def line_plot(cfg, var, control, change_name, changing, selector, base_selector, basis):
    """Draw a speedup-versus-threads line plot with error bars.

    One line is drawn per entry of *changing*, plus an ``'ideal'`` line
    whose speedup equals the thread count. Speedup is the mean base time
    divided by the mean time at the given thread count; the error bar
    limits are taken from the R ``pairwiseCI`` ratio test at confidence
    level 0.999. The plot is drawn on the current R graphics device, which
    is then closed.

    Args:
        cfg: configuration object providing ``threads``.
        var: not used by this function.
        control: not used by this function.
        change_name: name of the data-frame column that distinguishes the
            lines; when it is ``'Language'`` entries are translated through
            ``pretty_langs``.
        changing: the entries to draw one line for.
        selector: ``selector(entry)`` returns a callable mapping a thread
            count to a sequence of timings.
        base_selector: ``base_selector(entry)`` returns the sequential base
            timings.
        basis: ``'seq'`` keeps the sequential base, ``'p1'`` uses the
            single-thread timings, ``'fastest'`` uses whichever of the two
            has the smaller mean.

    Returns:
        None.
    """
    speedups = []
    thrds = []
    changes = []
    lowers = []
    uppers = []

    # The 'ideal' series: linear speedup with zero-width error bars.
    for nthreads in cfg.threads:
        # NOTE(review): probs and langs are not defined in this function;
        # presumably module-level lists -- confirm, otherwise this raises
        # NameError.
        probs.append('ideal')
        langs.append('ideal')
        speedups.append(nthreads)
        thrds.append(nthreads)
        changes.append('ideal')
        lowers.append(nthreads)
        uppers.append(nthreads)

    language_mode = change_name == 'Language'

    for item in changing:
        pick = selector(item)
        # sequential base
        baseline = FloatVector(base_selector(item))
        # base with p = 1
        single = FloatVector(pick(1))
        # use fastest sequential program
        if basis == 'p1' or (basis == 'fastest'
                             and mean(single) < mean(baseline)):
            baseline = single

        for nthreads in cfg.threads:
            measured = FloatVector(pick(nthreads))

            # ratio confidence interval
            # NOTE(review): r is not bound in this function; presumably a
            # module-level alias of robjects.r -- confirm.
            kinds = ['Base'] * r.length(baseline)[0]
            kinds = kinds + ['N'] * r.length(measured)[0]
            pooled = DataFrame({'Times': baseline + measured,
                                'Type': StrVector(kinds)})
            test = r['pairwiseCI'](
                r('Times ~ Type'),
                data=pooled,
                control='N',
                method='Param.ratio',
                **{'var.equal': False, 'conf.level': 0.999})
            interval = test[0][0]
            lowers.append(interval[1][0])
            uppers.append(interval[2][0])

            measured_mean = mean(measured)
            speedups.append(mean(baseline) / measured_mean)
            thrds.append(nthreads)
            changes.append(pretty_langs[item] if language_mode else item)

    df = DataFrame({
        'Speedup': FloatVector(speedups),
        'Threads': IntVector(thrds),
        change_name: StrVector(changes),
        'Lower': FloatVector(lowers),
        'Upper': FloatVector(uppers),
    })

    # Shape legend: 'ideal' first, then one entry per item of changing.
    if language_mode:
        ideal_changing = ['ideal'] + [pretty_langs[item] for item in changing]
    else:
        ideal_changing = ['ideal']
        ideal_changing.extend(changing)
    legendVec = IntVector(range(len(ideal_changing)))
    legendVec.names = StrVector(ideal_changing)

    canvas = ggplot2.ggplot(df)
    limits = ggplot2.aes(ymax='Upper', ymin='Lower')
    # The dodge position is created but not used by the plot below.
    dodge = ggplot2.position_dodge(width=0.9)

    canvas = canvas + ggplot2.geom_line()
    canvas = canvas + ggplot2.geom_point(size=3)
    canvas = canvas + ggplot2.aes_string(x='Threads', y='Speedup',
                                         group=change_name,
                                         color=change_name,
                                         shape=change_name)
    canvas = canvas + ggplot2.scale_shape_manual(values=legendVec)
    canvas = canvas + ggplot2.geom_errorbar(limits, width=0.25)
    canvas = canvas + ggplot2_options()
    canvas = canvas + ggplot2_colors()
    canvas = canvas + ggplot2.opts(**{
        'axis.title.x': ggplot2.theme_text(
            family='serif', face='bold', size=15, vjust=-0.2)})
    canvas = canvas + robjects.r('ylab("Speedup")')
    canvas = canvas + robjects.r('xlab("Cores")')
    canvas.plot()

    r['dev.off']()