Exemple #1
0
def ggplot2_options():
    """Build the ggplot2 theme options shared by the plots.

    Every text element uses a 15pt serif font. The legend title and the
    y-axis title are bold, the y-axis title is additionally rotated by 90
    degrees, the x-axis title is replaced by a blank element, and the
    aspect ratio is fixed to 0.6180339888.

    Returns:
        The object returned by ``ggplot2.opts`` for these settings.
    """
    def serif_text(**extra):
        # Family and size never vary; callers only pass what differs.
        return ggplot2.theme_text(family='serif', size=15, **extra)

    settings = {}
    settings['axis.title.x'] = ggplot2.theme_blank()
    settings['axis.title.y'] = serif_text(face='bold', angle=90, vjust=0.2)
    settings['axis.text.x'] = serif_text()
    settings['axis.text.y'] = serif_text()
    settings['legend.title'] = serif_text(face='bold')
    settings['legend.text'] = serif_text()
    settings['aspect.ratio'] = 0.6180339888
    settings['strip.text.x'] = serif_text()
    return ggplot2.opts(**settings)
Exemple #2
0
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Build a plot of phylogenetic informativeness per time interval.

    The rows are fetched on the R side with ``dbGetQuery(con, ...)`` and
    stored in the R variable ``data``; ``data$interval`` is then re-levelled
    with the ordering produced by ``order_intervals``.

    Args:
        locus_table: passed through to ``get_interval_query``.
        interval_table: passed through to ``get_interval_query``.
        intervals: passed through to ``get_interval_query``.
        loci: passed through to ``get_interval_query``.
        boxplot: when true (the default) draw a boxplot per interval with
            jittered points coloured by locus; otherwise draw bars faceted
            by locus.

    Returns:
        The composed ggplot object (it is not drawn here).
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))
    # ``interval`` is a factor, so its levels have to be re-declared in the
    # desired order explicitly; the ordering is derived from the column at
    # index 1 of the fetched frame.
    level_spec = order_intervals(fetched[1])
    robjects.r('''data$interval <- factor(data$interval, {})'''.format(level_spec))
    canvas = ggplot2.ggplot(robjects.r('''data'''))

    if boxplot:
        layers = [
            ggplot2.aes_string(x='interval', y='pi'),
            ggplot2.geom_boxplot(**{'outlier.size': 0, 'alpha': 0.3}),
            ggplot2.geom_jitter(
                ggplot2.aes_string(color='locus'),
                size=3,
                alpha=0.6,
                position=ggplot2.position_jitter(width=0.25)),
        ]
    else:
        layers = [
            ggplot2.aes_string(x='interval', y='pi', fill='locus'),
            ggplot2.geom_bar(),
            ggplot2.facet_wrap(robjects.Formula('~ locus')),
            ggplot2.opts(**{
                'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
                'legend.position': 'none',
            }),
        ]

    # Both variants end with the same pair of axis scales.
    layers.append(ggplot2.scale_y_continuous('phylogenetic informativeness'))
    layers.append(ggplot2.scale_x_discrete('interval (years ago)'))

    plot = canvas
    for layer in layers:
        plot = plot + layer
    return plot
Exemple #3
0
def single_locus_net_informativeness(locus_table, net_pi_table, locus):
    """Build a scatter plot of net informativeness over time for one locus.

    Args:
        locus_table: name of the table providing ``locus`` and ``id``.
        net_pi_table: name of the table joined to ``locus_table`` on ``id``.
        locus: locus name used both as the query filter and the plot title.

    Returns:
        The composed ggplot object (it is not drawn here).
    """
    # The query is embedded, double quotes included, into an R call that is
    # evaluated against the R-side connection ``con``.
    template = (
        '"SELECT {0}.locus, time, pi FROM {0}, {1} \n'
        "    WHERE {0}.id = {1}.id AND locus = '{2}'\""
    )
    query = template.format(locus_table, net_pi_table, locus)
    rows = robjects.r('''dbGetQuery(con, {})'''.format(query))

    layers = (
        ggplot2.aes_string(x='time', y='pi'),
        ggplot2.geom_point(size=3, alpha=0.4),
        ggplot2.scale_x_reverse('years ago'),
        ggplot2.scale_y_continuous('phylogenetic informativeness'),
        ggplot2.opts(title=locus),
    )
    plot = ggplot2.ggplot(rows)
    for layer in layers:
        plot = plot + layer
    return plot
def runBoruta():
    """Run the R relevance script and draw a z-score boxplot.

    Loads ``Rcode/zscores.RData``, sources the ``borutaRelevance.R`` script,
    calls the R function ``findRelevant`` on the ``massallfeatures`` and
    ``nonmassallfeatures`` objects of the R global environment, and plots
    the ``masszscore_selected`` element of its result.

    Returns:
        None.
    """
    base.load("Rcode/zscores.RData")
    base.source('Z:/Cristina/MassNonmass/codeProject/codeBase/trainClassifier/Rcode/borutaRelevance.R')

    find_relevant = globalenv['findRelevant']
    relevance = find_relevant(globalenv['massallfeatures'],
                              globalenv['nonmassallfeatures'])

    # Boxplot comparing the relevant mass features with the same
    # non-mass features.
    figure = ggplot2.ggplot(relevance.rx2("masszscore_selected"))
    figure = figure + ggplot2.aes_string(x='MorN', y='zscores',
                                         fill='factor(MorN)')
    figure = figure + ggplot2.geom_boxplot()
    figure = figure + ggplot2.opts(
        title="Comparison of Z-scores for Mass confirmed features",
        y="Z-scores")
    figure.plot()
Exemple #5
0
def plot(data, filename, title, ggplotter, xid="N", yid="RunTime", factorid="Step"):
    """Draw ``data`` into a 10x6 PDF file using the supplied geom factory.

    Args:
        data: raw data handed to ``make_dataframe``.
        filename: path of the PDF file to write.
        title: plot title.
        ggplotter: zero-argument callable returning the ggplot2 layer to
            draw (it is added between the aesthetics).
        xid: name of the column mapped to the x axis.
        yid: name of the column mapped to the y axis.
        factorid: name of the column used, wrapped in ``factor(...)``, for
            both colour and fill.

    The graphics device is always closed, even when building or drawing
    the plot raises, so a failure cannot leave a stray device open that
    would capture later plots.
    """
    df = make_dataframe(data, xid, yid, factorid)
    factor_expr = 'factor(%s)' % factorid

    grdevices.pdf(file=filename, width=10, height=6)
    try:
        pp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x=xid, y=yid) + \
            ggplot2.aes_string(size=.5) + \
            ggplotter() + \
            ggplot2.aes_string(colour=factor_expr) + \
            ggplot2.aes_string(fill=factor_expr) + \
            ggplot2.opts(title=title) + \
            ggplot2.scale_fill_brewer(palette="Set2") + \
            ggplot2.scale_colour_brewer(palette="Set2")
        pp.plot()
    finally:
        grdevices.dev_off()
Exemple #6
0
def multiple_locus_net_informativeness_facet(locus_table, net_pi_table, loci):
    """Build a per-locus faceted scatter plot of net informativeness.

    Args:
        locus_table: name of the table providing ``locus`` and ``id``.
        net_pi_table: name of the table joined to ``locus_table`` on ``id``.
        loci: non-empty sequence of locus names; if the first entry is
            ``'all'`` (case-insensitive) no locus filter is applied.

    Returns:
        The composed ggplot object (it is not drawn here).
    """
    qry = ('"SELECT {0}.locus, time, pi FROM {0}, {1} \n'
           '            WHERE {0}.id = {1}.id').format(locus_table,
                                                       net_pi_table)
    if loci[0].lower() != 'all':
        # Build the IN list by hand.  Formatting ``tuple(loci)`` yields
        # ``('x',)`` for a single locus (a SQL syntax error) and ``u'..'``
        # prefixes for unicode names on Python 2.  Embedded single quotes
        # are doubled as SQL requires.
        in_list = ", ".join(
            "'{}'".format(name.replace("'", "''")) for name in loci)
        qry += ' and locus in ({})'.format(in_list)
    # Closing quote of the R string literal that wraps the query.
    qry += '"'

    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y='pi') + \
        ggplot2.geom_point(ggplot2.aes_string(colour = 'locus'), size = 3, \
        alpha = 0.4) + ggplot2.scale_x_reverse('years ago') + \
        ggplot2.facet_wrap(robjects.Formula('~ locus')) + \
        ggplot2.opts(**{'legend.position' : 'none'}) + \
        ggplot2.scale_y_continuous('phylogenetic informativeness')
    return plot
Exemple #7
0
def ggplot2_options():
  """Return the common ggplot2 theme options for the graphs.

  Text is set in a 15pt serif font throughout; the legend title and the
  y-axis title are bold (the latter rotated by 90 degrees), the x-axis
  title is blank, and the aspect ratio is 0.6180339888.
  """
  # Keyword sets for the three text flavours, each built on the previous.
  plain = {'family': 'serif', 'size': 15}
  bold = dict(plain, face='bold')
  rotated = dict(bold, angle=90, vjust=0.2)

  text = ggplot2.theme_text
  return ggplot2.opts(**{
      'axis.title.x': ggplot2.theme_blank(),
      'axis.title.y': text(**rotated),
      'axis.text.x': text(**plain),
      'axis.text.y': text(**plain),
      'legend.title': text(**bold),
      'legend.text': text(**plain),
      'aspect.ratio': 0.6180339888,
      'strip.text.x': text(**plain),
  })
Exemple #8
0
def ggplot2_options():
    """Return the ggplot2 theme options applied to every graph.

    Axis and legend text use a 15pt serif font, with bold titles; the
    y-axis title is rotated by 90 degrees, the x-axis title is blank, and
    the aspect ratio is 0.6180339888.
    """
    def plain():
        return ggplot2.theme_text(family='serif', size=15)

    def bold(**extra):
        return ggplot2.theme_text(family='serif', face='bold', size=15,
                                  **extra)

    elements = [
        ('axis.title.x', ggplot2.theme_blank()),
        ('axis.title.y', bold(angle=90, vjust=0.2)),
        ('axis.text.x', plain()),
        ('axis.text.y', plain()),
        ('legend.title', bold()),
        ('legend.text', plain()),
        ('aspect.ratio', 0.6180339888),
    ]
    return ggplot2.opts(**dict(elements))
Exemple #9
0
def retrieve(request):
    """Fetch intraday quotes for the posted stock, store them, and render.

    Reads ``stockname`` from ``request.POST``, downloads the quote data,
    saves a ``Stock`` with one tick per quote, writes a plot to
    ``static/stocks/images/tmp.png`` and renders the result page.  When
    the quote CSV is empty the index page is rendered with an error
    message instead.

    Args:
        request: the incoming request; must carry ``stockname`` in POST.

    Returns:
        The response produced by ``render``.
    """
    stockname = request.POST['stockname']
    q = GoogleIntradayQuote(stockname, 300, 1)

    # Test the stripped text itself: ``"".split("\n")`` is ``['']``, which
    # is truthy, so checking the split list can never detect "no data".
    csv_text = q.to_csv().strip()
    if not csv_text:
        # Build one string; comma-separated values would form a tuple.
        e = "Couldn't find " + stockname + " data"
        return render(request, 'stocks/index.html', {'error_message': e})
    qlines = csv_text.split("\n")

    s = Stock(ticker=q.symbol,
              start_date=timezone.now() - datetime.timedelta(days=1))
    s.save()
    for tick in q.get_ticks():
        dt = parse_datetime(tick[0])
        p = tick[1]
        # Debug trace; a single parenthesised argument prints the same on
        # Python 2 and Python 3.
        print("tick:  %s  price:  %s" % (tick, p))
        s.tick_set.create(time=dt, price=p)
    s.save()

    # tells the R plotting device to write the plot to a file
    f = "static/stocks/images/tmp.png"
    grdevices.png(file=f, width=512, height=512)
    try:
        # make a random plot
        rnorm = stats.rnorm
        df = {'value': rnorm(300, mean=0) + rnorm(100, mean=3),
              'other_value': rnorm(300, mean=0) + rnorm(100, mean=3),
              'mean': IntVector([0, ] * 300 + [3, ] * 100)}
        dataf_rnorm = robjects.DataFrame(df)

        gp = ggplot2.ggplot(dataf_rnorm)
        pp = gp + \
            ggplot2.aes_string(x='value', y='other_value') + \
            ggplot2.geom_bin2d() + \
            ggplot2.opts(title='geom_bin2d')
        pp.plot()
    finally:
        # Always release the device so a failed plot cannot leave it open
        # for the next request.
        grdevices.dev_off()

    context = {'stockname': stockname,
               'history': qlines}
    return render(request, 'stocks/retrieve.html', context)
Exemple #10
0
def mem_usage_graph(cfg):
    """Write a bar chart of memory usage to ``bargraph-memusage.pdf``.

    For every (variation, language, problem) combination in ``cfg`` the
    first line of the file named by ``get_mem_output`` is parsed as a
    float.  The values are drawn as dodged bars per problem, filled by
    language and faceted by variation.

    Args:
        cfg: configuration object providing ``variations``, ``languages``
            and ``problems``.

    The PDF device is closed even if building or drawing the plot raises.
    """
    r = robjects.r
    varis = []
    langs = []
    probs = []
    mems = []
    for var in cfg.variations:
        for lang in cfg.languages:
            for prob in cfg.problems:
                mem_filename = get_mem_output(lang, prob, var)
                with open(mem_filename, 'r') as mem_file:
                    mems.append(float(mem_file.readline()))
                varis.append(pretty_varis[var])
                langs.append(pretty_langs[lang])
                probs.append(prob)

    # memory usage is a simple histogram with all information in one graph.
    r.pdf('bargraph-memusage.pdf', height=pdf_height(), width=pdf_width())
    try:
        df = robjects.DataFrame({
            'Language': StrVector(langs),
            'Problem': StrVector(probs),
            'Variation': StrVector(varis),
            'Mem': FloatVector(mems)
        })

        gp = ggplot2.ggplot(df)

        # we rotate the x labels to make sure they don't overlap
        pp = gp + \
            ggplot2.opts(**{'axis.text.x': ggplot2.theme_text(angle=90, hjust=1)}) + \
            ggplot2.aes_string(x='Problem', y='Mem', fill='Language') + \
            ggplot2.geom_bar(position='dodge', stat='identity') + \
            ggplot2.facet_wrap('Variation') + \
            ggplot2_options() + \
            ggplot2_colors() + \
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
            robjects.r('ylab("Memory usage (in bytes)")')

        pp.plot()
    finally:
        # Release the device on every path so a failure cannot leave the
        # PDF open and capture later plots.
        r['dev.off']()
Exemple #11
0
def mem_usage_graph (cfg):
  """Write a bar chart of memory usage to ``bargraph-memusage.pdf``.

  For every (variation, language, problem) combination in ``cfg`` the
  first line of the file named by ``get_mem_output`` is parsed as a float.
  The values are drawn as dodged bars per problem, filled by language and
  faceted by variation.

  Args:
    cfg: configuration object providing ``variations``, ``languages`` and
      ``problems``.

  The PDF device is closed even if building or drawing the plot raises.
  """
  r = robjects.r
  varis = []
  langs = []
  probs = []
  mems = []
  for var in cfg.variations:
    for lang in cfg.languages:
      for prob in cfg.problems:
        mem_filename = get_mem_output (lang, prob, var)
        with open (mem_filename, 'r') as mem_file:
          mems.append (float (mem_file.readline()))
        varis.append (pretty_varis [var])
        langs.append (pretty_langs [lang])
        probs.append (prob)

  # memory usage is a simple histogram with all information in one graph.
  r.pdf ('bargraph-memusage.pdf', height=pdf_height (), width=pdf_width ())
  try:
    df = robjects.DataFrame({'Language': StrVector (langs),
                             'Problem': StrVector (probs),
                             'Variation' : StrVector (varis),
                             'Mem' : FloatVector (mems)
                             })

    gp = ggplot2.ggplot (df)

    # we rotate the x labels to make sure they don't overlap
    pp = gp + \
        ggplot2.opts (**{'axis.text.x': ggplot2.theme_text (angle = 90, hjust=1)}) + \
        ggplot2.aes_string (x='Problem', y='Mem', fill='Language') + \
        ggplot2.geom_bar (position='dodge', stat='identity') + \
        ggplot2.facet_wrap ('Variation') + \
        ggplot2_options () + \
        ggplot2_colors () + \
        robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
        robjects.r('ylab("Memory usage (in bytes)")')

    pp.plot ()
  finally:
    # Release the device on every path so a failure cannot leave the PDF
    # open and capture later plots.
    r['dev.off']()
Exemple #12
0
def line_plot (cfg, var, control, change_name, changing, selector, base_selector, basis):
  """Draw speedup against thread count, one line per entry of ``changing``.

  An 'ideal' series (speedup equal to the thread count) is added first.
  For every entry ``c`` of ``changing`` and every thread count ``n`` in
  ``cfg.threads`` the speedup ``mean(base) / mean(times at n)`` is
  recorded together with the bounds taken from the R ``pairwiseCI``
  result, and everything is plotted with error bars.

  Args:
    cfg: configuration object; only ``cfg.threads`` is read here.
    var: not referenced in the body.
    control: not referenced in the body (the ``control='N'`` below is a
      keyword argument of the R call, not this parameter).
    change_name: name of the data-frame column holding the series label;
      when it equals 'Language' labels are mapped through ``pretty_langs``.
    changing: iterable of series keys.
    selector: ``selector(c)`` returns a callable mapping a thread count
      to a sequence of timings.
    base_selector: ``base_selector(c)`` returns the baseline timings.
    basis: 'fastest', 'seq' or 'p1'; selects which baseline is used.

  NOTE(review): ``probs``, ``langs`` and ``r`` are not defined in this
  function; they have to exist at module level or the first loop raises
  NameError -- confirm against the rest of the file.
  NOTE(review): no graphics device is opened here although ``dev.off`` is
  called at the end; presumably the caller opens it -- confirm.
  """
  speedups = []
  thrds = []
  changes = []
  lowers = []
  uppers = []

  # 'ideal' reference series: speedup and both bounds equal the thread count.
  for n in cfg.threads:
    probs.append ('ideal')
    langs.append ('ideal')
    speedups.append (n)
    thrds.append (n)
    changes.append ('ideal')
    lowers.append (n)
    uppers.append (n)
    
  for c in changing:
    sel  = selector (c)

    # sequential base
    base = FloatVector (base_selector(c))
    # base with p = 1
    base_p1 = FloatVector (sel(1))
    # use fastest sequential program
    if basis == 'fastest' and mean (base_p1) < mean(base):
      base = base_p1
    elif basis == 'seq':
      pass
    elif basis == 'p1':
      base = base_p1
      
    for n in cfg.threads:
      ntimes = FloatVector (sel(n))

      # ratio confidence interval
      # One label per timing: 'Base' for the baseline runs, 'N' for the
      # runs at n threads.  ``base + ntimes`` is presumably a
      # concatenation matching these labels -- TODO confirm.
      labels = ['Base'] * r.length(base)[0] + ['N']*r.length (ntimes)[0]
      df = DataFrame ({'Times': base + ntimes, 
                       'Type': StrVector(labels)})
      ratio_test = r['pairwiseCI'] (r('Times ~ Type'), data=df,
                                    control='N',
                                    method='Param.ratio',
                                    **{'var.equal': False,
                                    'conf.level': 0.999})[0][0]

      # Elements 1 and 2 of the result are used as lower and upper bound.
      lowers.append (ratio_test[1][0])
      uppers.append (ratio_test[2][0])

      mn = mean (ntimes)      
      speedups.append (mean(base) / mn)
      # plot slowdowns
      #speedups.append (-mn/base)#(base / mn)
      thrds.append (n)
      if change_name == 'Language':
        changes.append (pretty_langs [c])
      else:
        changes.append (c)

  df = DataFrame ({'Speedup': FloatVector (speedups),
                   'Threads': IntVector (thrds),
                   change_name: StrVector (changes),
                   'Lower': FloatVector (lowers),
                   'Upper': FloatVector (uppers)
                   })
  # Series labels in plot order, 'ideal' first; used to name the shape
  # values passed to scale_shape_manual.
  ideal_changing = ['ideal']
  if change_name == 'Language':
    ideal_changing.extend ([pretty_langs [c] for c in changing])
  else:
    ideal_changing.extend (changing)

  legendVec = IntVector (range (len (ideal_changing)))
  legendVec.names = StrVector (ideal_changing)

  gg = ggplot2.ggplot (df)

  limits = ggplot2.aes (ymax = 'Upper', ymin = 'Lower')
  # NOTE(review): ``dodge`` is assigned but not used below.
  dodge = ggplot2.position_dodge (width=0.9)

  pp = gg + \
      ggplot2.geom_line() + ggplot2.geom_point(size=3) +\
      ggplot2.aes_string(x='Threads', y='Speedup', 
                         group=change_name, color=change_name, 
                         shape=change_name) + \
      ggplot2.scale_shape_manual(values=legendVec) + \
      ggplot2.geom_errorbar (limits, width=0.25) + \
      ggplot2_options () + \
      ggplot2_colors () + \
      ggplot2.opts (**{'axis.title.x' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 15, vjust=-0.2)}) + \
      robjects.r('ylab("Speedup")') + \
      robjects.r('xlab("Cores")')

      # ggplot2.xlim (min(threads), max(threads)) + ggplot2.ylim(min(threads), max(threads)) +\
  pp.plot()

  r['dev.off']()
Exemple #13
0
def as_dataframe (cfg, results, basis):
  """Collect the benchmark results into an R data frame and plot speedups.

  One row is produced per (variation, language, problem, thread count).
  Rows for the artificial 'ideal' language or problem carry a speedup
  equal to the thread count and zero time, interval half-width and memory.
  The frame is assigned to the R variable ``df``, saved to
  ``performance.Rda``, reshaped to wide form on ``Variation`` and the
  ``expertpar`` speedups are drawn to ``speedup-expertpar-all.pdf``.

  Args:
    cfg: configuration object providing ``variations``, ``languages``,
      ``problems`` and ``threads``.
    results: nested mapping indexed as
      ``results[thread][problem][variation][language][0]``, yielding a
      sequence of timings.
    basis: 'fastest', 'seq' or 'p1'; selects the baseline for speedups.

  Returns:
    None.
  """
  r = robjects.r
  varis = []
  langs = []
  probs = []
  times = []
  threads = []

  # speedups, with upper and lower bounds below
  speedups = []
  speedup_lowers = []
  speedup_uppers = []

  ses = [] # half-widths of the time confidence intervals
  mems = [] # memory usage

  langs_ideal = list (cfg.languages)
  langs_ideal.append ('ideal')

  probs_ideal = list (cfg.problems)
  probs_ideal.append ('ideal')

  for var in cfg.variations:
    for lang in langs_ideal:
      for prob in probs_ideal:
        for thread in cfg.threads:

          if lang == 'ideal' and prob == 'ideal':
            continue
          elif lang == 'ideal' or prob == 'ideal':
            # Reference row: perfect scaling, no measurements.
            # NOTE(review): relies on pretty_langs having an 'ideal' key.
            varis.append (var)
            langs.append (pretty_langs[lang])
            probs.append (prob)
            threads.append (thread)
            speedups.append (thread)
            speedup_lowers.append (thread)
            speedup_uppers.append (thread)
            times.append (0)
            ses.append(0)
            mems.append (0)
            continue

          varis.append (var)
          langs.append (pretty_langs [lang])
          probs.append (prob)
          threads.append (thread)

          # Sequential variations are looked up under the last thread
          # count, whatever the row's own thread count is.
          if var.find('seq') >= 0:
            thread = cfg.threads[-1]

          vals = FloatVector (results[thread][prob][var][lang][0])
          time = mean (vals)
          times.append (time)

          #
          # time confidence interval
          #
          # The keyword must be exactly "conf.level": with a stray leading
          # space it does not match t.test's argument and the default
          # level is used instead of 0.999.
          t_result = r['t.test'] (FloatVector(vals),
                                  **{"conf.level": 0.999}).rx ('conf.int')[0]
          ses.append ((t_result[1] - t_result[0])/2)

          #
          # memory usage
          #
          mem_filename = get_mem_output (lang, prob, var)
          with open (mem_filename, 'r') as mem_file:
            mem = mem_file.readline()
            mems.append (float (mem))

          # we include dummy data for the sequential case to avoid the
          # speedup calculation below
          if var.find('seq') >= 0:
            speedups.append (1)
            speedup_lowers.append (1)
            speedup_uppers.append (1)
            continue

          #
          # speedup values and confidence intervals
          #
          seq_vals = results[cfg.threads[-1]][prob][var.replace ('par', 'seq')][lang][0]

          # sequential base
          base = FloatVector (seq_vals)
          # base with p = 1
          base_p1 = FloatVector (results[1][prob][var][lang][0])
          # use fastest sequential program
          if basis == 'fastest' and mean (base_p1) < mean(base):
            base = base_p1
          elif basis == 'seq':
            pass
          elif basis == 'p1':
            base = base_p1

          # One label per timing: 'Base' for baseline runs, 'N' for the
          # runs of this row.
          labels = ['Base'] * r.length(base)[0] + ['N']*r.length (vals)[0]
          df = DataFrame ({'Times': base + vals,
                           'Type': StrVector(labels)})
          ratio_test = r['pairwiseCI'] (r('Times ~ Type'), data=df,
                                        control='N',
                                        method='Param.ratio',
                                        **{'var.equal': False})[0][0]

          speedups.append (mean(base) / time)
          speedup_lowers.append (ratio_test[1][0])
          speedup_uppers.append (ratio_test[2][0])

  df = robjects.DataFrame({'Language': StrVector (langs),
                           'Problem': StrVector (probs),
                           'Variation' : StrVector (varis),
                           'Threads': IntVector (threads),

                           'Time': FloatVector (times),
                           'SE': FloatVector (ses),

                           'Speedup': FloatVector (speedups),
                           'SpeedupLower': FloatVector (speedup_lowers),
                           'SpeedupUpper': FloatVector (speedup_uppers),

                           'Mem' : FloatVector (mems)
                           })


  r.assign ('df', df)

  r ('save (df, file="performance.Rda")')

  # reshape the data to make variation not a column itself, but a part of
  # the other columns describe ie, time, speedup, etc.
  #
  # also, remove the 'ideal' problem as we don't want it in this plot.
  df = r('''
redf = reshape (df, 
                timevar="Variation", 
                idvar = c("Language","Problem","Threads"), 
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

  r.pdf ('speedup-expertpar-all.pdf',
         height=6.5, width=10)
  try:
    change_name = 'Language'

    gg = ggplot2.ggplot (df)

    limits = ggplot2.aes (ymax = 'SpeedupUpper.expertpar', ymin = 'SpeedupLower.expertpar')

    pp = gg + \
        ggplot2.geom_line() + ggplot2.geom_point(size=2.5) +\
        robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') +\
        ggplot2.aes_string(x='Threads', y='Speedup.expertpar',
                           group=change_name, color=change_name,
                           shape=change_name) + \
        ggplot2.geom_errorbar (limits, width=0.25) + \
        ggplot2.opts (**{'axis.title.x' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10, vjust=-0.2),
                         'axis.title.y' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10, angle=90, vjust=0.2),
                         'axis.text.x' : ggplot2.theme_text(family = 'serif', size = 10),
                         'axis.text.y' : ggplot2.theme_text(family = 'serif', size = 10),
                         'legend.title' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10),
                         'legend.text' : ggplot2.theme_text(family = 'serif', size = 10),
                         'strip.text.x' : ggplot2.theme_text(family = 'serif', size = 10),
                         'aspect.ratio' : 1,
                         }) + \
        robjects.r('ylab("Speedup")') + \
        robjects.r('xlab("Number of cores")') + \
        ggplot2.facet_wrap ('Problem', nrow = 2)

    pp.plot()
  finally:
    # Close the PDF device on every path so a failed plot cannot leave it
    # open.
    r['dev.off']()
Exemple #14
0
# NOTE(review): the "#-- <name>-begin" / "#-- <name>-end" comments look like
# snippet markers read by an external tool -- keep them verbatim; confirm
# before moving or renaming them.
#-- ggplot2mtcars-end
# Closes the graphics device that is current at this point; it is opened
# before the part of the script visible here.
grdevices.dev_off()

# New PNG device for a figure laid out as one row of three columns; the
# layout viewport is pushed so each plot below can target one column.
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width = 1000, height = 350, antialias="subpixel", type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# Viewport selecting row 1, column 1 of the layout.
vp = grid.viewport(**{'layout.pos.col':1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
     ggplot2.aes_string(x='value', y='other_value') + \
     ggplot2.geom_bin2d() + \
     ggplot2.opts(title =  'geom_bin2d')
pp.plot(vp = vp)
#-- ggplot2geombin2d-end

# Viewport selecting row 1, column 2 of the layout.
vp = grid.viewport(**{'layout.pos.col':2, 'layout.pos.row': 1})
#-- ggplot2geomdensity2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
     ggplot2.aes_string(x='value', y='other_value') + \
     ggplot2.geom_density2d() + \
     ggplot2.opts(title =  'geom_density2d')
pp.plot(vp = vp)
#-- ggplot2geomdensity2d-end

# Viewport selecting row 1, column 3; it is used by code beyond the part
# of the script visible here.
vp = grid.viewport(**{'layout.pos.col':3, 'layout.pos.row': 1})
Exemple #15
0


from rpy2.robjects.lib import ggplot2
# Running time against the number of repetitions: one line plus points per
# ``code`` value, one facet per ``sequence`` value.
p = (
    ggplot2.ggplot(dataf)
    + ggplot2.geom_line(
        ggplot2.aes_string(x="n_loop", y="time", colour="code"))
    + ggplot2.geom_point(
        ggplot2.aes_string(x="n_loop", y="time", colour="code"))
    + ggplot2.facet_wrap(Formula('~sequence'))
    + ggplot2.scale_y_continuous('running time')
    + ggplot2.scale_x_continuous('repeated n times')
    + ggplot2.xlim(0, max(n_loops))
    + ggplot2.opts(title="Benchmark (running time)")
)


from rpy2.robjects.packages import importr
grdevices = importr('grDevices')

# Draw the plot into a PNG file, then close the device.
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")

# List of per-group linear fits of time on n_loop, excluding missing values.
fit = nlme.lmList(
    Formula('time ~ n_loop | group'),
    data=dataf,
    na_action=stats.na_exclude,
)
Exemple #16
0
	def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
		"""Draw ``self.df`` with ggplot2 into the PNG file ``fn``.

		x, y, col and group name the columns mapped to the corresponding
		aesthetics (col and group are optional).  w and h are the PNG
		width and height, size is the point size.  The boolean switches
		add layers in this order: boxplot, point (jittered when jitter is
		set), boxplot2 (drawn over the points), smooth (a linear fit when
		smooth == 'lm'; se toggles its error band), density, line.  title
		defaults to the last path component of fn; flip swaps the axes.
		"""
		df = self.df
		grdevices = importr('grDevices')

		if not title:
			title = fn.split("/")[-1]

		grdevices.png(file=fn, width=w, height=h)
		pp = ggplot2.ggplot(df)

		# Base mapping: x and y always, col/group only when supplied.
		mapping = {'x': x, 'y': y}
		if col:
			mapping['col'] = col
		if group:
			mapping['group'] = group
		pp += ggplot2.aes_string(**mapping)

		if boxplot:
			box_args = [ggplot2.aes_string(fill=col)] if col else []
			pp += ggplot2.geom_boxplot(*box_args, color='blue')

		if point:
			point_args = [ggplot2.aes_string(fill=col, col=col)] if col else []
			point_kwargs = {'size': size}
			if jitter:
				point_kwargs['position'] = 'jitter'
			pp += ggplot2.geom_point(*point_args, **point_kwargs)

		if boxplot2:
			if col:
				pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue', outlier_colour="NA")
			else:
				pp += ggplot2.geom_boxplot(color='blue')

		if smooth:
			smooth_kwargs = {'size': 1, 'se': se}
			if smooth == 'lm':
				smooth_kwargs['method'] = 'lm'
			if col:
				pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col), **smooth_kwargs)
			else:
				pp += ggplot2.stat_smooth(col='blue', **smooth_kwargs)

		if density:
			pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

		if line:
			pp += ggplot2.geom_line(position='jitter')

		theme = {
			'title': title,
			'axis.text.x': ggplot2.theme_text(size=24),
			'axis.text.y': ggplot2.theme_text(size=24, hjust=1),
		}
		pp += ggplot2.opts(**theme)
		pp += ggplot2.scale_colour_hue()
		if flip:
			pp += ggplot2.coord_flip()

		pp.plot()
		grdevices.dev_off()
		print(">> saved: "+fn)
Exemple #17
0
def as_dataframe(cfg, results, basis):
    """Collect benchmark results into an R data frame and plot speedups.

    One row is produced per (variation, language, problem, thread) taken
    from ``cfg.variations``, ``cfg.languages``, ``cfg.problems`` and
    ``cfg.threads``, plus 'ideal' rows whose speedup equals the thread
    count.  Timings are read from
    ``results[thread][prob][var][lang][0]``; memory usage is read from the
    first line of the file named by ``get_mem_output(lang, prob, var)``.

    Args:
        cfg: Configuration object providing ``variations``, ``languages``,
            ``problems`` and ``threads``.
        results: Nested mapping of timing samples, indexed as shown above.
        basis: Which baseline the speedup is relative to: ``'seq'`` (the
            sequential variation), ``'p1'`` (the same variation at one
            thread) or ``'fastest'`` (whichever of the two has the smaller
            mean).

    Side effects:
        Assigns ``df`` in the R session, saves it to ``performance.Rda`` and
        writes the plot to ``speedup-expertpar-all.pdf``.  Returns ``None``.
    """
    r = robjects.r
    varis = []
    langs = []
    probs = []
    times = []
    threads = []

    # speedups, with upper and lower bounds below
    speedups = []
    speedup_lowers = []
    speedup_uppers = []

    ses = []  # half-width of the confidence interval on the time
    mems = []  # memory usage

    langs_ideal = list(cfg.languages)
    langs_ideal.append('ideal')

    probs_ideal = list(cfg.problems)
    probs_ideal.append('ideal')

    for var in cfg.variations:
        for lang in langs_ideal:
            for prob in probs_ideal:
                for thread in cfg.threads:

                    if lang == 'ideal' and prob == 'ideal':
                        continue
                    elif lang == 'ideal' or prob == 'ideal':
                        # Synthetic row: speedup equals the thread count.
                        varis.append(var)
                        langs.append(pretty_langs[lang])
                        probs.append(prob)
                        threads.append(thread)
                        speedups.append(thread)
                        speedup_lowers.append(thread)
                        speedup_uppers.append(thread)
                        times.append(0)
                        ses.append(0)
                        mems.append(0)
                        continue

                    varis.append(var)
                    langs.append(pretty_langs[lang])
                    probs.append(prob)
                    threads.append(thread)

                    is_sequential = 'seq' in var
                    # Sequential variations are looked up under the last
                    # entry of cfg.threads, whatever thread row is reported.
                    lookup_thread = cfg.threads[-1] if is_sequential else thread

                    vals = FloatVector(results[lookup_thread][prob][var][lang][0])
                    time = mean(vals)
                    times.append(time)

                    #
                    # time confidence interval
                    #
                    # The key must be exactly 'conf.level': the previous
                    # spelling had a leading space, which does not match
                    # t.test's argument name, so 0.999 never took effect.
                    t_result = r['t.test'](vals, **{
                        'conf.level': 0.999
                    }).rx('conf.int')[0]
                    ses.append((t_result[1] - t_result[0]) / 2)

                    #
                    # memory usage
                    #
                    mem_filename = get_mem_output(lang, prob, var)
                    with open(mem_filename, 'r') as mem_file:
                        mems.append(float(mem_file.readline()))

                    # we include dummy data for the sequential case to avoid the
                    # speedup calculation below
                    if is_sequential:
                        speedups.append(1)
                        speedup_lowers.append(1)
                        speedup_uppers.append(1)
                        continue

                    #
                    # speedup values and confidence intervals
                    #
                    seq_vals = results[cfg.threads[-1]][prob][var.replace(
                        'par', 'seq')][lang][0]

                    # sequential base
                    base = FloatVector(seq_vals)
                    # base with p = 1
                    base_p1 = FloatVector(results[1][prob][var][lang][0])
                    # use fastest sequential program
                    if basis == 'fastest' and mean(base_p1) < mean(base):
                        base = base_p1
                    elif basis == 'seq':
                        pass
                    elif basis == 'p1':
                        base = base_p1

                    labels = ['Base'
                              ] * r.length(base)[0] + ['N'] * r.length(vals)[0]
                    df = DataFrame({
                        'Times': base + vals,
                        'Type': StrVector(labels)
                    })
                    # NOTE(review): no 'conf.level' is passed here, while the
                    # time interval above asks for 0.999 -- confirm whether
                    # the ratio interval should use the same level.
                    ratio_test = r['pairwiseCI'](r('Times ~ Type'),
                                                 data=df,
                                                 control='N',
                                                 method='Param.ratio',
                                                 **{
                                                     'var.equal': False
                                                 })[0][0]

                    speedups.append(mean(base) / time)
                    speedup_lowers.append(ratio_test[1][0])
                    speedup_uppers.append(ratio_test[2][0])

    df = robjects.DataFrame({
        'Language': StrVector(langs),
        'Problem': StrVector(probs),
        'Variation': StrVector(varis),
        'Threads': IntVector(threads),
        'Time': FloatVector(times),
        'SE': FloatVector(ses),
        'Speedup': FloatVector(speedups),
        'SpeedupLower': FloatVector(speedup_lowers),
        'SpeedupUpper': FloatVector(speedup_uppers),
        'Mem': FloatVector(mems)
    })

    r.assign('df', df)

    r('save (df, file="performance.Rda")')

    # reshape the data to make variation not a column itself, but a part of
    # the other columns describe ie, time, speedup, etc.
    #
    # also, remove the 'ideal' problem as we don't want it in this plot.
    df = r('''
redf = reshape (df, 
                timevar="Variation", 
                idvar = c("Language","Problem","Threads"), 
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

    r.pdf('speedup-expertpar-all.pdf', height=6.5, width=10)
    try:
        change_name = 'Language'

        gg = ggplot2.ggplot(df)

        limits = ggplot2.aes(ymax='SpeedupUpper.expertpar',
                             ymin='SpeedupLower.expertpar')

        pp = (gg +
              ggplot2.geom_line() + ggplot2.geom_point(size=2.5) +
              robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') +
              ggplot2.aes_string(x='Threads', y='Speedup.expertpar',
                                 group=change_name, color=change_name,
                                 shape=change_name) +
              ggplot2.geom_errorbar(limits, width=0.25) +
              ggplot2.opts(**{'axis.title.x': ggplot2.theme_text(family='serif', face='bold', size=10, vjust=-0.2),
                              'axis.title.y': ggplot2.theme_text(family='serif', face='bold', size=10, angle=90, vjust=0.2),
                              'axis.text.x': ggplot2.theme_text(family='serif', size=10),
                              'axis.text.y': ggplot2.theme_text(family='serif', size=10),
                              'legend.title': ggplot2.theme_text(family='serif', face='bold', size=10),
                              'legend.text': ggplot2.theme_text(family='serif', size=10),
                              'strip.text.x': ggplot2.theme_text(family='serif', size=10),
                              'aspect.ratio': 1,
                              }) +
              robjects.r('ylab("Speedup")') +
              robjects.r('xlab("Number of cores")') +
              ggplot2.facet_wrap('Problem', nrow=2))

        pp.plot()
    finally:
        # Close the PDF device even if building or drawing the plot fails.
        r['dev.off']()
Exemple #18
0
    def plot(self,
             fn,
             x='x',
             y='y',
             col=None,
             group=None,
             w=1100,
             h=800,
             size=2,
             smooth=True,
             point=True,
             jitter=False,
             boxplot=False,
             boxplot2=False,
             title=False,
             flip=False,
             se=False,
             density=False,
             line=False):
        """Draw ``self.df`` with ggplot2 and save the figure as a PNG file.

        Layers are added in a fixed order: boxplot, points, boxplot2,
        smoother, density, line.

        Args:
            fn: Output path of the PNG; also printed once the file is saved.
            x, y: Column names mapped to the x and y aesthetics.
            col: Optional column mapped to colour (and to fill for the
                point and boxplot layers).
            group: Optional column mapped to the group aesthetic.
            w, h: Width and height passed to the PNG device.
            size: Point size.
            smooth: Any true value adds ``stat_smooth``; the string
                ``'lm'`` additionally selects ``method='lm'``.
            point: Add a point layer.
            jitter: Jitter the point layer (only used when ``point``).
            boxplot: Add a boxplot layer before the points.
            boxplot2: Add a boxplot layer after the points.
            title: Plot title; defaults to the last ``/``-separated
                component of ``fn``.
            flip: Add ``coord_flip`` to the plot.
            se: Forwarded to ``stat_smooth`` as its ``se`` argument.
            density: Add a density layer with ``y='..count..'``.
            line: Add a jittered line layer.
        """
        grdevices = importr('grDevices')

        if not title:
            title = fn.split("/")[-1]

        grdevices.png(file=fn, width=w, height=h)
        try:
            # Base mapping: x/y always, col/group only when requested.
            aes_args = {'x': x, 'y': y}
            if col:
                aes_args['col'] = col
            if group:
                aes_args['group'] = group
            pp = ggplot2.ggplot(self.df)
            pp += ggplot2.aes_string(**aes_args)

            if boxplot:
                if col:
                    pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                               color='blue')
                else:
                    pp += ggplot2.geom_boxplot(color='blue')

            if point:
                point_args = {'size': size}
                if jitter:
                    point_args['position'] = 'jitter'
                if col:
                    pp += ggplot2.geom_point(
                        ggplot2.aes_string(fill=col, col=col), **point_args)
                else:
                    pp += ggplot2.geom_point(**point_args)

            if boxplot2:
                if col:
                    pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                               color='blue',
                                               outlier_colour="NA")
                else:
                    # NOTE(review): unlike the coloured branch, outliers are
                    # not hidden here -- confirm whether that is intended.
                    pp += ggplot2.geom_boxplot(color='blue')

            if smooth:
                smooth_args = {'size': 1, 'se': se}
                if smooth == 'lm':
                    smooth_args['method'] = 'lm'
                if col:
                    pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                              **smooth_args)
                else:
                    pp += ggplot2.stat_smooth(col='blue', **smooth_args)

            if density:
                pp += ggplot2.geom_density(
                    ggplot2.aes_string(x=x, y='..count..'))

            if line:
                pp += ggplot2.geom_line(position='jitter')

            pp += ggplot2.opts(
                **{
                    'title': title,
                    'axis.text.x': ggplot2.theme_text(size=24),
                    'axis.text.y': ggplot2.theme_text(size=24, hjust=1)
                })
            #pp+=ggplot2.scale_colour_brewer(palette="Set1")
            pp += ggplot2.scale_colour_hue()
            if flip:
                pp += ggplot2.coord_flip()

            pp.plot()
        finally:
            # Always release the PNG device, even when building or drawing
            # the plot fails; otherwise the device stays open in the R session.
            grdevices.dev_off()
        # Parenthesised form prints the same text under Python 2 and 3.
        print(">> saved: " + fn)
Exemple #19
0
              width=1300,
              height=1000)

## plot the map
## note that the order matters when we add another layer in ggplot (here IL_railroads): first aes, then data, that's different from R
## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349)
## note that we use a dictionary to set the opts, so that option names which are not valid Python keywords (for example legend.key.size) can be passed
## NOTE(review): two fill scales are added back to back; ggplot2 normally keeps only the last scale added
## for an aesthetic, so the blue/red gradient may be replaced by the default continuous scale -- confirm.
p_map = ggplot2.ggplot(IL_final) + \
     ggplot2.geom_polygon(ggplot2.aes(x = 'long', y = 'lat', group = 'group', color = 'ObamaShare', fill = 'ObamaShare')) + \
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{
         'legend.position': 'left',
         'legend.key.size': robjects.r.unit(2, 'lines'),
         'legend.title': ggplot2.theme_text(size = 14, hjust=0),
         'legend.text': ggplot2.theme_text(size = 12),
         'title': "Obama Vote Share and Distance to Railroads in IL",
         'plot.title': ggplot2.theme_text(size = 24),
         'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'),
         'panel.background': ggplot2.theme_blank(),
         'panel.grid.minor': ggplot2.theme_blank(),
         'panel.grid.major': ggplot2.theme_blank(),
         'axis.ticks': ggplot2.theme_blank(),
         'axis.title.x': ggplot2.theme_blank(),
         'axis.title.y': ggplot2.theme_blank(),
         'axis.text.x': ggplot2.theme_blank(),
         'axis.text.y': ggplot2.theme_blank(),
     }) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()

p_map.plot()

## add the scatterplot
## define layout of subplot with viewports

vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4)

p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
Exemple #20
0
  # print str(a)
   try:
      if dsumFC.has_key(drug):
         dsumFC[drug]['Fold_Change'].append(math.log10(float(val)))
         dsumY[drug]['Year'].append(yr)
      else:
         dsumFC[drug]= {'Fold_Change': [math.log10(float(val)),]}
         dsumY[drug]= {'Year': [yr,]}
   except:
      print "FAILURE: dsumFC="+str(dsumFC)+"\n\ndsumY="+str(dsumY)
      sys.exit()
drugs = dsumFC.keys()

# The device is opened once, before the loop, and closed once afterwards, so
# every drug's boxplot is drawn to the same "drugs.pdf" device.
# NOTE(review): the original had this call at column 0 in the middle of the
# loop body, which is not valid Python; opening the device once is the
# reading assumed here -- confirm against the intended output.
grdevices.pdf(file="drugs.pdf",width=7,height=7)
try:
   for x in drugs:
      od = rlc.OrdDict([('Fold_Change',robjects.FloatVector(dsumFC[x]['Fold_Change'])),('Year',robjects.FactorVector(dsumY[x]['Year'])),('Drug',robjects.FactorVector(x))])

      dataf = robjects.DataFrame(od)
      gp3 = ggplot2.ggplot(dataf)
      pp3 = gp3 + ggplot2.scale_fill_brewer(palette='BrBG',name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') +  ggplot2.geom_boxplot() + ggplot2.opts(title =  x+" Yearly Trend")
      # pp3 = gp3 + ggplot2.scale_colour_hue(h=base.c(180,270),name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') +  ggplot2.geom_boxplot() + ggplot2.opts(title =  x+" Yearly Trend")
      #+ ggplot2.scale_y_log10()
      pp3.plot()
finally:
   # Close the device even if one of the plots fails.
   grdevices.dev_off()

f.close()
# Parenthesised form prints the same text under Python 2 and 3.
print("\nfinished\n")



Exemple #21
0
grdevices = importr('grDevices')
grdevices.png(file='/Users/user/Downloads/data/mapplot.png', width=1300, height=1000)

## plot the map
## note that the order matters when we add another layer in ggplot (here IL_railroads): first aes, then data, that's different from R
## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349)
## note that we use a dictionary to set the opts, so that option names which are not valid Python keywords (for example legend.key.size) can be passed
## NOTE(review): two fill scales are added back to back; ggplot2 normally keeps only the last scale added
## for an aesthetic, so the blue/red gradient may be replaced by the default continuous scale -- confirm.
p_map = ggplot2.ggplot(IL_final) + \
     ggplot2.geom_polygon(ggplot2.aes(x = 'long', y = 'lat', group = 'group', color = 'ObamaShare', fill = 'ObamaShare')) + \
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{
         'legend.position': 'left',
         'legend.key.size': robjects.r.unit(2, 'lines'),
         'legend.title': ggplot2.theme_text(size = 14, hjust=0),
         'legend.text': ggplot2.theme_text(size = 12),
         'title': "Obama Vote Share and Distance to Railroads in IL",
         'plot.title': ggplot2.theme_text(size = 24),
         'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'),
         'panel.background': ggplot2.theme_blank(),
         'panel.grid.minor': ggplot2.theme_blank(),
         'panel.grid.major': ggplot2.theme_blank(),
         'axis.ticks': ggplot2.theme_blank(),
         'axis.title.x': ggplot2.theme_blank(),
         'axis.title.y': ggplot2.theme_blank(),
         'axis.text.x': ggplot2.theme_blank(),
         'axis.text.y': ggplot2.theme_blank(),
     }) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()

p_map.plot()

## add the scatterplot
## define layout of subplot with viewports

vp_sub = grid.viewport(x = 0.19, y = 0.2, width = 0.32, height = 0.4)
 
p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
    [d['code'][x] + ':' + d['sequence'][x] for x in xrange(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2

# Build the benchmark figure one layer at a time: running time against the
# number of repetitions, coloured by implementation, one panel per sequence.
p = ggplot2.ggplot(dataf)
p += ggplot2.geom_line(
    ggplot2.aes_string(x="n_loop", y="time", colour="code"))
p += ggplot2.geom_point(
    ggplot2.aes_string(x="n_loop", y="time", colour="code"))
p += ggplot2.facet_wrap(Formula('~sequence'))
p += ggplot2.scale_y_continuous('running time')
p += ggplot2.scale_x_continuous('repeated n times')
p += ggplot2.xlim(0, max(n_loops))
p += ggplot2.opts(title = "Benchmark (running time)")

from rpy2.robjects.packages import importr

# Render the figure into a PNG file, then release the device.
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")

# Fit `time ~ n_loop` separately within each level of `group`.
fit = nlme.lmList(
    Formula('time ~ n_loop | group'),
    data=dataf,
    na_action=stats.na_exclude,
)

# scale to R's slope
Exemple #23
0
def line_plot(cfg, var, control, change_name, changing, selector,
              base_selector, basis):
    """Plot speedup against thread count, one series per entry of ``changing``.

    An 'ideal' series, whose speedup equals the thread count, is added for
    every value in ``cfg.threads``.  For each entry ``c`` of ``changing`` the
    timings at ``n`` threads are ``selector(c)(n)`` and the sequential
    baseline is ``base_selector(c)``.  ``basis`` selects the baseline:
    ``'seq'`` keeps the sequential one, ``'p1'`` uses the one-thread timings,
    and ``'fastest'`` uses the one-thread timings only when their mean is
    smaller.

    ``var`` and ``control`` are accepted but not used here.  No graphics
    device is opened in this function; it draws the plot and then calls R's
    ``dev.off()``.
    """
    by_language = change_name == 'Language'

    speedup_vals = []
    thread_counts = []
    series_labels = []
    ci_lower = []
    ci_upper = []

    # Reference series: perfect scaling.
    # NOTE(review): `probs` and `langs` are not defined in this function;
    # presumably they are module-level lists -- confirm, otherwise this loop
    # raises NameError on its first iteration.
    for cores in cfg.threads:
        probs.append('ideal')
        langs.append('ideal')
        speedup_vals.append(cores)
        thread_counts.append(cores)
        series_labels.append('ideal')
        ci_lower.append(cores)
        ci_upper.append(cores)

    # NOTE(review): `r` below is likewise taken from the enclosing scope
    # (presumably robjects.r) -- confirm.
    for entry in changing:
        timings_for = selector(entry)

        # sequential base
        baseline = FloatVector(base_selector(entry))
        # base with p = 1
        single_thread = FloatVector(timings_for(1))
        # 'p1' always switches to the one-thread timings; 'fastest' only
        # when they are quicker on average; anything else keeps the
        # sequential baseline.
        if basis == 'p1' or (basis == 'fastest'
                             and mean(single_thread) < mean(baseline)):
            baseline = single_thread

        for cores in cfg.threads:
            samples = FloatVector(timings_for(cores))

            # ratio confidence interval
            n_base = r.length(baseline)[0]
            n_samples = r.length(samples)[0]
            kinds = ['Base'] * n_base + ['N'] * n_samples
            ratio_frame = DataFrame({
                'Times': baseline + samples,
                'Type': StrVector(kinds)
            })
            ci_options = {'var.equal': False, 'conf.level': 0.999}
            ratio_ci = r['pairwiseCI'](r('Times ~ Type'),
                                       data=ratio_frame,
                                       control='N',
                                       method='Param.ratio',
                                       **ci_options)[0][0]

            ci_lower.append(ratio_ci[1][0])
            ci_upper.append(ratio_ci[2][0])

            sample_mean = mean(samples)
            speedup_vals.append(mean(baseline) / sample_mean)
            thread_counts.append(cores)
            series_labels.append(pretty_langs[entry] if by_language else entry)

    df = DataFrame({
        'Speedup': FloatVector(speedup_vals),
        'Threads': IntVector(thread_counts),
        change_name: StrVector(series_labels),
        'Lower': FloatVector(ci_lower),
        'Upper': FloatVector(ci_upper)
    })

    # Legend entries: 'ideal' first, then one per changing value.
    if by_language:
        legend_names = ['ideal'] + [pretty_langs[c] for c in changing]
    else:
        legend_names = ['ideal'] + list(changing)

    legendVec = IntVector(range(len(legend_names)))
    legendVec.names = StrVector(legend_names)

    gg = ggplot2.ggplot(df)

    limits = ggplot2.aes(ymax='Upper', ymin='Lower')
    # Created as in the original, but not used by the plot below.
    dodge = ggplot2.position_dodge(width=0.9)

    pp = (gg +
          ggplot2.geom_line() + ggplot2.geom_point(size=3) +
          ggplot2.aes_string(x='Threads', y='Speedup',
                             group=change_name, color=change_name,
                             shape=change_name) +
          ggplot2.scale_shape_manual(values=legendVec) +
          ggplot2.geom_errorbar(limits, width=0.25) +
          ggplot2_options() +
          ggplot2_colors() +
          ggplot2.opts(**{'axis.title.x': ggplot2.theme_text(family='serif', face='bold', size=15, vjust=-0.2)}) +
          robjects.r('ylab("Speedup")') +
          robjects.r('xlab("Cores")'))

    pp.plot()

    r['dev.off']()