Example #1
0
def bargraph_language(results):
    """Render one lines-of-code bar chart per language.

    ``results`` is read through ``keys()`` and indexing; each key is
    unpacked as ``(language, problem, variation)`` and each value is fed to
    ``IntVector``.  For every entry of ``languages`` (a name defined outside
    this function) the file ``bargraph-loc-lang-<language>.pdf`` is written,
    with problems on the x axis and one dodged bar per variation.
    """
    r = robjects.r

    for language in languages:
        variation_col = []
        problem_col = []
        loc_col = []
        for key in results.keys():
            lang, prob, var = key
            if lang != language:
                continue
            loc = results[key]
            variation_col.append(pretty_varis[var])
            problem_col.append(prob)
            loc_col.append(loc)

        # The PDF device is opened before the data frame is assembled.
        r.pdf('bargraph-loc-lang-' + language + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        frame = robjects.DataFrame({
            'Variation': StrVector(variation_col),
            'Problem': StrVector(problem_col),
            'Lines': IntVector(loc_col),
        })

        chart = ggplot2.ggplot(frame)
        layers = [
            ggplot2.aes_string(x='Problem', y='Lines', fill='Variation'),
            ggplot2.geom_bar(position='dodge', stat='identity'),
            ggplot2_options(),
            ggplot2_colors(),
            # Fixed x-axis order of the problems.
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
            robjects.r('ylab("Lines of Code")'),
        ]
        for layer in layers:
            chart = chart + layer
        chart.plot()
        r['dev.off']()
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb status counts per crawl as a horizontal stacked bar chart.

     When ``row_filter`` is non-empty, only rows of ``data`` whose ``type``
     is listed in it are kept.  The ``crawldb:status:db_`` prefix is
     stripped from the listed values and the ``type`` column becomes an
     ordered categorical whose categories follow the reversed
     ``row_filter`` order.  Values in ``row_filter`` that do not carry the
     prefix are kept as categories unchanged so their rows are not turned
     into missing values.

     The ``ratio`` argument is accepted but overridden by a value computed
     from the number of distinct crawls.  The plot is saved to
     ``PLOTDIR/img_file`` and returned.
     """
     prefix = '^crawldb:status:db_'
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     # Work on a private copy so the in-place edits below never reach the
     # caller's frame.
     data = data.copy()
     categories = []
     for value in row_filter or ():
         replacement = re.sub(prefix, '', value)
         if replacement != value:
             data.replace(to_replace=value, value=replacement, inplace=True)
         if replacement not in categories:
             categories.append(replacement)
     # list.reverse() reverses in place and returns None, which made pandas
     # infer the categories and lose the requested order; pass a reversed
     # copy instead.  With nothing to order, let pandas infer as before.
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories[::-1] or None)
     data['size'] = data['size'].astype(float)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts\n(before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
Example #3
0
def compare_sum_barplot(locus_table, interval_table, intervals, loci, names,
        rows):
    """Build a dodged bar plot of summed ``pi`` per interval and database.

    The R object ``data`` is aggregated into ``agg_data`` (sum of ``pi`` by
    ``interval`` and ``db``).  When more than one interval is given, the
    ``interval`` factor is re-levelled using ``order_intervals``.  ``loci``
    is not used.  Returns the ggplot object without drawing it.
    """
    # The return value is not needed here.  NOTE(review): presumably this
    # call is what populates the R-side ``data`` object -- confirm.
    get_r_data_by_top(locus_table, interval_table, intervals, names, rows)
    aggregated = robjects.r('''agg_data <- aggregate(pi ~ interval + db, data = data, sum)''')
    if len(intervals) > 1:
        relevel = '''agg_data$interval <- factor(agg_data$interval,{})'''.format(order_intervals(aggregated[0]))
        robjects.r(relevel)

    base = ggplot2.ggplot(robjects.r('''agg_data'''))
    mapping = ggplot2.aes_string(x='interval', y='pi', fill='factor(db)')
    bars = ggplot2.geom_bar(position='dodge', colour='#767676', alpha=0.6)
    y_axis = ggplot2.scale_y_continuous('net phylogenetic informativeness')
    x_axis = ggplot2.scale_x_discrete('interval (years ago)')
    fill_scale = ggplot2.scale_fill_brewer("database", palette="Blues")
    return base + mapping + bars + y_axis + x_axis + fill_scale
Example #4
0
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Plot ``pi`` per interval, either as a box plot or as faceted bars.

    The query built by ``get_interval_query`` is run through R's
    ``dbGetQuery`` and stored in the R object ``data``.  With ``boxplot``
    true, a box plot with jittered per-locus points is returned; otherwise
    a bar plot faceted by locus.  The plot is returned, not drawn.
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))
    # ``interval`` is a factor, so its levels are re-declared explicitly in
    # the order computed by order_intervals from the fetched column.
    relevel = '''data$interval <- factor(data$interval, {})'''.format(order_intervals(fetched[1]))
    robjects.r(relevel)
    base = ggplot2.ggplot(robjects.r('''data'''))

    if not boxplot:
        mapping = ggplot2.aes_string(x='interval', y='pi', fill='locus')
        bars = ggplot2.geom_bar()
        facets = ggplot2.facet_wrap(robjects.Formula('~ locus'))
        tweaks = ggplot2.opts(**{
            'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
            'legend.position': 'none',
        })
        return base + mapping + bars + facets + tweaks + \
            ggplot2.scale_y_continuous('phylogenetic informativeness') + \
            ggplot2.scale_x_discrete('interval (years ago)')

    mapping = ggplot2.aes_string(x='interval', y='pi')
    boxes = ggplot2.geom_boxplot(**{'outlier.size': 0, 'alpha': 0.3})
    points = ggplot2.geom_jitter(ggplot2.aes_string(color='locus'),
                                 size=3,
                                 alpha=0.6,
                                 position=ggplot2.position_jitter(width=0.25))
    return base + mapping + boxes + points + \
        ggplot2.scale_y_continuous('phylogenetic informativeness') + \
        ggplot2.scale_x_discrete('interval (years ago)')
Example #5
0
def main():
    """Plot the distribution of alignment lengths found in a BAM file.

    Expects exactly one positional command-line argument, the BAM file
    path.  Reads are counted by their ``qlen`` and a bar chart covering
    every length between the smallest and largest observed value is
    written to ``align_lengths.pdf``.  Exits through ``parser.error`` when
    the argument is missing or the file contains no reads.
    """
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    (_, args) = parser.parse_args()

    # parser.error() prints the message and exits, so no else branch needed.
    if len(args) != 1:
        parser.error('Must provide BAM file')
    bam_file = args[0]

    align_lengths = {}
    bam_in = pysam.Samfile(bam_file, 'rb')
    try:
        for aligned_read in bam_in:
            align_lengths[aligned_read.qlen] = align_lengths.get(aligned_read.qlen, 0) + 1
    finally:
        # Release the file handle even if iteration fails part-way.
        bam_in.close()

    # min()/max() raise ValueError on an empty sequence; report it cleanly.
    if not align_lengths:
        parser.error('No alignments found in BAM file')

    min_len = min(align_lengths)
    max_len = max(align_lengths)
    lengths = list(range(min_len, max_len + 1))

    # construct data frame; lengths never observed get a count of zero
    len_r = ro.IntVector(lengths)
    counts_r = ro.IntVector([align_lengths.get(l, 0) for l in lengths])

    df = ro.DataFrame({'length': len_r, 'counts': counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file; always close the device so the PDF is finalized
    grdevices.pdf(file='align_lengths.pdf')
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot fetch status percentages per crawl as horizontal stacked bars.

     When ``row_filter`` is non-empty, only rows of ``data`` whose ``type``
     is listed in it are kept; the frame is then reduced to the ``crawl``,
     ``percentage`` and ``type`` columns.  The ``fetcher:`` /
     ``fetcher:aggr:`` prefix is stripped from the listed values and the
     ``type`` column becomes an ordered categorical whose categories follow
     the reversed ``row_filter`` order.  Values in ``row_filter`` without
     the prefix are kept as categories unchanged so their rows are not
     turned into missing values.

     The ``ratio`` argument is accepted but overridden by a value computed
     from the number of distinct crawls.  The plot is saved to
     ``PLOTDIR/img_file`` and returned.
     """
     prefix = '^fetcher:(?:aggr:)?'
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     # Copy after selecting columns so the in-place edits below never reach
     # the caller's frame.
     data = data[['crawl', 'percentage', 'type']].copy()
     categories = []
     for value in row_filter or ():
         replacement = re.sub(prefix, '', value)
         if replacement != value:
             data.replace(to_replace=value, value=replacement, inplace=True)
         if replacement not in categories:
             categories.append(replacement)
     # list.reverse() reverses in place and returns None, which made pandas
     # infer the categories and lose the requested order; pass a reversed
     # copy instead.  With nothing to order, let pandas infer as before.
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories[::-1] or None)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
Example #7
0
	def barPlot(self, dataframe, filename, x_parm, y_parm):
		"""Draw a bar chart of ``y_parm`` against ``x_parm`` into a PNG file.

		Opens a 512x512 PNG device on ``filename``, plots ``dataframe`` with
		bar heights taken directly from the ``y_parm`` column, and closes
		the device.
		"""
		grdevices.png(file=filename, width=512, height=512)
		chart = ggplot2.ggplot(dataframe)
		chart = chart + ggplot2.aes_string(x=x_parm, y=y_parm)
		chart = chart + ggplot2.geom_bar(stat="identity")
		chart.plot()
		grdevices.dev_off()
Example #8
0
    def barPlot(self, dataframe, filename, x_parm, y_parm):
        """Write a 512x512 PNG bar chart of ``dataframe`` to ``filename``.

        ``x_parm`` and ``y_parm`` name the columns mapped to the x and y
        aesthetics; bar heights come directly from the y column.
        """
        grdevices.png(file=filename, width=512, height=512)
        base = ggplot2.ggplot(dataframe)
        mapping = ggplot2.aes_string(x=x_parm, y=y_parm)
        bars = ggplot2.geom_bar(stat="identity")
        (base + mapping + bars).plot()
        grdevices.dev_off()
Example #9
0
def groupBar(fi_data):
    """Plot a grouped bar chart from a delimited text file.

    ``fi_data`` is read with R's ``read.delim`` and the chart is written to
    ``fi_data + ".Bar.pdf"`` with ``Class`` on the x axis, ``Percent`` on
    the y axis and bars filled by ``Group``; x labels are rotated by 90
    degrees.

    Side effect: ``ggplot2.theme`` is rebound to a wrapper that accepts
    underscore-style argument names such as ``axis_text_x``.
    """
    close_device = robjects.r('dev.off')
    read_table = robjects.r('read.delim')
    loaded = read_table(fi_data, header=True, stringsAsFactors=False)
    # Round-trip the table through the R global environment.
    robjects.r.assign('class.data', loaded)
    robjects.r.pdf(fi_data + ".Bar.pdf")
    robjects.r('class_data <- class.data')
    table = robjects.r('class_data')
    ggplot2.theme = SignatureTranslatedFunction(ggplot2.theme, init_prm_translate={'axis_text_x': 'axis.text.x', 'axis_text_y': 'axis.text.y', 'axis_text_fill': 'axis.text.fill'})

    chart = ggplot2.ggplot(table)
    chart = chart + ggplot2.geom_bar(stat='identity', position='dodge')
    chart = chart + ggplot2.aes_string(x='Class', y='Percent', fill='Group')
    chart = chart + ggplot2.theme(axis_text_x=ggplot2.element_text(angle=90, hjust=1))
    chart.plot()
    close_device()
Example #10
0
def bargraph_variation_diff():
    """Plot the relative coding-time overhead of the expert variations.

    For the pairs ``('seq', 'expertseq')`` and ``('par', 'expertpar')`` a
    PDF ``bargraph-codingtime-diff-<standard>.pdf`` is written.  Each bar
    is ``(expert + standard) / standard - 1`` taken from
    ``result[lang][prob][...]``; when either lookup raises ``KeyError`` the
    bar is 0.
    """
    r = robjects.r
    pairs = [('seq', 'expertseq'), ('par', 'expertpar')]

    for standard, expert in pairs:
        language_col = []
        problem_col = []
        diff_col = []
        for lang in languages:
            for prob in problems:
                missing = False
                try:
                    base_time = result[lang][prob][standard]
                except KeyError:
                    missing = True
                try:
                    expert_time = result[lang][prob][expert]
                except KeyError:
                    missing = True

                if missing:
                    diff = 0
                else:
                    diff = (float(expert_time + base_time) / float(base_time) - 1)

                language_col.append(pretty_langs[lang])
                problem_col.append(prob)
                diff_col.append(diff)

        r.pdf('bargraph-codingtime-diff-' + standard + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        frame = robjects.DataFrame({
            'Language': StrVector(language_col),
            'Problem': StrVector(problem_col),
            'Difference': FloatVector(diff_col),
        })

        chart = ggplot2.ggplot(frame)
        layers = [
            ggplot2.aes_string(x='Problem', y='Difference', fill='Language'),
            ggplot2.geom_bar(position='dodge', stat='identity'),
            ggplot2_options(),
            ggplot2_colors(),
            robjects.r('ylab("Coding time difference (in percent)")'),
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
            robjects.r('scale_y_continuous(labels = percent_format())'),
        ]
        for layer in layers:
            chart = chart + layer
        chart.plot()
        r['dev.off']()
Example #11
0
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    """Write a dodged, labelled bar chart of counts per subgroup to a PNG.

    ``subgroups_to_sses_to_n_count`` is a two-level mapping: subgroup ->
    (value of the ``tname`` column -> count).  One bar is drawn per inner
    entry, grouped by subgroup on the x axis and filled by ``tname``.  The
    image is written to ``OUTPUT_PATH/file_name`` at 1200x800.
    """
    columns = {'subgroup': [], tname: [], 'count': []}
    max_count = 0
    for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items():
        for ss, n_count in sses_to_n_count.items():
            columns['subgroup'].append(subgroup)
            columns[tname].append(ss)
            columns['count'].append(n_count)
            max_count = max(max_count, n_count)

    # Subgroup is a factor so the x axis follows _sort_subgroup's order.
    subgroup_levels = ro.StrVector(_sort_subgroup(set(columns['subgroup'])))
    df = ro.DataFrame({
        'subgroup': ro.FactorVector(columns['subgroup'],
                                    levels=subgroup_levels),
        tname: ro.StrVector(columns[tname]),
        'count': ro.IntVector(columns['count']),
    })

    # Upper y limit with headroom for the bar labels.
    # NOTE(review): whether this rounds to a multiple of 1000 depends on
    # the division semantics of the Python version in use -- confirm.
    max_count = int(max_count / 1000 * 1000 + 1000)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        "The Data Frame for file {}: \n{}".format(histogram_file_path, df))

    grdevices.png(file=histogram_file_path, width=1200, height=800)
    chart = ggplot2.ggplot(df)
    chart = chart + ggplot2.aes_string(x='subgroup', y='count', fill=tname)
    chart = chart + ggplot2.geom_bar(position="dodge", width=0.8, stat="identity")
    chart = chart + ggplot2.theme_bw()
    chart = chart + ggplot2.theme_classic()
    chart = chart + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
    chart = chart + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)})
    chart = chart + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40, angle=45)})
    chart = chart + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)})
    chart = chart + ggplot2.scale_y_continuous(
        expand=ro.IntVector([0, 0]),
        limits=ro.IntVector([0, max_count]))
    chart = chart + ggplot2.geom_text(
        ggplot2.aes_string(label='count'),
        size=6,
        angle=35,
        hjust=-0.1,
        position=ggplot2.position_dodge(width=0.8),
        vjust=-0.2)

    chart.plot()
    logging.info("Output step3 file {}".format(histogram_file_path))
    grdevices.dev_off()
Example #12
0
def bargraph_language(cfg, values):
    """Write one execution-time bar chart with error bars per language.

    For every language in ``cfg.languages`` the file
    ``bargraph-executiontime-lang-<lang>.pdf`` is produced.  For each
    problem/variation pair, ``values[prob][var][lang][0]`` is converted to
    a ``FloatVector`` of samples; the bar height is their mean (R ``mean``)
    and the error bar is half the width of the 99.9% confidence interval
    reported by R's ``t.test`` (stored in the ``SE`` column).
    """
    r = robjects.r
    for lang in cfg.languages:
        times = []
        varss = []
        probs = []
        ses = []

        for prob in cfg.problems:
            for var in cfg.variations:
                # use the pretty variation names as legend labels
                varss.append(pretty_varis[var])
                probs.append(prob)

                data = FloatVector(values[prob][var][lang][0])
                times.append(r['mean'](data)[0])

                # The R argument name must be exactly "conf.level".  The
                # previous key had a leading space, so R did not match it
                # to the formal argument and the requested level was
                # silently ignored in favour of t.test's default.
                t_result = r['t.test'](data, **{
                    "conf.level": 0.999
                }).rx('conf.int')[0]
                # half-width of the confidence interval
                ses.append((t_result[1] - t_result[0]) / 2)

        r.pdf('bargraph-executiontime-lang-' + lang + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        df = robjects.DataFrame({
            'Variation': StrVector(varss),
            'Problem': StrVector(probs),
            'Time': FloatVector(times),
            'SE': FloatVector(ses)
        })

        limits = ggplot2.aes(ymax='Time + SE', ymin='Time - SE')
        dodge = ggplot2.position_dodge(width=0.9)

        gp = ggplot2.ggplot(df)

        pp = gp + \
            ggplot2.aes_string (x='Problem', y='Time', fill='Variation') + \
            ggplot2.geom_bar (position='dodge', stat='identity') + \
            ggplot2.geom_errorbar (limits, position=dodge, width=0.25) + \
            ggplot2_options () + \
            ggplot2_colors () + \
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') +\
            robjects.r('ylab("Execution time (in seconds)")')
        pp.plot()
        r['dev.off']()
Example #13
0
def bargraph_language (cfg, values):
  """Write one execution-time bar chart with error bars per language.

  For every language in ``cfg.languages`` the file
  ``bargraph-executiontime-lang-<lang>.pdf`` is produced.  For each
  problem/variation pair, ``values[prob][var][lang][0]`` is converted to a
  ``FloatVector`` of samples; the bar height is their mean (R ``mean``)
  and the error bar is half the width of the 99.9% confidence interval
  reported by R's ``t.test`` (stored in the ``SE`` column).
  """
  r = robjects.r
  for lang in cfg.languages:
    times = []
    varss = []
    probs = []
    ses   = []

    for prob in cfg.problems:
      for var in cfg.variations:
        # use the pretty variation names as legend labels
        varss.append (pretty_varis [var])
        probs.append (prob)

        data = FloatVector (values[prob][var][lang][0])
        times.append (r['mean'] (data)[0])

        # The R argument name must be exactly "conf.level".  The previous
        # key had a leading space, so R did not match it to the formal
        # argument and the requested level was silently ignored in favour
        # of t.test's default.
        t_result = r['t.test'] (data,
                                **{"conf.level": 0.999}).rx ('conf.int')[0]
        # half-width of the confidence interval
        ses.append ((t_result[1] - t_result[0])/2)

    r.pdf ('bargraph-executiontime-lang-' + lang + '.pdf',
           height=pdf_height (), width=pdf_width ())
    df = robjects.DataFrame({'Variation': StrVector (varss),
                             'Problem': StrVector (probs),
                             'Time' : FloatVector (times),
                             'SE' : FloatVector (ses)
                             })

    limits = ggplot2.aes (ymax = 'Time + SE', ymin = 'Time - SE')
    dodge = ggplot2.position_dodge (width=0.9)

    gp = ggplot2.ggplot (df)

    pp = gp + \
        ggplot2.aes_string (x='Problem', y='Time', fill='Variation') + \
        ggplot2.geom_bar (position='dodge', stat='identity') + \
        ggplot2.geom_errorbar (limits, position=dodge, width=0.25) + \
        ggplot2_options () + \
        ggplot2_colors () + \
        robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') +\
        robjects.r('ylab("Execution time (in seconds)")')
    pp.plot ()
    r['dev.off']()
Example #14
0
def bargraph_language():
    """Write one coding-time bar chart per language.

    For every entry of ``languages`` the file
    ``bargraph-codingtime-lang-<language>.pdf`` is produced from
    ``result[language][prob][var]``; a missing entry counts as 0.  For
    variations whose name starts with ``expert``, the time of the matching
    non-expert variation is added when it exists.
    """
    r = robjects.r

    for language in languages:
        variation_col = []
        problem_col = []
        time_col = []
        for prob in problems:
            for var in variations:
                try:
                    minutes = result[language][prob][var]
                except KeyError:
                    minutes = 0

                # expert bars show expert and non-expert time combined
                if var.startswith('expert'):
                    plain_var = var.replace('expert', '')
                    try:
                        minutes = minutes + result[language][prob][plain_var]
                    except KeyError:
                        pass

                variation_col.append(pretty_varis[var])
                problem_col.append(prob)
                time_col.append(minutes)

        r.pdf('bargraph-codingtime-lang-' + language + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        frame = robjects.DataFrame({
            'Variation': StrVector(variation_col),
            'Problem': StrVector(problem_col),
            'Time': IntVector(time_col),
        })

        chart = ggplot2.ggplot(frame)
        layers = [
            ggplot2.aes_string(x='Problem', y='Time', fill='Variation'),
            ggplot2.geom_bar(position='dodge', stat='identity'),
            ggplot2_options(),
            ggplot2_colors(),
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
            robjects.r('ylab("Coding time (in minutes)")'),
        ]
        for layer in layers:
            chart = chart + layer
        chart.plot()
        r['dev.off']()
Example #15
0
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count,
                                       file_name):
    """Write a dodged, labelled bar chart of counts per position to a PNG.

    Keys of ``place_type_pos_type_to_count`` are split on ``'_'`` into
    exactly two parts, ``place`` and ``pos``; values are the counts.  Bars
    are grouped by ``pos`` on the x axis and filled by ``place``.  The
    image is written to ``OUTPUT_PATH/file_name`` at 1024x512.
    """
    columns = {'place': [], 'pos': [], 'count': []}
    max_count = 0
    for combined_key, n_count in place_type_pos_type_to_count.items():
        place_type, pos_type = combined_key.split('_')
        columns['place'].append(place_type)
        columns['pos'].append(pos_type)
        columns['count'].append(n_count)
        max_count = max(max_count, n_count)

    df = ro.DataFrame({
        'place': ro.StrVector(columns['place']),
        'pos': ro.StrVector(columns['pos']),
        'count': ro.IntVector(columns['count']),
    })

    # Upper y limit with headroom; the step depends on the magnitude.
    # NOTE(review): whether this rounds to a multiple of the step depends
    # on the division semantics of the Python version in use -- confirm.
    if max_count > 1000:
        step = 1000
    else:
        step = 100
    max_count = int(max_count / step * step + step)

    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        "The Data Frame for file {}: \n{}".format(histogram_file_path, df))
    grdevices.png(file=histogram_file_path, width=1024, height=512)

    chart = ggplot2.ggplot(df)
    chart = chart + ggplot2.aes_string(x='pos', y='count', fill='place')
    chart = chart + ggplot2.geom_bar(position="dodge", stat="identity")
    chart = chart + ggplot2.theme_bw()
    chart = chart + ggplot2.theme_classic()
    chart = chart + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)})
    chart = chart + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)})
    chart = chart + ggplot2.scale_y_continuous(
        expand=ro.IntVector([0, 0]),
        limits=ro.IntVector([0, max_count]))
    chart = chart + ggplot2.geom_text(
        ggplot2.aes_string(label='count'),
        position=ggplot2.position_dodge(width=0.8),
        size=10,
        angle=35,
        hjust=-0.2,
        vjust=-0.5)
    chart.plot()
    logging.info("Output step3 file {}".format(histogram_file_path))
    grdevices.dev_off()
Example #16
0
def bargraph_variation_diff(cfg, values):
    """Plot the relative execution-time change of the expert variations.

    For the pairs ``('seq', 'expertseq')`` and ``('par', 'expertpar')`` a
    PDF ``bargraph-executiontime-diff-<standard>.pdf`` is written.  Each
    bar is ``mean(expert) / mean(standard) - 1``, with the means computed
    by R over ``values[prob][variation][lang][0]``.
    """
    r = robjects.r
    r_mean = r['mean']

    for standard, expert in [('seq', 'expertseq'), ('par', 'expertpar')]:
        language_col = []
        problem_col = []
        diff_col = []
        for lang in cfg.languages:
            for prob in cfg.problems:
                samples = FloatVector(values[prob][standard][lang][0])
                samples_expert = FloatVector(values[prob][expert][lang][0])

                base_mean = r_mean(samples)[0]
                expert_mean = r_mean(samples_expert)[0]

                language_col.append(pretty_langs[lang])
                problem_col.append(prob)
                diff_col.append(float(expert_mean) / float(base_mean) - 1)

        r.pdf('bargraph-executiontime-diff-' + standard + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        frame = robjects.DataFrame({
            'Language': StrVector(language_col),
            'Problem': StrVector(problem_col),
            'Difference': FloatVector(diff_col),
        })

        chart = ggplot2.ggplot(frame)
        layers = [
            ggplot2.aes_string(x='Problem', y='Difference', fill='Language'),
            ggplot2.geom_bar(position='dodge', stat='identity'),
            ggplot2_options(),
            ggplot2_colors(),
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
            robjects.r('ylab("Execution time difference (in percent)")'),
            robjects.r('scale_y_continuous(labels = percent_format())'),
        ]
        for layer in layers:
            chart = chart + layer
        chart.plot()
        r['dev.off']()
Example #17
0
def bargraph_variation_norm(results):
    """Plot lines of code per variation, normalized to the smallest entry.

    For every entry of ``variations`` the file
    ``bargraph-loc-var-norm-<variation>.pdf`` is written.  Within each
    problem, the value ``results[(lang, problem, variation)]`` of every
    language is divided by the minimum of those values.
    """
    r = robjects.r

    for variation in variations:
        language_col = []
        problem_col = []
        loc_col = []
        for problem in problems:
            # Values of all languages for this problem/variation.
            per_language = {}
            for lang in languages:
                key = (lang, problem, variation)
                per_language[key] = results[key]
            smallest = min(per_language.values())

            for key in per_language.keys():
                lang, prob, _ = key
                normalized = float(per_language[key]) / float(smallest)
                language_col.append(pretty_langs[lang])
                problem_col.append(prob)
                loc_col.append(normalized)

        r.pdf('bargraph-loc-var-norm-' + variation + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        frame = robjects.DataFrame({
            'Language': StrVector(language_col),
            'Problem': StrVector(problem_col),
            'Lines': FloatVector(loc_col),
        })

        chart = ggplot2.ggplot(frame)
        layers = [
            ggplot2.aes_string(x='Problem', y='Lines', fill='Language'),
            ggplot2.geom_bar(position='dodge', stat='identity'),
            ggplot2_options(),
            ggplot2_colors(),
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
            robjects.r('ylab("Lines of Code (normalized to smallest)")'),
        ]
        for layer in layers:
            chart = chart + layer
        chart.plot()
        r['dev.off']()
Example #18
0
def mem_usage_graph(cfg):
    """Write ``bargraph-memusage.pdf`` with memory usage for every run.

    For each variation/language/problem combination in ``cfg``, the first
    line of the file named by ``get_mem_output(lang, prob, var)`` is parsed
    as a float.  All values are drawn in a single chart, faceted by
    variation, with problems on the x axis and bars filled by language.
    """
    r = robjects.r
    variation_col = []
    language_col = []
    problem_col = []
    memory_col = []
    for var in cfg.variations:
        for lang in cfg.languages:
            for prob in cfg.problems:
                with open(get_mem_output(lang, prob, var), 'r') as mem_file:
                    first_line = mem_file.readline()
                    memory_col.append(float(first_line))
                variation_col.append(pretty_varis[var])
                language_col.append(pretty_langs[lang])
                problem_col.append(prob)

    # all information goes into one graph
    r.pdf('bargraph-memusage.pdf', height=pdf_height(), width=pdf_width())
    frame = robjects.DataFrame({
        'Language': StrVector(language_col),
        'Problem': StrVector(problem_col),
        'Variation': StrVector(variation_col),
        'Mem': FloatVector(memory_col)
    })

    chart = ggplot2.ggplot(frame)
    layers = [
        # rotate the x labels so they do not overlap
        ggplot2.opts(**{'axis.text.x': ggplot2.theme_text(angle=90, hjust=1)}),
        ggplot2.aes_string(x='Problem', y='Mem', fill='Language'),
        ggplot2.geom_bar(position='dodge', stat='identity'),
        ggplot2.facet_wrap('Variation'),
        ggplot2_options(),
        ggplot2_colors(),
        robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
        robjects.r('ylab("Memory usage (in bytes)")'),
    ]
    for layer in layers:
        chart = chart + layer

    chart.plot()
    r['dev.off']()
Example #19
0
def mem_usage_graph (cfg):
  """Write ``bargraph-memusage.pdf`` with memory usage for every run.

  For each variation/language/problem combination in ``cfg``, the first
  line of the file named by ``get_mem_output (lang, prob, var)`` is parsed
  as a float.  All values are drawn in a single chart, faceted by
  variation, with problems on the x axis and bars filled by language.
  """
  r = robjects.r
  columns = {'Language': [], 'Problem': [], 'Variation': [], 'Mem': []}
  for var in cfg.variations:
    for lang in cfg.languages:
      for prob in cfg.problems:
        with open (get_mem_output (lang, prob, var), 'r') as mem_file:
          first_line = mem_file.readline ()
          columns['Mem'].append (float (first_line))
        columns['Variation'].append (pretty_varis [var])
        columns['Language'].append (pretty_langs [lang])
        columns['Problem'].append (prob)

  # all information goes into one graph
  r.pdf ('bargraph-memusage.pdf', height=pdf_height (), width=pdf_width ())
  frame = robjects.DataFrame({'Language': StrVector (columns['Language']),
                              'Problem': StrVector (columns['Problem']),
                              'Variation': StrVector (columns['Variation']),
                              'Mem': FloatVector (columns['Mem'])
                              })

  chart = ggplot2.ggplot (frame)
  # rotate the x labels so they do not overlap
  chart = chart + ggplot2.opts (**{'axis.text.x': ggplot2.theme_text (angle=90, hjust=1)})
  chart = chart + ggplot2.aes_string (x='Problem', y='Mem', fill='Language')
  chart = chart + ggplot2.geom_bar (position='dodge', stat='identity')
  chart = chart + ggplot2.facet_wrap ('Variation')
  chart = chart + ggplot2_options ()
  chart = chart + ggplot2_colors ()
  chart = chart + robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))')
  chart = chart + robjects.r('ylab("Memory usage (in bytes)")')

  chart.plot ()
  r['dev.off']()
Example #20
0
def singleTablePlot_gg(parser, args):
    """Plot the k-mer counts of a single table as a bar chart.

    Counts come from ``kmercount_in_table(args.table1)``.  K-mers are
    placed along the x axis in sorted order and labelled by name.  The
    plot is drawn on the current R graphics device, after which the
    function blocks until the user presses enter.  ``parser`` is unused.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')
    kmerdict = kmercount_in_table(args.table1)

    kmers = sorted(kmerdict.keys())
    positions = list(range(1, len(kmers) + 1))
    labels = robjects.StrVector([str(k) for k in kmers])
    # Build real R vectors: plain Python lists/ranges cannot be used as
    # aesthetics, and the aesthetics must name data-frame columns.
    df = robjects.DataFrame({
        'position': robjects.IntVector(positions),
        'kmers': labels,
        'counts': robjects.FloatVector([kmerdict[k] for k in kmers]),
    })
    # Tick positions are offset by 0.5 from the bar index, as the original
    # ``0.5 + range(...)`` expression intended (that expression itself is
    # a TypeError in Python, hence the explicit list).
    breaks = robjects.FloatVector([0.5 + p for p in positions])

    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='position', y='counts') \
         + ggplot2.geom_bar(stat="identity") \
         + ggplot2.scale_x_continuous(name="kmer", breaks=breaks, labels=labels)
    pp.plot()
    print('Type enter to exit.')
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter()
Example #21
0
def bargraph_variation_diff(results):
    """Plot the relative lines-of-code change of the expert variations.

    For the pairs ``('seq', 'expertseq')`` and ``('par', 'expertpar')`` a
    PDF ``bargraph-loc-diff-<standard>.pdf`` is written.  Each bar is
    ``results[(lang, prob, expert)] / results[(lang, prob, standard)] - 1``.
    """
    r = robjects.r

    for standard, expert in [('seq', 'expertseq'), ('par', 'expertpar')]:
        language_col = []
        problem_col = []
        diff_col = []
        for lang in languages:
            for prob in problems:
                base_loc = results[(lang, prob, standard)]
                expert_loc = results[(lang, prob, expert)]
                ratio_change = float(expert_loc) / float(base_loc) - 1

                language_col.append(pretty_langs[lang])
                problem_col.append(prob)
                diff_col.append(ratio_change)

        r.pdf('bargraph-loc-diff-' + standard + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        frame = robjects.DataFrame({
            'Language': StrVector(language_col),
            'Problem': StrVector(problem_col),
            'Difference': FloatVector(diff_col),
        })

        chart = ggplot2.ggplot(frame)
        layers = [
            ggplot2.aes_string(x='Problem', y='Difference', fill='Language'),
            ggplot2.geom_bar(position='dodge', stat='identity'),
            ggplot2_options(),
            ggplot2_colors(),
            robjects.r('ylab("Lines of code difference (in percent)")'),
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))'),
            robjects.r('scale_y_continuous(labels = percent_format())'),
        ]
        for layer in layers:
            chart = chart + layer
        chart.plot()
        r['dev.off']()
Example #22
0
 def plot_stacked_bar(self, data, row_filter, img_file, ratio=1.0):
     """Plot fetch status percentages per crawl as horizontal stacked bars.

     When ``row_filter`` is non-empty, only rows of ``data`` whose ``type``
     is listed in it are kept.  The ``fetcher:`` / ``fetcher:aggr:`` prefix
     is stripped from the listed values in place.  ``ratio`` sets the
     aspect ratio of the plot, which is saved to ``PLOTDIR/img_file`` and
     returned.
     """
     fetcher_prefix = '^fetcher:(?:aggr:)?'
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if not re.search(fetcher_prefix, value):
             continue
         shortened = re.sub(fetcher_prefix, '', value)
         data.replace(to_replace=value, value=shortened, inplace=True)

     p = ggplot2.ggplot(data)
     layers = [
         ggplot2.aes_string(x='crawl', y='percentage', fill='type'),
         ggplot2.geom_bar(stat='identity', position='stack', width=.9),
         ggplot2.coord_flip(),
         ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                   guide=ggplot2.guide_legend(reverse=True)),
         GGPLOT2_THEME,
         ggplot2.theme(**{'legend.position': 'bottom',
                          'aspect.ratio': ratio}),
         ggplot2.labs(title='Percentage of Fetch Status',
                      x='', y='', fill=''),
     ]
     for layer in layers:
         p = p + layer
     p.save(os.path.join(PLOTDIR, img_file))
     return p
Example #23
0
def singleTablePlot_gg(parser, args):
    """Plot the k-mer counts of a single table as a bar chart.

    Counts come from ``kmercount_in_table(args.table1)``.  K-mers are
    placed along the x axis in sorted order and labelled by name.  The
    plot is drawn on the current R graphics device, after which the
    function blocks until the user presses enter.  ``parser`` is unused.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')
    kmerdict = kmercount_in_table(args.table1)

    kmers = sorted(kmerdict.keys())
    positions = list(range(1, len(kmers) + 1))
    labels = robjects.StrVector([str(k) for k in kmers])
    # Build real R vectors: plain Python lists/ranges cannot be used as
    # aesthetics, and the aesthetics must name data-frame columns.
    df = robjects.DataFrame({
        'position': robjects.IntVector(positions),
        'kmers': labels,
        'counts': robjects.FloatVector([kmerdict[k] for k in kmers]),
    })
    # Tick positions are offset by 0.5 from the bar index, as the original
    # ``0.5 + range(...)`` expression intended (that expression itself is
    # a TypeError in Python, hence the explicit list).
    breaks = robjects.FloatVector([0.5 + p for p in positions])

    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='position', y='counts') \
         + ggplot2.geom_bar(stat="identity") \
         + ggplot2.scale_x_continuous(name="kmer", breaks=breaks, labels=labels)
    pp.plot()
    print('Type enter to exit.')
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter()
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot fetch status percentages per crawl as horizontal stacked bars.

     When ``row_filter`` is non-empty, only rows of ``data`` whose ``type``
     is listed in it are kept.  The ``fetcher:`` / ``fetcher:aggr:`` prefix
     is stripped from the listed values in place.  ``ratio`` sets the
     aspect ratio of the plot, which is saved to ``PLOTDIR/img_file`` and
     returned.
     """
     pattern = '^fetcher:(?:aggr:)?'
     if len(row_filter) > 0:
         keep = data['type'].isin(row_filter)
         data = data[keep]
     for value in row_filter:
         if re.search(pattern, value):
             data.replace(to_replace=value,
                          value=re.sub(pattern, '', value),
                          inplace=True)

     base = ggplot2.ggplot(data)
     mapping = ggplot2.aes_string(x='crawl', y='percentage', fill='type')
     bars = ggplot2.geom_bar(stat='identity', position='stack', width=.9)
     flipped = ggplot2.coord_flip()
     fill_scale = ggplot2.scale_fill_brewer(
         palette='RdYlGn', type='sequential',
         guide=ggplot2.guide_legend(reverse=True))
     layout = ggplot2.theme(**{'legend.position': 'bottom',
                               'aspect.ratio': ratio})
     labels = ggplot2.labs(title='Percentage of Fetch Status',
                           x='', y='', fill='')
     p = base + mapping + bars + flipped + fill_scale + GGPLOT2_THEME \
         + layout + labels
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
Example #25
0
def bargraph_variation_diff(cfg, values):
    """Plot the relative execution-time change of expert over standard code.

    For each (standard, expert) variation pair, ('seq', 'expertseq') and
    ('par', 'expertpar'), one PDF named
    'bargraph-executiontime-diff-<standard>.pdf' is written. Each bar shows
    mean(expert) / mean(standard) - 1 for one problem and language.

    cfg    -- object providing ``languages`` and ``problems``
    values -- nested mapping read as values[prob][variation][lang][0],
              a sequence of timings
    """
    r = robjects.r

    def average(samples):
        # Mean of one timing series, computed on the R side.
        return r['mean'](FloatVector(samples))[0]

    variation_pairs = (('seq', 'expertseq'), ('par', 'expertpar'))
    for standard, expert in variation_pairs:
        rows = []
        for lang in cfg.languages:
            for prob in cfg.problems:
                baseline = average(values[prob][standard][lang][0])
                tuned = average(values[prob][expert][lang][0])
                change = float(tuned) / float(baseline) - 1
                rows.append((pretty_langs[lang], prob, change))

        r.pdf('bargraph-executiontime-diff-' + standard + '.pdf',
              height=pdf_height(), width=pdf_width())
        df = robjects.DataFrame({
            'Language': StrVector([row[0] for row in rows]),
            'Problem': StrVector([row[1] for row in rows]),
            'Difference': FloatVector([row[2] for row in rows]),
        })

        # Build the plot layer by layer; the problem order on the x axis is
        # fixed by the scale_x_discrete limits.
        pp = ggplot2.ggplot(df)
        pp = pp + ggplot2.aes_string(x='Problem', y='Difference', fill='Language')
        pp = pp + ggplot2.geom_bar(position='dodge', stat='identity')
        pp = pp + ggplot2_options()
        pp = pp + ggplot2_colors()
        pp = pp + robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))')
        pp = pp + robjects.r('ylab("Execution time difference (in percent)")')
        pp = pp + robjects.r('scale_y_continuous(labels = percent_format())')
        pp.plot()
        r['dev.off']()
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb size per crawl, stacked by status, as horizontal bars.

     data       -- frame with at least 'crawl', 'size' and 'type'
     row_filter -- 'type' values to keep; a non-empty filter restricts the
                   rows, and every entry starting with 'crawldb:status:db_'
                   has that prefix stripped in the data
     img_file   -- file name of the image, created below PLOTDIR
     ratio      -- aspect ratio handed to the ggplot2 theme

     The frame is printed before plotting. Returns the ggplot object after
     saving it.
     """
     if len(row_filter) > 0:
         keep = data['type'].isin(row_filter)
         data = data[keep]
     prefix = re.compile('^crawldb:status:db_')
     for status in row_filter:
         if prefix.search(status) is None:
             continue
         short_name = prefix.sub('', status)
         data.replace(to_replace=status, value=short_name, inplace=True)
     data['size'] = data['size'].astype(float)
     print(data)
     # Assemble the plot one layer at a time.
     p = ggplot2.ggplot(data)
     p = p + ggplot2.aes_string(x='crawl', y='size', fill='type')
     p = p + ggplot2.geom_bar(stat='identity', position='stack', width=.9)
     p = p + ggplot2.coord_flip()
     p = p + ggplot2.scale_fill_brewer(
         palette='Pastel1', type='sequential',
         guide=ggplot2.guide_legend(reverse=False))
     p = p + GGPLOT2_THEME
     theme_overrides = {'legend.position': 'bottom', 'aspect.ratio': ratio}
     p = p + ggplot2.theme(**theme_overrides)
     p = p + ggplot2.labs(
         title='CrawlDb Size and Status Counts (before crawling)',
         x='', y='', fill='')
     p.save(os.path.join(PLOTDIR, img_file))
     return p
Example #27
0
def main():
    """Plot the distribution of aligned read lengths found in a BAM file.

    Expects exactly one positional command-line argument, the BAM file.
    Reads are counted per ``qlen`` and the counts are drawn as a bar chart
    into 'align_lengths.pdf' in the current working directory. Lengths
    between the shortest and longest observed value that never occur are
    plotted with a count of 0.

    Exits through ``parser.error`` when the argument is missing or when the
    file contains no reads.
    """
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    #parser.add_option()
    _, args = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide BAM file')
    bam_file = args[0]

    # Histogram of read lengths; the file handle is released even if
    # iteration fails part-way through.
    align_lengths = {}
    bam_in = pysam.Samfile(bam_file, 'rb')
    try:
        for aligned_read in bam_in:
            align_lengths[aligned_read.qlen] = align_lengths.get(
                aligned_read.qlen, 0) + 1
    finally:
        bam_in.close()

    # min()/max() below would fail with an unhelpful ValueError on no data.
    if not align_lengths:
        parser.error('No alignments found in %s' % bam_file)

    min_len = min(align_lengths)
    max_len = max(align_lengths)
    lengths = list(range(min_len, max_len + 1))

    # construct data frame
    len_r = ro.IntVector(lengths)
    counts_r = ro.IntVector([align_lengths.get(l, 0) for l in lengths])

    df = ro.DataFrame({'length': len_r, 'counts': counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file; always close the device so the PDF is finalized
    grdevices.pdf(file='align_lengths.pdf')
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
Example #28
0
# Restructure hour data: build parallel lists of timestamps and hit counts.
# NOTE(review): hour_hits is defined outside this excerpt; it is iterated and
# then indexed by the iterated values, so it is presumably a dict keyed by
# hour strings -- confirm against the code that fills it.
hour_arr = []
hits_arr = []
for hour in hour_hits:
    # Only the first two characters of the key are used, parsed as a whole
    # number of hours and added to datetime.min.
    hour_arr.append(datetime.min + timedelta(hours=int(hour[0:2])))
    hits_arr.append(hour_hits[hour])
# This rebinds `hour`, which was the loop variable above.
hour = POSIXct(hour_arr)
hits = IntVector(hits_arr)

# Draw hits per hour as a bar chart into a PNG graphics device.
grdevices.png('analytics_out/hits_by_time.png')
# NOTE(review): `hour` and `hits` were already wrapped in POSIXct / IntVector
# above and are wrapped a second time here -- looks redundant; confirm before
# simplifying.
df = robjects.DataFrame({'hour': POSIXct(hour), 'hits': IntVector(hits)})
# The x axis is a datetime scale whose labels show only hour and minute.
pp = ggplot.ggplot(df) + \
    ggplot.aes_string(x = 'hour', y = 'hits') + \
    ggplot.scale_x_datetime(labels = scales.date_format('%H:%M UTC')) + \
    ggplot.geom_bar(stat = 'identity')
pp.plot()
grdevices.dev_off()

# Restructure circular hit time data
# NOTE(review): time_hits is defined outside this excerpt.
time_hits_vec = FloatVector(time_hits)
# Interpret the values as hours on a 24-hour clock.
hits_circ = circular.circular(time_hits_vec, units='hours', template='clock24')
hits_density = circular.density_circular(hits_circ, bw=100)

print('Von Mises fit for hits by time (hours past 00:00 UTC)')
# Fit a von Mises distribution and pull the 'mu', 'se.mu', 'kappa' and
# 'se.kappa' components out of the fit result as plain Python scalars.
hits_mle = circular.mle_vonmises(hits_circ)
mu = base.cbind(hits_mle.rx('mu'))[0][0]
mu_se = hits_mle.rx('se.mu')[0][0]
kappa = hits_mle.rx('kappa')[0][0]
kappa_se = hits_mle.rx('se.kappa')[0][0]
print('MLE: mu = %0.2f (%0.2f)  kappa = %0.2f (%0.2f)' %
Example #29
0
      'axis.title.x':element_text(size=size,color=robjects.r.color_axis_title, vjust=0),
      #'panel.grid.major':element_line(color=robjects.r.color_grid_major,size=.25),
      'axis.title.y':element_text(size=size,color=robjects.r.color_axis_title,angle=90)})

# Open question: is there a cheaper way to retitle a legend? At the moment
# both the colour legend and the shape legend need their own scale call
# (legend_t_c and legend_t_s in this section). Two alternatives were tried
# and dropped:
#   * making base_plot a function of the grouping column name, and
#   * passing name=ltitle straight to scale_colour_manual.

pandas2ri.activate()

# Building blocks shared by the plots.
base_plot = ggplot2.aes_string(
    x='x',
    y='value',
    group='variable',
    colour='variable',
    shape='variable',
)
line = ggplot2.geom_line()
point = ggplot2.geom_point()
bar = ggplot2.geom_bar(stat="identity")
# Dashed, faint red vertical markers at x = -1 and x = 5.
vert_line_onset = ggplot2.geom_vline(
    xintercept=-1, linetype=2, colour="red", alpha=0.25)
vert_line_exhaust = ggplot2.geom_vline(
    xintercept=5, linetype=2, colour="red", alpha=0.25)
ltitle = "crazy"
ltitle_default = 'Variable'
colors = ggplot2.scale_colour_manual(values=robjects.r.palette_lines)


def legend_t_c(ltitle=ltitle_default):
    """Return a discrete colour scale whose legend is titled *ltitle*."""
    return ggplot2.scale_color_discrete(name=ltitle)


def legend_t_s(ltitle=ltitle_default):
    """Return a discrete shape scale whose legend is titled *ltitle*."""
    return ggplot2.scale_shape_discrete(name=ltitle)


loc_default = robjects.r('c(1,0)')


def legend_f(loc=loc_default):
    """Return a theme using *loc* as legend position and justification."""
    placement = {'legend.position': loc, 'legend.justification': loc}
    return ggplot2.theme(**placement)


def ggsave(filename, plot):
    """Save *plot* as a 6x4 PDF named out_path + filename + '.pdf'."""
    target = out_path + filename + ".pdf"
    return robjects.r.ggsave(filename=target, plot=plot, width=6, height=4)


colors_alt = ggplot2.scale_colour_manual(values=robjects.r.palette_lines[1])
shape_alt = ggplot2.scale_shape_manual(values=17)
Example #30
0
def bargraph_variation():
    """Plot coding time per problem and language, two PDFs per variation.

    For every entry of the module-level ``variations`` this writes
    'bargraph-codingtime-var-<var>.pdf' (raw times) and
    'bargraph-codingtime-var-norm-<var>.pdf' (times divided by the smallest
    non-zero time recorded for the same problem).

    Times come from the module-level ``result[lang][prob][var]``. A missing
    entry is reported on stdout and counted as 0. For variations whose name
    starts with 'expert', the time of the matching non-expert variation is
    added when it exists. If a problem has no non-zero time at all, its
    normalized values are all 0.
    """
    r = robjects.r

    def render(pdf_name, langs, probs, times, ylabel):
        # Draw one dodged bar chart into its own PDF device. The device is
        # always closed so the file is finalized and devices do not pile up.
        r.pdf(pdf_name, height=pdf_height(), width=pdf_width())
        try:
            df = robjects.DataFrame({
                'Language': StrVector(langs),
                'Problem': StrVector(probs),
                'Time': FloatVector(times),
            })
            pp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='Problem', y='Time', fill='Language') + \
                ggplot2.geom_bar(position='dodge', stat='identity') + \
                ggplot2_options() + \
                ggplot2_colors() + \
                robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
                robjects.r('ylab("%s")' % ylabel)
            pp.plot()
        finally:
            r['dev.off']()

    for var in variations:
        # each variation gets its own pair of plots
        values = []
        # values normalized per problem
        nvalues = []

        langs = []
        probs = []

        for prob in problems:
            # aggregate by problem: one time per language
            lvalues = []
            for lang in languages:
                langs.append(pretty_langs[lang])
                probs.append(prob)
                try:
                    value = result[lang][prob][var]
                except KeyError:
                    print("Warning: no value for:")
                    print(lang, prob, var)
                    value = 0  # FIXME to account for missing seq-version of Erlang

                # for the expert times, add expert and non-expert times together
                if var.startswith('expert'):
                    try:
                        value = value + result[lang][prob][var.replace(
                            'expert', '')]
                    except KeyError:
                        # No non-expert time recorded: keep the expert time
                        # on its own.
                        pass
                lvalues.append(value)

            values.extend(lvalues)

            nonzero = [x for x in lvalues if x != 0]
            if nonzero:
                lmin = min(nonzero)
                # float() keeps the ratio fractional even for integer times
                # under Python 2 division rules.
                nvalues.extend([float(x) / lmin for x in lvalues])
            else:
                # Nothing to normalize against for this problem.
                nvalues.extend([0.0] * len(lvalues))

        # bar chart of actual times
        render('bargraph-codingtime-var-' + var + '.pdf',
               langs, probs, values, 'Coding time (in minutes)')

        # bar chart of times normalized to the fastest time for a problem
        render('bargraph-codingtime-var-norm-' + var + '.pdf',
               langs, probs, nvalues, 'Coding time (normalized to fastest)')
Example #31
0
def speedup_diffs(values, basis):
    """Plot how much the expert parallel version changes the speedup.

    For the variations 'par' and 'expertpar', the speedup of each language
    and problem is base time / mean time at 32 threads. The plotted value is
    speedup('expertpar') / speedup('par'), written as a dodged bar chart to
    'bargraph-speedup-diff.pdf'.

    values -- nested mapping read as values[threads][prob][var][lang][0],
              a sequence of timings
    basis  -- selects the base time:
              'seq'     mean of the matching sequential variation, looked up
                        under cfg.threads[-1]
              'p1'      mean of the parallel variation at 1 thread
              'fastest' the smaller of the two
              any other value behaves like 'seq'

    Reads the module-level ``languages``, ``problems``, ``cfg`` and
    ``pretty_langs``. For each variation and language it prints the language,
    the number of problems, and how often the 1-thread run was chosen as base.
    """
    r = robjects.r

    def mean_time(threads, prob, var, lang):
        # Mean of one timing series, computed on the R side.
        return r.mean(FloatVector(values[threads][prob][var][lang][0]))[0]

    speedups = {}
    for var in ['par', 'expertpar']:
        speedups[var] = {}
        for lang in languages:
            speedups[var][lang] = {}
            i = 0
            p1 = 0
            print(lang)
            for prob in problems:
                i = i + 1
                base = mean_time(cfg.threads[-1], prob,
                                 var.replace('par', 'seq'), lang)
                # base with p = 1
                base_p1 = mean_time(1, prob, var, lang)
                # use fastest sequential program
                if basis == 'fastest' and base_p1 < base:
                    base = base_p1
                    p1 = p1 + 1
                elif basis == 'p1':
                    base = base_p1

                # NOTE(review): the thread count is hard-coded to 32 here
                # while the base uses cfg.threads[-1] -- confirm they are
                # meant to be the same.
                mn = mean_time(32, prob, var, lang)
                speedups[var][lang][prob] = float(base) / float(mn)
            print(i)
            print(p1)

    langs = []
    probs = []
    diffs = []
    for lang in languages:
        for prob in problems:
            sp = speedups['par'][lang][prob]
            sp_expert = speedups['expertpar'][lang][prob]
            langs.append(pretty_langs[lang])
            probs.append(prob)
            diffs.append(float(sp_expert) / float(sp))

    r.pdf('bargraph-speedup-diff.pdf', height=pdf_height(), width=pdf_width())
    try:
        df = robjects.DataFrame({
            'Language': StrVector(langs),
            'Problem': StrVector(probs),
            'Difference': FloatVector(diffs),
        })

        # Axis labels look like "2 x". %g is used instead of %d because R's
        # sprintf rejects %d for doubles that are not whole numbers, and the
        # axis breaks of a ratio scale can be fractional (e.g. 0.5, 1.5).
        scale = r('''
xformatter <- function(x) {
  sprintf("%g x", x)
}
scale_y_continuous(labels = xformatter)
''')

        pp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='Problem', y='Difference', fill='Language') + \
            ggplot2.geom_bar(position='dodge', stat='identity') + \
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
            ggplot2_options() + \
            ggplot2_colors() + \
            r('ylab("Change in speedup")') + \
            scale
        pp.plot()
    finally:
        # Always close the device so the PDF is finalized.
        r['dev.off']()
Example #32
0
def bargraph_variation(cfg, values):
    """Plot mean execution time per problem and language with error bars.

    For every entry of ``cfg.variations`` this writes
    'bargraph-executiontime-var-<var>.pdf' (mean times in seconds) and
    'bargraph-executiontime-var-norm-<var>.pdf' (means divided by the
    smallest mean of the same problem).

    cfg    -- object providing ``variations``, ``problems``, ``languages``
    values -- nested mapping read as values[prob][var][lang][0], a sequence
              of timings

    The error bars ('SE' / 'NormSE') are half the width of the 99.9%
    confidence interval reported by R's t.test for the timing series.
    """
    r = robjects.r

    def render(pdf_name, df, y_col, se_col, ylabel):
        # Draw one dodged bar chart with error bars into its own PDF device.
        # The device is always closed so the file is finalized and devices
        # do not accumulate across variations.
        r.pdf(pdf_name, height=pdf_height(), width=pdf_width())
        try:
            limits = ggplot2.aes(ymax='%s + %s' % (y_col, se_col),
                                 ymin='%s - %s' % (y_col, se_col))
            dodge = ggplot2.position_dodge(width=0.9)
            pp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='Problem', y=y_col, fill='Language') + \
                ggplot2.geom_bar(position='dodge', stat='identity') + \
                ggplot2.geom_errorbar(limits, position=dodge, width=0.25) + \
                ggplot2_options() + \
                ggplot2_colors() + \
                robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
                robjects.r('ylab("%s")' % ylabel)
            pp.plot()
        finally:
            r['dev.off']()

    for var in cfg.variations:
        # each variation gets its own pair of plots
        avgs = []
        ses = []

        # values normalized per problem
        navgs = []
        nses = []

        langs = []
        probs = []

        for prob in cfg.problems:
            # aggregate by problem: one mean per language
            lavgs = []
            lses = []
            for lang in cfg.languages:
                data = FloatVector(values[prob][var][lang][0])

                langs.append(pretty_langs[lang])
                probs.append(prob)
                lavgs.append(r['mean'](data)[0])

                t_result = r['t.test'](
                    data, **{"conf.level": 0.999}).rx('conf.int')[0]
                lses.append((t_result[1] - t_result[0]) / 2)

            avgs.extend(lavgs)
            ses.extend(lses)

            lmin = min(lavgs)
            navgs.extend([la / lmin for la in lavgs])
            nses.extend([ls / lmin for ls in lses])

        # 'TimeLabel' is not drawn by either plot; it was prepared for a
        # text-label layer that is currently not part of the figure.
        df = robjects.DataFrame({
            'Language': StrVector(langs),
            'Problem': StrVector(probs),
            'Time': FloatVector(avgs),
            'SE': FloatVector(ses),
            'NormTime': FloatVector(navgs),
            'NormSE': FloatVector(nses),
            'TimeLabel': StrVector(
                [str(round(time, 1)) + "s" for time in avgs]),
        })

        # bar chart of actual times
        render('bargraph-executiontime-var-' + var + '.pdf',
               df, 'Time', 'SE', 'Execution time (in seconds)')

        # bar chart of times normalized to the fastest time for a problem
        render('bargraph-executiontime-var-norm-' + var + '.pdf',
               df, 'NormTime', 'NormSE',
               'Execution time (normalized to fastest)')
Example #33
0
def speedup_diffs(values, basis):
    """Plot how much the expert parallel version changes the speedup.

    For the variations 'par' and 'expertpar', the speedup of each language
    and problem is base time / mean time at 32 threads. The plotted value is
    speedup('expertpar') / speedup('par'), written as a dodged bar chart to
    'bargraph-speedup-diff.pdf'.

    values -- nested mapping read as values[threads][prob][var][lang][0],
              a sequence of timings
    basis  -- selects the base time:
              'seq'     mean of the matching sequential variation, looked up
                        under cfg.threads[-1]
              'p1'      mean of the parallel variation at 1 thread
              'fastest' the smaller of the two
              any other value behaves like 'seq'

    Reads the module-level ``languages``, ``problems``, ``cfg`` and
    ``pretty_langs``. For each variation and language it prints the language,
    the number of problems, and how often the 1-thread run was chosen as base.
    """
    r = robjects.r

    def mean_time(threads, prob, var, lang):
        # Mean of one timing series, computed on the R side.
        return r.mean(FloatVector(values[threads][prob][var][lang][0]))[0]

    speedups = {}
    for var in ['par', 'expertpar']:
        speedups[var] = {}
        for lang in languages:
            speedups[var][lang] = {}
            i = 0
            p1 = 0
            print(lang)
            for prob in problems:
                i = i + 1
                base = mean_time(cfg.threads[-1], prob,
                                 var.replace('par', 'seq'), lang)
                # base with p = 1
                base_p1 = mean_time(1, prob, var, lang)
                # use fastest sequential program
                if basis == 'fastest' and base_p1 < base:
                    base = base_p1
                    p1 = p1 + 1
                elif basis == 'p1':
                    base = base_p1

                # NOTE(review): the thread count is hard-coded to 32 here
                # while the base uses cfg.threads[-1] -- confirm they are
                # meant to be the same.
                mn = mean_time(32, prob, var, lang)
                speedups[var][lang][prob] = float(base) / float(mn)
            print(i)
            print(p1)

    langs = []
    probs = []
    diffs = []
    for lang in languages:
        for prob in problems:
            sp = speedups['par'][lang][prob]
            sp_expert = speedups['expertpar'][lang][prob]
            langs.append(pretty_langs[lang])
            probs.append(prob)
            diffs.append(float(sp_expert) / float(sp))

    r.pdf('bargraph-speedup-diff.pdf', height=pdf_height(), width=pdf_width())
    try:
        df = robjects.DataFrame({
            'Language': StrVector(langs),
            'Problem': StrVector(probs),
            'Difference': FloatVector(diffs),
        })

        # Axis labels look like "2 x". %g is used instead of %d because R's
        # sprintf rejects %d for doubles that are not whole numbers, and the
        # axis breaks of a ratio scale can be fractional (e.g. 0.5, 1.5).
        scale = r('''
xformatter <- function(x) {
  sprintf("%g x", x)
}
scale_y_continuous(labels = xformatter)
''')

        pp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='Problem', y='Difference', fill='Language') + \
            ggplot2.geom_bar(position='dodge', stat='identity') + \
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
            ggplot2_options() + \
            ggplot2_colors() + \
            r('ylab("Change in speedup")') + \
            scale
        pp.plot()
    finally:
        # Always close the device so the PDF is finalized.
        r['dev.off']()
Example #34
0
def show4():
    """Plot time per day for the compared projects as dodged bars.

    Calls open4(), sources the R script end.R, reads project2.csv into a
    data frame and draws 'time' against 'day', filled by factor(project),
    on the current graphics device.
    """
    open4()
    r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',encoding="utf-8")
    data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')
    # Build the plot one layer at a time.
    pp = ggplot2.ggplot(data)
    pp = pp + ggplot2.aes_string(x='day', y='time', fill='factor(project)')
    pp = pp + ggplot2.geom_bar(stat='identity', position='dodge')
    pp = pp + ggplot2.ggtitle("两项目时间对比图")
    pp = pp + ggplot2.labs(x='日期', y='时间 (min)')
    # Tilt the x-axis tick labels by 45 degrees.
    tilted_labels = {'axis.text.x': ggplot2.element_text(angle=45)}
    pp = pp + ggplot2.theme(**tilted_labels)
    pp.plot()
Example #35
0
    print("Start plotting...")
    grdevices = importr('grDevices')

    ro.r('''change_name=function(pop_size, generations,freq){
		name=sprintf("../results/mcm_%sNe_%sfreq_%sgen.png", pop_size,freq,generations)
		return(name)}
	     ''')
    name = ro.r['change_name']
    name = name(args.ps, args.gen, args.freq)
    print("Output figure in:", name)

    grdevices.png(file=name, width=700, height=700)

    gp = ggplot2.ggplot(res2)
    pp = gp + ggplot2.aes_string(
        x='Counts', y='Proportion') + ggplot2.geom_bar(
            stat="identity", color="darkgoldenrod3") + ggplot2.theme_bw()
    pp.plot()
    grdevices.dev_off()
    print("Plot done!")

elif (
        args.diff
):  ###references:doi: 10.1093/molbev/msx254 &&  https://doi.org/10.1111/j.1365-294X.2010.04997.x
    p = args.freq
    N = args.ps
    t = args.gen
    newx = []
    x = np.arange(0, 1.001, 0.001001001)
    res = [0] * len(x)
    print("Estimating allele counts")
    for i in range(1, 101):
Example #36
0
def bargraph_variation(cfg, values):
    """Plot mean execution time per problem and language with error bars.

    For every entry of ``cfg.variations`` this writes
    'bargraph-executiontime-var-<var>.pdf' (mean times in seconds) and
    'bargraph-executiontime-var-norm-<var>.pdf' (means divided by the
    smallest mean of the same problem).

    cfg    -- object providing ``variations``, ``problems``, ``languages``
    values -- nested mapping read as values[prob][var][lang][0], a sequence
              of timings

    The error bars ('SE' / 'NormSE') are half the width of the 99.9%
    confidence interval reported by R's t.test for the timing series.
    """
    r = robjects.r

    def render(pdf_name, df, y_col, se_col, ylabel):
        # Draw one dodged bar chart with error bars into its own PDF device.
        # The device is always closed so the file is finalized and devices
        # do not accumulate across variations.
        r.pdf(pdf_name, height=pdf_height(), width=pdf_width())
        try:
            limits = ggplot2.aes(ymax='%s + %s' % (y_col, se_col),
                                 ymin='%s - %s' % (y_col, se_col))
            dodge = ggplot2.position_dodge(width=0.9)
            pp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='Problem', y=y_col, fill='Language') + \
                ggplot2.geom_bar(position='dodge', stat='identity') + \
                ggplot2.geom_errorbar(limits, position=dodge, width=0.25) + \
                ggplot2_options() + \
                ggplot2_colors() + \
                robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') + \
                robjects.r('ylab("%s")' % ylabel)
            pp.plot()
        finally:
            r['dev.off']()

    for var in cfg.variations:
        # each variation gets its own pair of plots
        avgs = []
        ses = []

        # values normalized per problem
        navgs = []
        nses = []

        langs = []
        probs = []

        for prob in cfg.problems:
            # aggregate by problem: one mean per language
            lavgs = []
            lses = []
            for lang in cfg.languages:
                data = FloatVector(values[prob][var][lang][0])

                langs.append(pretty_langs[lang])
                probs.append(prob)
                lavgs.append(r['mean'](data)[0])

                t_result = r['t.test'](
                    data, **{"conf.level": 0.999}).rx('conf.int')[0]
                lses.append((t_result[1] - t_result[0]) / 2)

            avgs.extend(lavgs)
            ses.extend(lses)

            lmin = min(lavgs)
            navgs.extend([la / lmin for la in lavgs])
            nses.extend([ls / lmin for ls in lses])

        # 'TimeLabel' is not drawn by either plot; it was prepared for a
        # text-label layer that is currently not part of the figure.
        df = robjects.DataFrame({
            'Language': StrVector(langs),
            'Problem': StrVector(probs),
            'Time': FloatVector(avgs),
            'SE': FloatVector(ses),
            'NormTime': FloatVector(navgs),
            'NormSE': FloatVector(nses),
            'TimeLabel': StrVector(
                [str(round(time, 1)) + "s" for time in avgs]),
        })

        # bar chart of actual times
        render('bargraph-executiontime-var-' + var + '.pdf',
               df, 'Time', 'SE', 'Execution time (in seconds)')

        # bar chart of times normalized to the fastest time for a problem
        render('bargraph-executiontime-var-norm-' + var + '.pdf',
               df, 'NormTime', 'NormSE',
               'Execution time (normalized to fastest)')
Example #37
0
def show1():
    """Plot today's time per project as a bar chart.

    Calls open1(), sources the R script head1.r, reads day1.csv into a data
    frame and draws 'time' against 'project', filled by project, on the
    current graphics device.
    """
    open1()
    r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',encoding="utf-8")
    data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')
    # Build the plot one layer at a time.
    pp = ggplot2.ggplot(data)
    pp = pp + ggplot2.aes_string(x='project', y='time', fill='project')
    pp = pp + ggplot2.geom_bar(stat='identity')
    pp = pp + ggplot2.ggtitle("今日项目时间分布图")
    pp = pp + ggplot2.labs(x='项目', y='时间 (min)')
    # Tilt the x-axis tick labels by 45 degrees.
    tilted_labels = {'axis.text.x': ggplot2.element_text(angle=45)}
    pp = pp + ggplot2.theme(**tilted_labels)
    pp.plot()