예제 #1
0
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Build a ggplot of informativeness (``pi``) per interval.

    The query result is fetched inside the embedded R session through the
    R-side connection ``con`` and stays there as the R variable ``data``.

    Args:
        locus_table, interval_table, intervals, loci: forwarded to
            ``get_interval_query`` to build the query text.
        boxplot: when true, draw one box per interval with the individual
            loci jittered on top; otherwise draw bars faceted by locus.

    Returns:
        The assembled ggplot object. Nothing is rendered here.
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))
    # `interval` is a factor on the R side, so its levels are re-declared
    # explicitly in the order produced by order_intervals(); sorting by the
    # leading integer of each interval is awkward to do in R itself.
    levels = order_intervals(fetched[1])
    robjects.r('''data$interval <- factor(data$interval, {})'''.format(levels))

    plot = ggplot2.ggplot(robjects.r('''data'''))
    if boxplot:
        plot = plot + ggplot2.aes_string(x='interval', y='pi')
        plot = plot + ggplot2.geom_boxplot(**{'outlier.size': 0, 'alpha': 0.3})
        plot = plot + ggplot2.geom_jitter(
            ggplot2.aes_string(color='locus'),
            size=3,
            alpha=0.6,
            position=ggplot2.position_jitter(width=0.25))
    else:
        plot = plot + ggplot2.aes_string(x='interval', y='pi', fill='locus')
        plot = plot + ggplot2.geom_bar()
        plot = plot + ggplot2.facet_wrap(robjects.Formula('~ locus'))
        plot = plot + ggplot2.opts(**{
            'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
            'legend.position': 'none',
        })

    # Both variants share the same axis titles.
    plot = plot + ggplot2.scale_y_continuous('phylogenetic informativeness')
    plot = plot + ggplot2.scale_x_discrete('interval (years ago)')
    return plot
예제 #2
0
def _plt_distr(dat,
               col,
               title='',
               splitBy_pfill=True,
               pfill='label',
               independentpdf=False,
               fname='xdistr.pdf'):
    """Render a count histogram and a density histogram of one column.

    Rows whose ``pfill`` value is the string 'NA' are dropped first. Two
    plots are drawn: raw counts, and density with a density curve added.

    Args:
        dat: table indexable by column name and boolean mask
            (presumably a pandas DataFrame -- confirm against callers).
        col: name of the numeric column to plot.
        title: plot title; the number of kept rows is appended to it.
        splitBy_pfill: when true, fill bars by the ``pfill`` column.
        pfill: name of the label column.
        independentpdf: when true, open a PDF device on ``fname`` around
            the two plots; otherwise draw on whatever device is current.
        fname: PDF path used when ``independentpdf`` is true.
    """
    valid = dat[dat[pfill] != 'NA']  # remove invalid pairs
    total = len(valid)
    frame = robjects.DataFrame({
        col: robjects.FloatVector(list(valid[col])),
        pfill: robjects.StrVector(list(valid[pfill])),
    })

    base = ggplot2.ggplot(frame) + \
        ggplot2.ggtitle('%s [Total = %s]' % (title, total))

    # Aesthetics shared by both plots; the density plot adds a y mapping.
    mapping = {'x': col}
    if splitBy_pfill:
        mapping['fill'] = pfill
    p1 = base + ggplot2.aes_string(**mapping)
    p2 = base + ggplot2.aes_string(**dict(mapping, y='..density..'))
    p2 = p2 + ggplot2.geom_density(alpha=.5, origin=-500)

    # Only the 'distance' column gets fixed binning; 'distance' and
    # 'correlation' get fixed x limits.
    if col == 'distance':
        bin_opts = {'binwidth': 1000, 'origin': -500}
        x_limits = (-1000, 51000)
    elif col == 'correlation':
        bin_opts = {}
        x_limits = (-1.1, 1.1)
    else:
        bin_opts = {}
        x_limits = None

    p1 = p1 + ggplot2.geom_histogram(alpha=.5, position='identity', **bin_opts)
    p2 = p2 + ggplot2.geom_histogram(alpha=.33, position='identity', **bin_opts)
    if x_limits is not None:
        p1 = p1 + ggplot2.xlim(*x_limits)
        p2 = p2 + ggplot2.xlim(*x_limits)

    device = importr('grDevices') if independentpdf else None
    if device is not None:
        device.pdf(file=fname)
    p1.plot()
    p2.plot()
    if device is not None:
        device.dev_off()
예제 #3
0
def make_output(tss_cov, out_prefix, upstream, downstream):
    """Write TSS coverage as a text table and as two PDF scatter plots.

    Produces three files:
      * ``<out_prefix>_raw.txt``  -- one "position<TAB>coverage" line per
        position from ``-upstream`` to ``downstream`` inclusive;
      * ``<out_prefix>_full.pdf`` -- coverage over the whole range;
      * ``<out_prefix>_zoom.pdf`` -- same plot limited to [-1000, 1000].

    Args:
        tss_cov: sequence of coverage values; index ``upstream + i`` holds
            the value for position ``i``.
        out_prefix: path prefix for the output files.
        upstream: number of positions before the TSS.
        downstream: number of positions after the TSS.
    """
    positions = list(range(-upstream, downstream + 1))

    # dump raw counts to file; the context manager closes the handle even
    # if formatting or indexing fails part-way through
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\n' % (i, tss_cov[upstream + i]))

    # make plot data structures
    tss_i = ro.IntVector(positions)
    cov = ro.FloatVector(tss_cov)
    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov})

    # The two plots differ only in their x scale.
    x_scales = (
        ('full', ggplot2.scale_x_continuous('TSS index')),
        ('zoom', ggplot2.scale_x_continuous(
            'TSS index', limits=ro.IntVector([-1000, 1000]))),
    )
    for suffix, x_scale in x_scales:
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov') + \
            ggplot2.geom_point() + \
            x_scale + \
            ggplot2.scale_y_continuous('Coverage')

        # plot to file; always close the device so a failed render does not
        # leave it open for later plots
        grdevices.pdf(file='%s_%s.pdf' % (out_prefix, suffix))
        try:
            gp.plot()
        finally:
            grdevices.dev_off()
예제 #4
0
def plot_squiggle(args, filename, start_times, mean_signals):
	"""
	Draw a faceted step ("squiggle") plot of one read with rpy2/ggplot2.

	Event start times are shifted so the first event is at 0 and are split
	into facets. With ``args.saveas`` set to "pdf" or "png" the plot is
	written to ``<basename of filename>.<saveas>`` (an existing file raises
	an Exception); otherwise it is shown and the function waits for Enter.
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# times are expressed relative to the first measured event
	origin = start_times[0]
	total_time = start_times[-1] - start_times[0]
	r_start_times = robjects.FloatVector([t - origin for t in start_times])
	r_mean_signals = robjects.FloatVector(mean_signals)

	# events per facet follows from the requested number of facets
	num_events = len(r_mean_signals)
	events_per_facet = (num_events / args.num_facets) + 1
	# dummy variable to control faceting
	facet_ids = [(idx / events_per_facet) + 1 for idx, _ in enumerate(start_times)]
	facet_category = robjects.FloatVector(facet_ids)

	df = robjects.DataFrame({
		'start': r_start_times,
		'mean': r_mean_signals,
		'cat': facet_category,
	})

	heading = 'Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)
	pp = ggplot2.ggplot(df) + ggplot2.aes_string(x='start', y='mean')
	pp = pp + ggplot2.geom_step(size=0.25)
	pp = pp + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x")
	pp = pp + ggplot2.scale_x_continuous('Time (seconds)')
	pp = pp + ggplot2.scale_y_continuous('Mean signal (picoamps)')
	pp = pp + ggplot2.ggtitle(heading)
	pp = pp + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
	if args.theme_bw:
		# added last, exactly as in the themed variant of the plot
		pp = pp + ggplot2.theme_bw()

	if args.saveas is None:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		raw_input()
		return

	plot_file = os.path.basename(filename) + "." + args.saveas
	if os.path.isfile(plot_file):
		raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
	if args.saveas == "pdf":
		grdevices.pdf(plot_file, width = 8.5, height = 11)
	elif args.saveas == "png":
		grdevices.png(plot_file, width = 8.5, height = 11,
			units = "in", res = 300)
	pp.plot()
	grdevices.dev_off()
예제 #5
0
def gray_plot(data, min=0, max=1, name=""):
    """Return a black-to-white tile plot of ``data``.

    ``data`` is melted with R's ``reshape::melt``; the melted columns
    ``L1``/``L2`` are the tile coordinates and ``value`` sets the fill.

    Args:
        data: R object accepted by ``reshape::melt``.
        min, max: limits of the fill gradient.
        name: title of the x axis.
    """
    reshape = importr('reshape')
    melted = reshape.melt(data,id_var=['x','y'])
    pg = ggplot2.ggplot(melted) + ggplot2.aes_string(x='L1', y='L2')
    pg = pg + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    pg = pg + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=FloatVector((min, max)))
    pg = pg + ggplot2.coord_equal()
    pg = pg + ggplot2.scale_x_continuous(name)
    return pg
예제 #6
0
def gray_plot(data, min=0, max=1, name=""):
    """Build a grayscale heat map (black = low, white = high).

    The input is reshaped to long form with R's ``reshape::melt``; tiles
    are placed by the melted ``L1``/``L2`` columns and filled by ``value``.

    Args:
        data: R object accepted by ``reshape::melt``.
        min, max: lower and upper limit of the fill gradient.
        name: label for the x axis.

    Returns:
        The ggplot object, not yet rendered.
    """
    reshape = importr('reshape')
    long_form = reshape.melt(data, id_var=['x', 'y'])
    gradient_limits = FloatVector((min, max))

    heat_map = ggplot2.ggplot(long_form) + ggplot2.aes_string(x='L1', y='L2')
    heat_map = heat_map + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    heat_map = heat_map + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=gradient_limits)
    heat_map = heat_map + ggplot2.coord_equal()
    heat_map = heat_map + ggplot2.scale_x_continuous(name)
    return heat_map
예제 #7
0
def line_plot(pdf_file,
              data,
              x,
              y,
              var,
              null_label="N/A",
              linetype=None,
              title=None,
              xlab=None,
              ylab=None,
              colorname=None,
              linename=None,
              **extra_aes_params):
    """Draw a point-and-path plot of ``y`` against ``x`` into a PDF file.

    Points are coloured by ``var``; paths connect points sharing the same
    ``group`` value. When the ``x`` column contains nulls, they are
    replaced by ``null_label`` and ``x`` becomes an ordered factor with
    ``null_label`` as its first level.

    Note that ``data`` is modified in place: a ``group`` column is always
    added, and when ``x`` has nulls the ``x`` column is rewritten as
    strings, a ``sortcol`` column is added and the rows are re-sorted.

    Args:
        pdf_file: path handed to the ``pdf`` device function.
        data: table with ``isnull``/``notnull``/``map`` column methods
            (presumably a pandas DataFrame -- confirm against callers).
        x, y: names of the columns for the two axes.
        var: name of the column used for colour (and for grouping).
        null_label: text substituted for null ``x`` values.
        linetype: optional column name mapped to the line type.
        title, xlab, ylab: plot and axis titles; the axis titles fall back
            to the column names.
        colorname, linename: legend titles; fall back to ``var`` and
            ``linetype``.
        **extra_aes_params: accepted but not used anywhere in this body.
    """

    # Landscape A4 page; the device stays open until dev_off() at the end.
    pdf(pdf_file, width=11.7, height=8.3, paper="a4r")
    if any(data[x].isnull()):
        # NOTE(review): list + map(...) only works where map() returns a
        # list (Python 2) -- confirm the target interpreter.
        labels = [null_label] + map(str, sorted(set(
            data[data[x].notnull()][x])))
        labels = robjects.StrVector(labels)
        nulls = data[x].isnull()
        # label -> rank, used below to order rows like the factor levels
        label_vals = dict(zip(labels, range(len(labels))))
        data[x] = data[x].astype("str")
        # NOTE(review): chained assignment; depends on data[x] returning a
        # view of the frame rather than a copy.
        data[x][nulls] = null_label
        data['sortcol'] = data[x].map(label_vals.__getitem__)
        # NOTE(review): DataFrame.sort looks like the old pandas spelling
        # of sort_values -- confirm the pinned pandas version.
        data.sort('sortcol', inplace=True)
    else:
        labels = None

    # Paths are drawn per group: one per (var, linetype) combination when a
    # distinct linetype column is given, otherwise one per var value.
    if linetype and linetype != var:
        data['group'] = data[var].map(str) + data[linetype].map(str)
    else:
        data['group'] = data[var]

    rdata = common.convert_to_r_dataframe(data)
    if labels:
        # turn the x column into an ordered factor with the levels above
        ix = rdata.names.index(x)
        rdata[ix] = ordered(rdata[ix], levels=labels)

    gp = gg2.ggplot(rdata)
    pp = (
        gp + gg2.geom_point(size=3) +
        gg2.scale_colour_hue(name=(colorname or var)) +
        #gg2.scale_colour_continuous(low="black") +
        gg2.aes_string(x=x, y=y, color=var, variable=var) +
        ggtitle(title or "") + xlabel(xlab or x) + ylabel(ylab or y)  #+
        #gg2.scale_y_continuous(breaks=seq(0.0, 1.0, 0.05))
    )

    # line type stuff
    if linetype:
        pp += gg2.geom_path(gg2.aes_string(group='group', linetype=linetype),
                            size=0.5)
        pp += gg2.scale_linetype(name=(linename or linetype))
    else:
        pp += gg2.geom_path(gg2.aes_string(group='group'), size=0.5)

    pp.plot()
    dev_off()
예제 #8
0
    def _plot_with_rpy2(self, regions, filename):
        """Write one PDF page per region with three stacked line panels.

        For each of the first ``self.num_regs`` regions the panels show,
        over ``self.num_bins`` bins on either side of the region's middle
        bin: the weighted intensity (log2 y axis), -log10 of the p-values,
        and -log10 of the corrected p-values with a dashed line at
        -log10(``self.threshold``). Each row of ``region.weighted`` /
        ``region.pvalues`` / ``region.corrected_pvalues`` is one line.

        Regions without any non-zero weighted value are skipped with a
        warning. If a region has fewer bins than requested, an error is
        logged and the process exits with status 1.

        Args:
            regions: sequence of region objects exposing ``weighted``,
                ``pvalues``, ``corrected_pvalues``, ``bin`` and
                ``positions``.
            filename: output path without the ``.pdf`` extension.
        """
        from rpy2 import robjects
        import rpy2.robjects.lib.ggplot2 as ggplot2
        from rpy2.robjects.lib import grid
        from rpy2.robjects.packages import importr
        grdevices = importr('grDevices')
        base = importr('base')
        grdevices.pdf(file=filename + '.pdf')

        t = list(range(-self.num_bins, self.num_bins + 1))
        # try/finally: the PDF device is closed on every exit path,
        # including the sys.exit() below.
        try:
            for region in regions[:self.num_regs]:
                if not np.any(region.weighted):
                    logger.warning(
                        "Warning: No data for region located on bin " + str(region.bin) + ". Not plotting this one.")
                    continue
                # Floor division keeps `middle` an int so it can be used as
                # a slice bound on Python 3 as well as Python 2.
                middle = (len(region.weighted[0]) - 1) // 2
                if middle < self.num_bins:
                    logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                    sys.exit(1)

                lo = middle - self.num_bins
                hi = middle + self.num_bins + 1
                n_series = len(region.weighted)
                d = {
                    # series id, repeated once per plotted bin
                    'map': robjects.StrVector(
                        [str(idx) for idx in range(n_series) for _ in t]),
                    't': robjects.FloatVector(t * n_series),
                    'e': robjects.FloatVector(
                        [v for row in region.weighted for v in row[lo:hi]]),
                    'p': robjects.FloatVector(
                        [-np.log10(v) for row in region.pvalues for v in row[lo:hi]]),
                    'c': robjects.FloatVector(
                        [-np.log10(v) for row in region.corrected_pvalues for v in row[lo:hi]]),
                }
                dataf = robjects.DataFrame(d)
                gp = ggplot2.ggplot(dataf)  # first yellow second red

                title = "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions)))
                p1 = gp + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='e', group='map', colour='map'),
                    alpha=0.8) + \
                    ggplot2.scale_y_continuous(trans='log2') + \
                    ggplot2.ggtitle(title) + \
                    ggplot2.labs(y="log Intensity") + \
                    ggplot2.theme_classic() + \
                    ggplot2.theme(**{
                        'axis.title.x': ggplot2.element_blank(),
                        'axis.text.y': ggplot2.element_text(angle=45),
                        'axis.text.x': ggplot2.element_blank(),
                        'legend.position': 'none'}) + \
                    ggplot2.scale_colour_brewer(palette="Set1")
                p2 = gp + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='p', group='map', colour='map'),
                    alpha=0.8) + \
                    ggplot2.labs(y="-log10(p-value)") + \
                    ggplot2.theme_classic() + \
                    ggplot2.theme(**{
                        'axis.title.x': ggplot2.element_blank(),
                        'axis.text.x': ggplot2.element_blank(),
                        'legend.position': 'none'}) + \
                    ggplot2.scale_colour_brewer(palette="Set1")
                p3 = gp + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='c', group='map', colour='map'),
                    alpha=0.8) + \
                    ggplot2.labs(y="-log10(q-value)",
                                 x='bins (' + str(self.bin_res) + ' bp each)') + \
                    ggplot2.geom_hline(
                        mapping=ggplot2.aes_string(yintercept=str(-np.log10(self.threshold))),
                        colour='black', alpha=0.8, linetype='dashed') + \
                    ggplot2.theme_classic() + \
                    ggplot2.theme(**{'legend.position': 'none'}) + \
                    ggplot2.scale_colour_brewer(palette="Set1")

                # Stack the three panels into a single grob and draw it.
                g1 = ggplot2.ggplot2.ggplotGrob(p1)
                g2 = ggplot2.ggplot2.ggplotGrob(p2)
                g3 = ggplot2.ggplot2.ggplotGrob(p3)
                robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
                robjects.r("grid::grid.draw(g)")
                grid.newpage()
                logger.debug('Plotted region ' + str(region.bin))
        finally:
            grdevices.dev_off()
예제 #9
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance.

    Draws the flowcell as a 16-row by 32-column grid of points, one per
    entry of ``minion_flowcell_layout()``, coloured by log10 of the total
    base pairs recorded for that pore and labelled with the pore id.

    Args:
        parser: unused here.
        args: options object; ``args.saveas`` is either None (show the
            plot and wait for Enter) or a path ending in ".pdf" or ".png".
        tot_bp_per_pore: mapping of pore id to total base pairs.

    An unrecognised ``saveas`` extension logs an error and exits.
    """
    import math

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    flowcell_layout = minion_flowcell_layout()

    # Pores with no yield (absent, or a total of 0 where log10 is
    # undefined) are plotted with a value of 0.
    pore_values = []
    for pore in flowcell_layout:
        total_bp = tot_bp_per_pore[pore] if pore in tot_bp_per_pore else 0
        pore_values.append(math.log10(total_bp) if total_bp > 0 else 0.0)

    # make a data frame of the lists
    d = {
        "rownum": robjects.IntVector(list(range(1, 17)) * 32),
        "colnum": robjects.IntVector(sorted(list(range(1, 33)) * 16)),
        # log10 values are fractional: a FloatVector keeps them intact,
        # whereas an IntVector would truncate them to whole numbers.
        "log10_tot_bp": robjects.FloatVector(pore_values),
        "labels": robjects.IntVector(flowcell_layout),
    }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (
        gp
        + gg.aes_string(y="factor(rownum, rev(rownum))", x="factor(colnum)")
        + gg.geom_point(gg.aes_string(color="log10_tot_bp"), size=7)
        + gg.geom_text(gg.aes_string(label="labels"), colour="white", size=2)
        + gg.scale_colour_gradient2(low="black", mid="black", high="red")
        + gg.coord_fixed(ratio=1.4)
        + gg.labs(x=gg.NULL, y=gg.NULL)
    )

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print ("Type enter to exit.")
        raw_input()
예제 #10
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance.

    One point is drawn per entry of ``minion_flowcell_layout()`` on a
    16-row by 32-column grid, coloured by log10 of the pore's total base
    pairs and labelled with the pore id.

    Args:
        parser: unused here.
        args: options object; ``args.saveas`` is None (interactive: show
            the plot and wait for Enter) or a ".pdf" / ".png" path.
        tot_bp_per_pore: mapping of pore id to total base pairs.

    An unrecognised ``saveas`` extension logs an error and exits.
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    flowcell_layout = minion_flowcell_layout()

    # A pore that is absent, or whose total is 0 (log10 undefined), gets 0.
    pore_values = []
    for pore in flowcell_layout:
        total_bp = tot_bp_per_pore[pore] if pore in tot_bp_per_pore else 0
        pore_values.append(math.log10(total_bp) if total_bp > 0 else 0.0)

    # make a data frame of the lists
    d = {'rownum': robjects.IntVector(list(range(1, 17)) * 32),
         'colnum': robjects.IntVector(sorted(list(range(1, 33)) * 16)),
         # FloatVector, not IntVector: log10 values are fractional and
         # would otherwise be truncated to whole numbers.
         'log10_tot_bp': robjects.FloatVector(pore_values),
         'labels': robjects.IntVector(flowcell_layout)
         }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = gp + gg.aes_string(y = 'factor(rownum, rev(rownum))', \
                     x = 'factor(colnum)') \
        + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size = 7) \
        + gg.geom_text(gg.aes_string(label ='labels'), colour="white", size = 2) \
        + gg.scale_colour_gradient2(low = "black", mid= "black", high="red") \
        + gg.coord_fixed(ratio=1.4) \
        + gg.labs(x=gg.NULL, y=gg.NULL)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width = 11, height = 8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width = 11, height = 8.5,
                units = "in", res = 300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
 def create_plot(filename, data, performance_object):
     """Write a PNG of mean performance per dimension with error bars.

     The y value is the column ``mean.<performance_object>``; error bars
     span that mean plus and minus ``stderror.<performance_object>``.
     Points are coloured by the ``signature`` column.

     Args:
         filename: path of the PNG file to write.
         data: data frame holding the columns named above and ``dimension``.
         performance_object: suffix selecting the mean/stderror columns.
     """
     mean_col = "mean.%s" % performance_object
     upper = "mean.%s+stderror.%s" % (performance_object, performance_object)
     lower = "mean.%s-stderror.%s" % (performance_object, performance_object)

     grdevices.png(file=filename)
     chart = ggplot2.ggplot(data) + ggplot2.aes_string(x="dimension", y=mean_col)
     chart = chart + ggplot2.geom_point(ggplot2.aes_string(colour="signature"))
     chart = chart + ggplot2.geom_errorbar(
         ggplot2.aes_string(ymax=upper, ymin=lower))
     chart.plot()
     grdevices.dev_off()
예제 #12
0
def plot_hist(sizes, args):
    """
    Plot a histogram of read sizes with rpy2/ggplot2.

    Only sizes strictly between ``args.min_length`` and ``args.max_length``
    are kept. The bin width is the kept range divided by ``args.num_bins``.
    With ``args.saveas`` set, the plot is written to that ".pdf" or ".png"
    path (any other extension logs an error and exits); otherwise it is
    shown and the function waits for Enter.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    in_range = [s for s in sizes if args.min_length < s < args.max_length]
    sizes = robjects.IntVector(in_range)

    binwidth = (max(sizes) - min(sizes)) / args.num_bins

    df = robjects.DataFrame({'sizes': sizes})

    # plot
    pp = ggplot2.ggplot(df) + ggplot2.aes_string(x='sizes')
    pp = pp + ggplot2.geom_histogram(binwidth=binwidth)
    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is None:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
        return

    plot_file = args.saveas
    if plot_file.endswith(".pdf"):
        grdevices.pdf(plot_file, width=8.5, height=8.5)
    elif plot_file.endswith(".png"):
        grdevices.png(plot_file, width=8.5, height=8.5, units="in", res=300)
    else:
        logger.error("Unrecognized extension for %s!" % (plot_file))
        sys.exit()

    pp.plot()
    grdevices.dev_off()
예제 #13
0
def plot(data, filename, title, ggplotter, xid="N", yid="RunTime", factorid="Step"):
    """Render ``yid`` against ``xid`` to a 10x6 inch PDF.

    Args:
        data: raw input handed to ``make_dataframe``.
        filename: path of the PDF to write.
        title: plot title.
        ggplotter: zero-argument callable returning the geometry layer.
        xid, yid: names of the x and y columns.
        factorid: column whose factor levels drive colour and fill.
    """
    df = make_dataframe(data, xid, yid, factorid)
    grdevices.pdf(file=filename, width=10, height=6)

    by_factor = 'factor(%s)' % factorid
    pp = ggplot2.ggplot(df) + ggplot2.aes_string(x=xid, y=yid)
    pp = pp + ggplot2.aes_string(size=.5)
    # the geometry comes from the caller
    pp = pp + ggplotter()
    pp = pp + ggplot2.aes_string(colour=by_factor)
    pp = pp + ggplot2.aes_string(fill=by_factor)
    pp = pp + ggplot2.opts(title=title)
    pp = pp + ggplot2.scale_fill_brewer(palette="Set2")
    pp = pp + ggplot2.scale_colour_brewer(palette="Set2")

    pp.plot()
    grdevices.dev_off()
예제 #14
0
def makeDistancePlot( alldata, figurename, feature="distance") :
    """Write a PNG density plot of ``feature``, filled by continent.

    A ``distance`` column (``het`` + ``hom``) is added to ``alldata`` in
    place before the table is converted to an R data frame.

    Args:
        alldata: table with ``het``, ``hom`` and ``continent`` columns.
        figurename: path of the PNG file to write.
        feature: name of the column plotted on the x axis.
    """
    alldata["distance"] = alldata.het + alldata.hom

    r_dataframe = com.convert_to_r_dataframe(alldata)
    density = ggplot2.geom_density(ggplot2.aes_string(fill="factor(continent)"))

    p = ggplot2.ggplot(r_dataframe) + ggplot2.aes_string(x=feature)
    p = p + density
    p = p + ggplot2.ggtitle("Distance from Reference by Continent")
    p = p + ggplot2.theme(**mytheme)

    grdevices.png(figurename)
    p.plot()
    grdevices.dev_off()
예제 #15
0
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    """Write a dodged bar chart of counts per subgroup as a 1200x800 PNG.

    Args:
        subgroups_to_sses_to_n_count: mapping of subgroup to a mapping of
            category to count.
        tname: name given to the category column; it drives the bar fill.
        file_name: file name of the PNG, created under ``OUTPUT_PATH``.
    """
    columns = {'subgroup': [], tname: [], 'count': []}
    max_count = 0
    # Flatten the nested mapping into three parallel columns.
    for subgroup, per_category in subgroups_to_sses_to_n_count.items():
        for category, n_count in per_category.items():
            columns['subgroup'].append(subgroup)
            columns[tname].append(category)
            columns['count'].append(n_count)
            max_count = max(max_count, n_count)

    # Subgroup levels follow the order returned by _sort_subgroup().
    subgroup_levels = ro.StrVector(_sort_subgroup(set(columns['subgroup'])))
    df = ro.DataFrame({
        'subgroup': ro.FactorVector(columns['subgroup'], levels=subgroup_levels),
        tname: ro.StrVector(columns[tname]),
        'count': ro.IntVector(columns['count']),
    })

    # Upper limit of the y axis.
    max_count = int(max_count / 1000 * 1000 + 1000)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))

    grdevices.png(file=histogram_file_path, width=1200, height=800)
    pp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='subgroup', y='count', fill=tname)
    pp = pp + ggplot2.geom_bar(position="dodge", width=0.8, stat="identity")
    pp = pp + ggplot2.theme_bw()
    pp = pp + ggplot2.theme_classic()
    pp = pp + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
    pp = pp + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)})
    pp = pp + ggplot2.theme(
        **{'axis.text.x': ggplot2.element_text(size=40, angle=45)})
    pp = pp + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)})
    pp = pp + ggplot2.scale_y_continuous(
        expand=ro.IntVector([0, 0]),
        limits=ro.IntVector([0, max_count]))
    # count labels on the bars, dodged like the bars themselves
    pp = pp + ggplot2.geom_text(
        ggplot2.aes_string(label='count'), size=6, angle=35, hjust=-0.1,
        position=ggplot2.position_dodge(width=0.8),
        vjust=-0.2)

    pp.plot()
    logging.info(str.format("Output step3 file {}", histogram_file_path))
    grdevices.dev_off()
예제 #16
0
def make_output_and(cov, control_cov, out_prefix, window):
    """Write main and control coverage around a splice site.

    Produces ``<out_prefix>_raw.txt`` (one "position<TAB>main<TAB>control"
    line per position) and ``<out_prefix>.pdf`` (scatter plot of both
    series, coloured by series label).

    Args:
        cov: list of main coverage values; index ``window // 2 + i`` holds
            position ``i``.
        control_cov: list of control coverage values, indexed the same way.
        out_prefix: path prefix for the output files.
        window: window width in positions, an integer.
    """
    half = window // 2
    # Floor division reproduces Python 2's int `/` and also runs on
    # Python 3, where `/` would yield floats that range() rejects.
    # NOTE(review): for an odd window the first position maps to index -1,
    # i.e. the last element -- confirm callers only pass even windows.
    positions = list(range(-window // 2, half + 1))

    # dump raw counts to file; the handle is closed even if a write fails
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for i in positions:
            raw_out.write(
                "%d\t%e\t%e\n" % (i, cov[half + i], control_cov[half + i]))

    # make plot data structures: both series stacked in one long frame
    splice_i = ro.IntVector(2 * positions)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(["Main"] * len(cov) + ["Control"] * len(control_cov))
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r, "label": labels})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov", colour="label")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
        + ggplot2.scale_colour_discrete("")
    )

    # plot to file; always close the device, even if rendering fails
    grdevices.pdf(file="%s.pdf" % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
예제 #17
0
def make_output(cov, out_prefix, window):
    """Write coverage around a splice site as a table and a PDF plot.

    Produces ``<out_prefix>_raw.txt`` (one "position<TAB>coverage" line
    per position) and ``<out_prefix>.pdf`` (scatter plot of the values).

    Args:
        cov: sequence of coverage values; index ``window // 2 + i`` holds
            position ``i``.
        out_prefix: path prefix for the output files.
        window: window width in positions, an integer.
    """
    half = window // 2
    # Floor division reproduces Python 2's int `/` and also runs on
    # Python 3, where `/` would yield floats that range() rejects.
    # NOTE(review): for an odd window the first position maps to index -1,
    # i.e. the last element -- confirm callers only pass even windows.
    positions = list(range(-window // 2, half + 1))

    # dump raw counts to file; the handle is closed even if a write fails
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for i in positions:
            raw_out.write("%d\t%e\n" % (i, cov[half + i]))

    # make plot data structures
    splice_i = ro.IntVector(positions)
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
    )

    # plot to file; always close the device, even if rendering fails
    grdevices.pdf(file="%s.pdf" % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot fetch-status percentages per crawl as stacked horizontal bars.

     Rows are limited to the types in ``row_filter``. Filter entries that
     start with ``fetcher:`` (optionally ``fetcher:aggr:``) have that
     prefix stripped, and the stripped names, in reverse filter order,
     become the leading levels of the ordered ``type`` category. Types
     without the prefix follow in sorted order so no row is lost.

     Args:
         data: frame with ``crawl``, ``percentage`` and ``type`` columns;
             it is not modified.
         row_filter: iterable of type names to keep; empty or None keeps
             every row.
         img_file: file name of the image, created under ``PLOTDIR``.
         ratio: ignored; the aspect ratio is derived from the number of
             distinct crawls.

     Returns:
         The ggplot object that was saved.
     """
     row_filter = row_filter or []
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     # Work on a private copy so the edits below never reach the caller.
     data = data[['crawl', 'percentage', 'type']].copy()

     prefix = re.compile('^fetcher:(?:aggr:)?')
     categories = []
     for value in row_filter:
         if prefix.search(value):
             replacement = prefix.sub('', value)
             # Categorical levels must be unique.
             if replacement not in categories:
                 categories.append(replacement)
             data = data.replace(to_replace=value, value=replacement)

     # list.reverse() works in place and returns None, so it must not be
     # passed straight to Categorical: that silently dropped the ordering.
     categories.reverse()
     known = set(categories)
     categories.extend(
         t for t in sorted(data['type'].dropna().unique()) if t not in known)
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories)

     ratio = 0.1 + len(data['crawl'].unique()) * .03
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb status counts per crawl as stacked horizontal bars.

     Rows are limited to the types in ``row_filter``. Filter entries that
     start with ``crawldb:status:db_`` have that prefix stripped, and the
     stripped names, in reverse filter order, become the leading levels of
     the ordered ``type`` category. Types without the prefix follow in
     sorted order so no row is lost.

     Args:
         data: frame with ``crawl``, ``size`` and ``type`` columns; it is
             not modified.
         row_filter: iterable of type names to keep; empty or None keeps
             every row.
         img_file: file name of the image, created under ``PLOTDIR``.
         ratio: ignored; the aspect ratio is derived from the number of
             distinct crawls.

     Returns:
         The ggplot object that was saved.
     """
     row_filter = row_filter or []
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     # Work on a private copy so the edits below never reach the caller,
     # whether or not a filter was applied.
     data = data.copy()

     prefix = re.compile('^crawldb:status:db_')
     categories = []
     for value in row_filter:
         if prefix.search(value):
             replacement = prefix.sub('', value)
             # Categorical levels must be unique.
             if replacement not in categories:
                 categories.append(replacement)
             data = data.replace(to_replace=value, value=replacement)

     # list.reverse() works in place and returns None, so it must not be
     # passed straight to Categorical: that silently dropped the ordering.
     categories.reverse()
     known = set(categories)
     categories.extend(
         t for t in sorted(data['type'].dropna().unique()) if t not in known)
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories)
     data['size'] = data['size'].astype(float)

     ratio = 0.1 + len(data['crawl'].unique()) * .03
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts\n(before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
예제 #20
0
def plot(request):
    """Return a PNG box plot of sentence similarity by grammaticality.

    Sentences rated 'N' are excluded. The similarity values of the
    grammatical and ungrammatical sentences are combined into one R data
    frame with a ``gram`` column ("GRAM" / "UNGRAM"), rendered to
    ``data.png`` in the working directory, and that file is sent back.

    Args:
        request: the incoming request; not inspected.

    Returns:
        An ``HttpResponse`` carrying the PNG bytes.
    """
    r = robjects.r

    ungram = Sentence.objects.filter(grammatical=False).exclude(
        rating='N').values_list('similarity', flat=True)
    gram = Sentence.objects.filter(grammatical=True).exclude(
        rating='N').values_list('similarity', flat=True)

    gram_r = robjects.FloatVector(gram)
    ungram_r = robjects.FloatVector(ungram)

    df = robjects.r["data.frame"]
    gram_df = df(gram="GRAM", similarity=gram_r)
    ungram_df = df(gram="UNGRAM", similarity=ungram_r)

    rbind = r['rbind']
    data = rbind(gram_df, ungram_df)

    pp = ggplot2.ggplot(data) + \
        ggplot2.aes_string(x="gram", y="similarity") + \
        ggplot2.geom_boxplot()

    grdevices = importr('grDevices')
    grdevices.png(file="data.png", width=580, height=512)
    try:
        pp.plot()
    finally:
        # close the device even if rendering fails, so it is not left open
        # for later requests
        grdevices.dev_off()

    # The context manager closes the handle; it used to stay open until
    # garbage collection.
    # NOTE(review): the fixed "data.png" path is shared by all requests --
    # concurrent requests can overwrite each other's image.
    with open("data.png", "rb") as image_file:
        image_data = image_file.read()

    # NOTE(review): `mimetype=` was replaced by `content_type=` in later
    # Django releases -- confirm against the pinned Django version.
    return HttpResponse(image_data, mimetype="image/png")
예제 #21
0
파일: views.py 프로젝트: pealco/rater
def plot(request):
    """Serve a PNG box plot comparing similarity by grammaticality.

    Sentences rated 'N' are left out. Similarity values of grammatical
    ("GRAM") and ungrammatical ("UNGRAM") sentences are stacked into one
    R data frame, drawn as box plots into ``data.png`` in the working
    directory, and the file's bytes are returned.

    Args:
        request: the incoming request; not inspected.

    Returns:
        An ``HttpResponse`` carrying the PNG bytes.
    """
    r = robjects.r

    ungram = Sentence.objects.filter(grammatical=False).exclude(rating='N').values_list('similarity', flat=True)
    gram = Sentence.objects.filter(grammatical=True).exclude(rating='N').values_list('similarity', flat=True)

    gram_r = robjects.FloatVector(gram)
    ungram_r = robjects.FloatVector(ungram)

    df = robjects.r["data.frame"]
    gram_df = df(gram="GRAM", similarity=gram_r)
    ungram_df = df(gram="UNGRAM", similarity=ungram_r)

    rbind = r['rbind']
    data = rbind(gram_df, ungram_df)

    pp = ggplot2.ggplot(data) + \
        ggplot2.aes_string(x="gram", y="similarity") + \
        ggplot2.geom_boxplot()

    grdevices = importr('grDevices')
    grdevices.png(file="data.png", width=580, height=512)
    try:
        pp.plot()
    finally:
        # close the device even if rendering fails, so it is not left open
        # for later requests
        grdevices.dev_off()

    # The context manager closes the handle; it used to stay open until
    # garbage collection.
    # NOTE(review): the fixed "data.png" path is shared by all requests --
    # concurrent requests can overwrite each other's image.
    with open("data.png", "rb") as image_file:
        image_data = image_file.read()

    # NOTE(review): `mimetype=` was replaced by `content_type=` in later
    # Django releases -- confirm against the pinned Django version.
    return HttpResponse(image_data, mimetype="image/png")
예제 #22
0
def main():
    """Plot the distribution of alignment lengths found in a BAM file.

    Takes exactly one command-line argument, the BAM path. Reads are
    tallied by their ``qlen`` and a bar chart covering every length from
    the smallest to the largest one seen is written to
    ``align_lengths.pdf``.
    """
    parser = OptionParser('usage: %prog [options] arg')
    #parser.add_option()
    (_options, args) = parser.parse_args()

    if len(args) == 1:
        bam_file = args[0]
    else:
        parser.error('Must provide BAM file')

    # tally reads by aligned query length
    align_lengths = {}
    for aligned_read in pysam.Samfile(bam_file, 'rb'):
        length = aligned_read.qlen
        align_lengths[length] = align_lengths.get(length, 0) + 1

    min_len = min(align_lengths.keys())
    max_len = max(align_lengths.keys())
    lengths = range(min_len, max_len + 1)

    # construct data frame; lengths that never occur get a count of 0
    df = ro.DataFrame({
        'length': ro.IntVector(lengths),
        'counts': ro.IntVector([align_lengths.get(l, 0) for l in lengths]),
    })

    # construct full plot
    gp = ggplot2.ggplot(df)
    gp = gp + ggplot2.aes_string(x='length', y='counts')
    gp = gp + ggplot2.geom_bar(stat='identity')
    gp = gp + ggplot2.scale_x_continuous('Alignment length')
    gp = gp + ggplot2.scale_y_continuous('')

    # plot to file
    grdevices.pdf(file='align_lengths.pdf')
    gp.plot()
    grdevices.dev_off()
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Write a box plot with jittered points comparing two sample sets.

    Args:
        grdevices: R graphics-devices package used to open and close the
            output device.
        plotName: path of the image file to write.
        samp_set1_vals: list of values plotted as "set1".
        samp_set2_vals: list of values plotted as "set2".
        image_file_type: "pdf" selects a PDF device; any other value
            selects a 512x512 PNG.
    """
    set_labels = ["set1"] * len(samp_set1_vals) + ["set2"] * len(samp_set2_vals)

    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(set_labels),
        "value": robjects.FloatVector(samp_set1_vals + samp_set2_vals),
    })

    pp = ggplot2.ggplot(dframe) + ggplot2.aes_string(x="sample", y='value')
    pp = pp + ggplot2.geom_boxplot()
    pp = pp + ggplot2.geom_jitter()
    pp = pp + ggplot2.theme_bw()

    if image_file_type != "pdf":
        grdevices.png(file=plotName, width=512, height=512)
    else:
        grdevices.pdf(file=plotName)
    pp.plot()
    grdevices.dev_off()
예제 #24
0
 def line_plot(self, data, title, ylabel, img_file,
               x='date', y='size', c='type', clabel=''):
     """Draw a line-and-point plot of ``y`` over ``x`` and save it.

     The plotting backend is selected by the module-level ``PLOTLIB``:
     'ggplot' or 'rpy2.ggplot2'.

     Args:
         data: frame holding the ``x``, ``y`` and ``c`` columns. With the
             rpy2 backend its ``y`` column is converted to float in place.
         title: plot title.
         ylabel: y-axis label.
         img_file: file name of the image, created under ``PLOTDIR``.
         x, y, c: names of the x, y and colour columns.
         clabel: legend title (rpy2 backend only).

     Returns:
         The plot object that was saved.

     Raises:
         ValueError: if ``PLOTLIB`` names neither supported backend.
     """
     if PLOTLIB == 'ggplot':
         # date_label = "%Y\n%b"
         date_label = "%Y\n%W"  # year + week number
         p = ggplot(data,
                    aes(x=x, y=y, color=c)) \
             + ggtitle(title) \
             + ylab(ylabel) \
             + xlab(' ') \
             + scale_x_date(breaks=date_breaks('3 months'),
                            labels=date_label) \
             + geom_line() + geom_point()
     elif PLOTLIB == 'rpy2.ggplot2':
         # convert y axis to float because R uses 32-bit signed integers,
         # values > 2 bln. (2^31) will overflow
         data[y] = data[y].astype(float)
         p = ggplot2.ggplot(data) \
             + ggplot2.aes_string(x=x, y=y, color=c) \
             + ggplot2.geom_line() + ggplot2.geom_point() \
             + GGPLOT2_THEME \
             + ggplot2.labs(title=title, x='', y=ylabel, color=clabel)
     else:
         # Without this branch `p` stayed unbound and the save below failed
         # with an UnboundLocalError that hid the real cause.
         raise ValueError('Unsupported PLOTLIB: %r' % (PLOTLIB,))
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     # data.to_csv(img_path + '.csv')
     return p
예제 #25
0
def compare_sum_barplot(locus_table, interval_table, intervals, loci, names,
        rows):
    """Build a dodged bar chart of summed ``pi`` per interval and database.

    The R-side ``data`` object is aggregated into ``agg_data`` (sum of
    ``pi`` by ``interval`` and ``db``); when more than one interval is
    requested the ``interval`` factor is re-levelled using
    ``order_intervals``.  ``loci`` is accepted but not used here.

    Returns the ggplot object; nothing is drawn or saved.
    """
    # Called for its effect on the R session: the aggregate() call below reads
    # an R variable named `data` (presumably created by this helper -- confirm).
    get_r_data_by_top(locus_table, interval_table, intervals, names, rows)
    aggregated = robjects.r('''agg_data <- aggregate(pi ~ interval + db, data = data, sum)''')
    if len(intervals) > 1:
        level_spec = order_intervals(aggregated[0])
        robjects.r('''agg_data$interval <- factor(agg_data$interval,{})'''.format(level_spec))

    base = ggplot2.ggplot(robjects.r('''agg_data'''))
    mapping = ggplot2.aes_string(x='interval', y='pi', fill='factor(db)')
    bar_options = {'position': 'dodge', 'colour': '#767676', 'alpha': 0.6}
    bars = ggplot2.geom_bar(**bar_options)
    y_scale = ggplot2.scale_y_continuous('net phylogenetic informativeness')
    x_scale = ggplot2.scale_x_discrete('interval (years ago)')
    fill_scale = ggplot2.scale_fill_brewer("database", palette="Blues")
    return base + mapping + bars + y_scale + x_scale + fill_scale
예제 #26
0
def bargraph_language(results):
    """Write one dodged bar chart of line counts per language to a PDF.

    ``results`` maps ``(language, problem, variation)`` keys to a line
    count.  For every entry of the module-level ``languages`` a file named
    ``bargraph-loc-lang-<language>.pdf`` is written, with problems on the
    x axis and one bar per variation.
    """
    r = robjects.r

    for language in languages:
        varis = []
        probs = []
        locs = []
        for (lang, prob, var), loc in results.items():
            if lang != language:
                continue
            varis.append(pretty_varis[var])
            probs.append(prob)
            locs.append(loc)

        r.pdf('bargraph-loc-lang-' + language + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        try:
            df = robjects.DataFrame({
                'Variation': StrVector(varis),
                'Problem': StrVector(probs),
                'Lines': IntVector(locs),
            })

            pp = ggplot2.ggplot(df) + \
                ggplot2.aes_string (x='Problem', y='Lines', fill='Variation') + \
                ggplot2.geom_bar (position='dodge', stat='identity') + \
                ggplot2_options () + \
                ggplot2_colors () + \
                robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') +\
                robjects.r('ylab("Lines of Code")')
            pp.plot()
        finally:
            # Close the PDF device even if building or drawing the plot fails.
            r['dev.off']()
예제 #27
0
def render_plot(gp, args):
  """Render a plot using ggplot

  :gp: A base ggplot2 object
  :args: Plot options, wrapped in ``util.Namespace``.  The attributes read
    are ``x`` and ``y`` (aesthetic expressions), ``type`` ('points',
    'lines' or 'boxplot') and ``facets`` (formula string passed to
    ``facet_grid``, or None for no faceting).

  Raises NotImplementedError (an Exception subclass) for an unknown type.
  Faceting and drawing stay best-effort: a failure there is logged as a
  warning instead of being silently discarded.
  """
  import logging

  import rpy2.robjects.lib.ggplot2 as ggplot2

  log = logging.getLogger(__name__)
  args = util.Namespace(args)

  pp = gp + ggplot2.aes_string(x=args.x,
                               y=args.y)

  geoms = {
    'points': ggplot2.geom_point,
    'lines': ggplot2.geom_line,
    'boxplot': ggplot2.geom_boxplot,
  }
  if args.type not in geoms:
    raise NotImplementedError("{0} not implemented".format(args.type))
  pp += geoms[args.type]()

  if args.facets is not None:
    try:
      pp += ggplot2.facet_grid(ro.Formula(args.facets))
    except Exception:
      # Keep going without facets, but leave a trace of what went wrong.
      log.warning("Could not apply facets %r; plotting without them",
                  args.facets, exc_info=True)

  try:
    pp.plot()
  except Exception:
    log.warning("Plot could not be rendered", exc_info=True)
예제 #28
0
def make_output_and(cov, control_cov, out_prefix, window):
    """Dump and plot main vs. control coverage around a splice site.

    Writes ``<out_prefix>_raw.txt`` (position, main coverage, control
    coverage; tab separated) and ``<out_prefix>.pdf`` (scatter plot of both
    series, colored by label).

    Args:
        cov: list of coverage values for the main set.
        control_cov: list of coverage values for the control set.
        out_prefix: prefix of both output files.
        window: integer window size; positions run from ``-window // 2`` to
            ``window // 2`` inclusive.
    """
    half = window // 2
    # Floor division reproduces the Python 2 integer `/` this code was written
    # for and also works on Python 3, where `/` would yield a float.
    # NOTE(review): for an odd window the first position is -(half + 1), so
    # its index half + i is -1 and reads the last element -- confirm intent.
    positions = list(range(-window // 2, half + 1))

    # dump raw counts to file
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\t%e\n' %
                          (i, cov[half + i], control_cov[half + i]))

    # make plot data structures
    splice_i = ro.IntVector(2 * positions)
    cov_r = ro.FloatVector(cov+control_cov)
    labels = ro.StrVector(['Main']*len(cov)+['Control']*len(control_cov))
    df = ro.DataFrame({'splice_i':splice_i, 'cov':cov_r, 'label':labels})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage') + \
        ggplot2.scale_colour_discrete('')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    try:
        gp.plot()
    finally:
        # Close the device even when drawing fails.
        grdevices.dev_off()
예제 #29
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream,
                    downstream):
    """Dump and plot main vs. control coverage for selected keys.

    ``te_tss_cov`` and ``control_te_tss_cov`` map two-element keys to lists
    of coverage values indexed from ``-upstream`` to ``downstream``.  Only
    keys whose two elements are both in the whitelists below are written.
    For each such key a raw text file goes to ``<out_prefix>_raw/`` and a
    PDF scatter plot to ``<out_prefix>_plot/``; both directories are wiped
    and recreated first.
    """
    # Whitelists for the first and second key element.
    first_allowed = (
        'n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'
    )
    second_allowed = (
        'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
        'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
        'LTR/ERVK', 'DNA/TcMar-Tigger'
    )
    selected = [te for te in te_tss_cov
                if te[0] in first_allowed and te[1] in second_allowed]

    def file_stem(te):
        # '/' would create sub-directories, so it is replaced in file names.
        return '%s_%s' % (te[0].replace('/', '_'), te[1].replace('/', '_'))

    # list() keeps this working on Python 3, where range() is lazy and
    # cannot be repeated with `2 * ...`.
    positions = list(range(-upstream, downstream + 1))

    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file
    for te in selected:
        raw_path = '%s_raw/%s.txt' % (out_prefix, file_stem(te))
        with open(raw_path, 'w') as raw_out:
            for i in positions:
                raw_out.write('%d\t%e\t%e\n' % (
                    i, te_tss_cov[te][upstream + i],
                    control_te_tss_cov[te][upstream + i]))

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures
    tss_i = ro.IntVector(2 * positions)
    labels = ro.StrVector(['Main'] * (upstream + downstream + 1) +
                          ['Control'] * (upstream + downstream + 1))
    for te in selected:
        cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
        df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

        # construct full plot
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
            ggplot2.geom_point() + \
            ggplot2.scale_x_continuous('TSS index') + \
            ggplot2.scale_y_continuous('Coverage') + \
            ggplot2.scale_colour_discrete('')

        # plot to file
        grdevices.pdf(file='%s_plot/%s.pdf' % (out_prefix, file_stem(te)))
        try:
            gp.plot()
        finally:
            # Close the device even when drawing fails.
            grdevices.dev_off()
예제 #30
0
def main():
    """Compare two mutation files and plot their relative profile.

    Parses the command line, reads both mutation files with
    ``parse_mutations``, computes the relative profile with
    ``compute_relative_profile``, prints it as a table, and writes a tile
    plot (reference nucleotide vs. read nucleotide) to the PDF given by
    ``-o``.
    """
    usage = 'usage: %prog [options] <mut1 file> <mut2 file>'
    parser = OptionParser(usage)
    parser.add_option('-m', dest='mut_norm', action='store_true', default=False, help='Normalize by # mutations (as opposed to sequenced bp) [Default: %default]')
    parser.add_option('-o', dest='output_pdf', default='mut_cmp.pdf', help='Output pdf file for heatmap [Default: %default]')
    parser.add_option('-r', dest='raw', action='store_true', default=False, help='Use raw mutation counts (as opposed to normalized for ACGT content) [Default: %default]')
    (options,args) = parser.parse_args()

    if len(args) != 2:
        parser.error(usage)  # prints the message and exits
    mut1_file, mut2_file = args

    mutation_profile1, seq_bp1 = parse_mutations(mut1_file, options.raw)
    mutation_profile2, seq_bp2 = parse_mutations(mut2_file, options.raw)

    relative_mutation_profile = compute_relative_profile(mutation_profile1, seq_bp1, mutation_profile2, seq_bp2)

    print_table(relative_mutation_profile)

    # make plotting data structures: one row per ordered nucleotide pair
    nts = ['_','A','C','G','T']
    pairs = [(nt1, nt2) for nt1 in nts for nt2 in nts]

    nts1_r = ro.StrVector([nt1 for nt1, _ in pairs])
    nts2_r = ro.StrVector([nt2 for _, nt2 in pairs])
    rel_r = ro.FloatVector([relative_mutation_profile[pair] for pair in pairs])

    df = ro.DataFrame({'nt1':nts1_r, 'nt2':nts2_r, 'rel':rel_r})

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='nt2', y='nt1', fill='rel') + \
        ggplot2.geom_tile() + \
        ggplot2.scale_x_discrete('Read') + \
        ggplot2.scale_y_discrete('Reference') + \
        ggplot2.scale_fill_gradient2('log2 enrichment', low='darkblue', mid='white', high='darkred')

    # save to file
    grdevices.pdf(file=options.output_pdf)
    try:
        gp.plot()
    finally:
        # Close the device even when drawing fails.
        grdevices.dev_off()
예제 #31
0
 def BoxPlot_One(self, metabolite):
     """Draw a box plot of one metabolite column of ``self.raw_data``.

     The x axis is the column named by ``self.metadata``; the y axis is the
     column that ``self.metabolite_dict`` maps ``metabolite`` to.
     """
     # Close every graphics device that is still open before drawing.
     r('graphics.off()')
     base = ggplot2.ggplot(self.raw_data)
     # The column name is wrapped in back-ticks before being handed to R.
     y_expr = '`' + self.metabolite_dict[metabolite] + '`'
     mapping = ggplot2.aes_string(x=self.metadata, y=y_expr)
     figure = base + mapping + ggplot2.geom_boxplot()
     figure.plot()
예제 #32
0
def multiple_locus_net_informativeness_scatterplot(locus_table, net_pi_table,
        loci):
    """Build a scatter plot of ``pi`` against ``time`` for several loci.

    Rows are fetched through the R-side connection ``con`` by joining
    ``locus_table`` and ``net_pi_table`` on ``id``.  When the first entry of
    ``loci`` is 'all' (case-insensitive) every locus is included; otherwise
    only the listed loci are selected.

    Returns the ggplot object; nothing is drawn or saved.
    """
    select = ('SELECT {0}.locus, time, pi FROM {0}, {1} '
              'WHERE {0}.id = {1}.id').format(locus_table, net_pi_table)
    if loci[0].lower() != 'all':
        # Build the IN list by hand: formatting tuple(loci) yields "('x',)"
        # for a single locus, and that trailing comma is invalid SQL.
        # Embedded single quotes are doubled so they stay inside the literal.
        quoted = ', '.join(
            "'{}'".format(name.replace("'", "''")) for name in loci)
        select += ' and locus in ({})'.format(quoted)
    frame = robjects.r('dbGetQuery(con, "{}")'.format(select))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y = 'pi') + \
            ggplot2.geom_point(ggplot2.aes_string(colour = 'locus'), \
            size = 3, alpha = 0.4) + ggplot2.scale_x_reverse('years ago') + \
            ggplot2.scale_y_continuous('phylogenetic informativeness')
    return plot
예제 #33
0
def plot(data, x, y, ylabel, color, filename):
    """Save a single-color line plot of column ``y`` against ``x``.

    The x-axis tick labels are rotated by 90 degrees, the y axis is titled
    ``ylabel`` and the figure is written to ``filename`` with ggsave.
    """
    text_theme = {
        'axis.text.x': ggplot2.element_text(angle=90, hjust=1),
        'strip.text.y': ggplot2.element_text(size=6, angle=90),
    }
    figure = ggplot2.ggplot(data=data)
    figure = figure + ggplot2.geom_line(ggplot2.aes_string(x=x, y=y), color=color)
    figure = figure + ggplot2.theme(**text_theme)
    figure = figure + ggplot2.scale_y_continuous(ylabel)
    ggplot2.ggplot2.ggsave(filename, figure)
예제 #34
0
    def barPlot(self, dataframe, filename, x_parm, y_parm):
        """Write a bar chart of ``y_parm`` per ``x_parm`` to a 512x512 PNG.

        Bar heights are taken directly from the ``y_parm`` column
        (``stat="identity"``).
        """
        grdevices.png(file=filename, width=512, height=512)
        try:
            data = ggplot2.ggplot(dataframe)
            aes = ggplot2.aes_string(x=x_parm, y=y_parm)
            geom = ggplot2.geom_bar(stat="identity")
            gg = data + aes + geom
            gg.plot()
        finally:
            # Close the device even if building or drawing the plot fails.
            grdevices.dev_off()
예제 #35
0
	def boxPlot(self, dataframe, filename, x_parm, y_parm):
		"""Write a box plot of ``y_parm`` grouped by ``x_parm`` to a 512x512 PNG."""
		grdevices.png(file=filename, width=512, height=512)
		try:
			data = ggplot2.ggplot(dataframe)
			aes = ggplot2.aes_string(x=x_parm, y=y_parm)
			geom = ggplot2.geom_boxplot(alpha=0.7, fill="aquamarine3")
			gg = data + aes + geom
			gg.plot()
		finally:
			# Close the device even if building or drawing the plot fails.
			grdevices.dev_off()
예제 #36
0
	def barPlot(self, dataframe, filename, x_parm, y_parm):
		"""Write a bar chart of ``y_parm`` per ``x_parm`` to a 512x512 PNG.

		Bar heights are taken directly from the ``y_parm`` column
		(``stat="identity"``).
		"""
		grdevices.png(file=filename, width=512, height=512)
		try:
			data = ggplot2.ggplot(dataframe)
			aes = ggplot2.aes_string(x=x_parm, y=y_parm)
			geom = ggplot2.geom_bar(stat="identity")
			gg = data + aes + geom
			gg.plot()
		finally:
			# Close the device even if building or drawing the plot fails.
			grdevices.dev_off()
예제 #37
0
    def plot_ROC(self, path):
        """Draw ``tpr`` against ``fpr`` from ``self.df`` onto a PDF device.

        A 14x8 PDF device writing to ``path`` is opened through R's ``pdf``.
        This method makes no ``dev.off`` call, so the device is still open
        when it returns.
        """
        open_pdf = robjects.r["pdf"]
        open_pdf(path, width=14, height=8)

        r_frame = convert_to_r_dataframe(self.df, strings_as_factors=True)
        curve = ggplot2.ggplot(r_frame)
        # Blue line with size-2 points on top.
        for layer in (ggplot2.aes_string(x="fpr", y="tpr"),
                      ggplot2.geom_line(color="blue"),
                      ggplot2.geom_point(size=2)):
            curve += layer
        curve.plot()
예제 #38
0
def plot_start(x, y):
    """Scatter-plot columns ``x`` and ``y`` of a CSV file into 'point.png'.

    The data comes from the first path returned by ``glob('*.csv')`` in the
    current directory.  Title and axis labels are fixed to "mtcars", 'wt'
    and 'mpg' regardless of the columns chosen.
    """
    # This import emits warnings, so it lives inside the function: the
    # warnings only show up when the function is actually called.
    import rpy2.robjects.lib.ggplot2 as ggplot2
    utils = importr('utils')
    csv_path = glob('*.csv')[0]
    data = utils.read_csv(csv_path)

    layers = (
        ggplot2.aes_string(x=x, y=y),
        ggplot2.geom_point(),
        ggplot2.scale_colour_gradient(low="yellow", high="red"),
        ggplot2.labs(title="mtcars", x='wt', y='mpg'),
    )
    # Fold the layers together first, then add them to the base plot,
    # matching the grouping of the parenthesised sum this replaces.
    combined = layers[0]
    for layer in layers[1:]:
        combined = combined + layer
    plot = ggplot2.ggplot(data)
    plot = plot + combined
    plot.save('point.png')
예제 #39
0
	def histogram(self, dataframe, filename, parm, group, units):
		"""Write a histogram of ``parm``, filled by ``group``, to a 512x512 PNG.

		The x-axis label is ``parm`` followed by a space and ``units``.
		Everything runs inside ``suppress_stdout()``.
		"""
		with suppress_stdout():
			grdevices.png(file=filename, width=512, height=512)
			try:
				data = ggplot2.ggplot(dataframe)
				aes = ggplot2.aes_string(x=parm, fill=group)
				geom = ggplot2.geom_histogram(colour="black")
				labs = ggplot2.labs(x=parm + " " + units)
				gg = data + aes + geom + labs
				gg.plot()
			finally:
				# Close the device even if building or drawing the plot fails.
				grdevices.dev_off()
예제 #40
0
def compare_mean_boxplot(locus_table, interval_table, intervals, loci, names, rows):
    """Build box plots of ``pi`` per interval, filled by database.

    When more than one interval is requested, the ``interval`` factor of the
    R-side ``data`` object is re-levelled using ``order_intervals``.
    ``loci`` is accepted but not used here.

    Returns the ggplot object; nothing is drawn or saved.
    """
    fetched = get_r_data_by_top(locus_table, interval_table, intervals, names,
            rows)
    if len(intervals) > 1:
        level_spec = order_intervals(fetched[1])
        robjects.r('''data$interval <- factor(data$interval, {})'''.format(level_spec))

    box_options = {
        'outlier.size': 3,
        'outlier.colour': '#767676',
        'outlier.alpha': 0.3,
        'alpha': 0.6,
    }
    base = ggplot2.ggplot(robjects.r('''data'''))
    mapping = ggplot2.aes_string(x='interval', y='pi')
    boxes = ggplot2.geom_boxplot(ggplot2.aes_string(fill='factor(db)'),
                                 **box_options)
    y_scale = ggplot2.scale_y_continuous('mean phylogenetic informativeness')
    x_scale = ggplot2.scale_x_discrete('interval (years ago)')
    fill_scale = ggplot2.scale_fill_brewer("database", palette='Blues')
    return base + mapping + boxes + y_scale + x_scale + fill_scale
예제 #41
0
 def histogram(self, dataframe, filename, parm, group, units):
     """Write a histogram of ``parm``, filled by ``group``, to a 512x512 PNG.

     The x-axis label is ``parm`` followed by a space and ``units``.
     Everything runs inside ``suppress_stdout()``.
     """
     with suppress_stdout():
         grdevices.png(file=filename, width=512, height=512)
         try:
             data = ggplot2.ggplot(dataframe)
             aes = ggplot2.aes_string(x=parm, fill=group)
             geom = ggplot2.geom_histogram(colour="black")
             labs = ggplot2.labs(x=parm + " " + units)
             gg = data + aes + geom + labs
             gg.plot()
         finally:
             # Close the device even if building or drawing the plot fails.
             grdevices.dev_off()
예제 #42
0
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count,
                                       file_name):
    """Render a grouped bar chart of counts as a 1024x512 PNG.

    Args:
        place_type_pos_type_to_count: mapping whose keys have the form
            "<place>_<pos>" (exactly one underscore) and whose values are
            integer counts.
        file_name: name of the PNG, created inside OUTPUT_PATH.

    Bars are grouped by ``pos`` on the x axis and filled by ``place``; each
    bar is annotated with its count.
    """
    columns_to_data = {'place': [], 'pos': [], 'count': []}
    for place_pos_type, n_count in place_type_pos_type_to_count.items():
        place_type, pos_type = place_pos_type.split('_')
        columns_to_data['place'].append(place_type)
        columns_to_data['pos'].append(pos_type)
        columns_to_data['count'].append(n_count)
    r_columns_to_data = {
        'place': ro.StrVector(columns_to_data['place']),
        'pos': ro.StrVector(columns_to_data['pos']),
        'count': ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    # Largest count, never below 0 (also covers an empty mapping).
    max_count = max([0] + columns_to_data['count'])
    # Round the y-axis limit up to the next multiple of 1000 (of 100 for
    # small counts).  Floor division is required: with true division
    # "/ step * step" is a no-op and the limit is not rounded at all.
    step = 1000 if max_count > 1000 else 100
    max_count = int(max_count // step * step + step)

    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    # Lazy %-formatting: the data frame is only rendered if DEBUG is enabled.
    logging.debug("The Data Frame for file %s: \n%s", histogram_file_path, df)
    grdevices.png(file=histogram_file_path, width=1024, height=512)
    try:
        gp = ggplot2.ggplot(df)
        pp = gp + \
             ggplot2.aes_string(x='pos', y='count', fill='place') + \
             ggplot2.geom_bar(position="dodge", stat="identity") + \
             ggplot2.theme_bw() + \
             ggplot2.theme_classic() + \
             ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
             ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
             ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                        limits=ro.IntVector([0, max_count])) + \
             ggplot2.geom_text(ggplot2.aes_string(label='count'),
                               position=ggplot2.position_dodge(width=0.8), size=10, angle=35, hjust=-0.2,
                               vjust=-0.5)
        pp.plot()
        logging.info("Output step3 file %s", histogram_file_path)
    finally:
        # Close the device even if building or drawing the plot fails.
        grdevices.dev_off()
예제 #43
0
    def plot_ROC(self, path):
        """Print ``self.df`` and draw its ``tpr`` against ``fpr`` onto a PDF.

        A 14x8 PDF device writing to ``path`` is opened through R's ``pdf``.
        This method makes no ``dev.off`` call, so the device is still open
        when it returns.
        """
        open_pdf = robjects.r['pdf']
        open_pdf(path, width=14, height=8)

        roc_frame = self.df
        print(roc_frame)
        r_frame = convert_to_r_dataframe(roc_frame, strings_as_factors=True)
        curve = ggplot2.ggplot(r_frame)
        # Blue line with size-2 points on top.
        for layer in (ggplot2.aes_string(x='fpr', y='tpr'),
                      ggplot2.geom_line(color='blue'),
                      ggplot2.geom_point(size=2)):
            curve += layer
        curve.plot()
예제 #44
0
def plot_hist(sizes, args):
    """
    Use rpy2 to plot a histogram of the read sizes.

    Only sizes strictly between ``args.min_length`` and ``args.max_length``
    are plotted, in ``args.num_bins`` bins.  ``args.theme_bw`` switches to
    the black-and-white theme.  When ``args.saveas`` is set the plot goes to
    that .pdf or .png file; otherwise it is drawn on the default device and
    the function waits for Enter.

    Raises ValueError when no size passes the length filter.  Exits with
    status 1 when ``args.saveas`` has an unrecognized extension.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    kept = [s for s in sizes if args.min_length < s < args.max_length]
    if not kept:
        raise ValueError("No read sizes between min_length and max_length")

    # float() keeps the division exact on Python 2 as well, where integer
    # division could truncate the bin width down to 0.
    binwidth = float(max(kept) - min(kept)) / args.num_bins
    if binwidth <= 0:
        # All sizes identical: fall back to a unit-wide bin.
        binwidth = 1

    df = robjects.DataFrame({"sizes": robjects.IntVector(kept)})

    # plot
    pp = ggplot2.ggplot(df) + ggplot2.aes_string(x="sizes") + \
        ggplot2.geom_histogram(binwidth=binwidth)
    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # Non-zero status: this is a failure, not a clean exit.
            sys.exit(1)

        try:
            pp.plot()
        finally:
            # Close the device even when drawing fails.
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print("Type enter to exit.")
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
예제 #45
0
def rank_abundance_plot(counter, name):
    """Write a rank-abundance scatter plot to a PNG under analytics_out/.

    ``rank_abundance_data(counter)`` supplies the ranks and fractions; the
    fractions are drawn on a log10 y axis.  The file is named
    ``analytics_out/<name>_rank_abundance.png``.
    """
    grdevices.png('analytics_out/{0}_rank_abundance.png'.format(name))
    try:
        ranks, fracs = rank_abundance_data(counter)
        df = robjects.DataFrame({'rank': ranks, 'f': fracs})
        pp = ggplot.ggplot(df) + \
            ggplot.aes_string(x = 'rank', y = 'f') + \
            ggplot.geom_point() + \
            ggplot.scale_y_log10(name = 'fraction of hits')
        pp.plot()
    finally:
        # Close the device even if preparing or drawing the plot fails.
        grdevices.dev_off()
예제 #46
0
def main():
    """Plot loess-smoothed main vs. control coverage around the TSS.

    Reads a whitespace-separated raw file (position, main coverage, control
    coverage per line) and writes ``<out_prefix>_and.pdf``.  The x positions
    are generated from ``-u``/``-d`` rather than read from the file, so the
    file is expected to hold ``upstream + downstream + 1`` rows.
    """
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int', help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss', help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int', help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float', help='Y-coordinate limit [Default: %default]')
    (options,args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')  # prints the message and exits
    raw_file = args[0]

    # collect data
    coords = []  # parsed (so a malformed first column still fails) but not plotted
    main_cov = []
    control_cov = []
    with open(raw_file) as raw_in:
        for line in raw_in:
            a = line.split()
            coords.append(int(a[0]))
            main_cov.append(float(a[1]))
            control_cov.append(float(a[2]))

    # data structures
    n_positions = options.upstream + options.downstream + 1
    # Repeat the positions once per label so every column has the same
    # length, instead of relying on R to recycle the shorter vector.
    tss_i = ro.IntVector(2 * list(range(-options.upstream, options.downstream+1)))
    labels = ro.StrVector(['Main']*n_positions + ['Control']*n_positions)
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax is None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0,options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    try:
        gp.plot()
    finally:
        # Close the device even when drawing fails.
        grdevices.dev_off()
예제 #47
0
    def boxPlot(self, dataframe, filename, x_parm, y_parm):
        """Write a box plot of ``y_parm`` grouped by ``x_parm`` to a 512x512 PNG."""
        grdevices.png(file=filename, width=512, height=512)
        try:
            data = ggplot2.ggplot(dataframe)
            aes = ggplot2.aes_string(
                x=x_parm,
                y=y_parm,
            )
            geom = ggplot2.geom_boxplot(alpha=0.7, fill="aquamarine3")
            gg = data + aes + geom
            gg.plot()
        finally:
            # Close the device even if building or drawing the plot fails.
            grdevices.dev_off()
예제 #48
0
 def plot_similarity_matrix(self, item_type, image_file, title):
     '''Plot similarities of crawls (overlap of unique items)
     as heat map matrix

     Reads the nested mapping ``self.similarity[item_type][crawl1][crawl2]``,
     flattens it into one row per crawl pair, and draws a tile plot with the
     rounded similarity printed in each tile.  The image is saved as
     ``image_file`` under PLOTDIR and the plot object is returned.
     '''
     # Column-oriented dict: data[column][row_number] = value
     data = defaultdict(dict)
     n = 1
     for crawl1 in self.similarity[item_type]:
         for crawl2 in self.similarity[item_type][crawl1]:
             similarity = self.similarity[item_type][crawl1][crawl2]
             data['crawl1'][n] = MonthlyCrawl.short_name(crawl1)
             data['crawl2'][n] = MonthlyCrawl.short_name(crawl2)
             data['similarity'][n] = similarity
             data['sim_rounded'][n] = similarity  # to be rounded
             n += 1
     data = pandas.DataFrame(data)
     print(data)
     # select median of similarity values as midpoint of similarity scale
     midpoint = data['similarity'].median()
     # Defaults: 3 decimals in small text; values below minshown show as '0'
     decimals = 3
     textsize = 2
     minshown = .0005
     # With a value range wider than 0.2, use 2 decimals and larger text
     if (data['similarity'].max()-data['similarity'].min()) > .2:
         decimals = 2
         textsize = 2.8
         minshown = .005
     # Format each value with the chosen precision and drop leading zeros
     # (e.g. '0.12' becomes '.12'); values below minshown become '0'
     data['sim_rounded'] = data['sim_rounded'].apply(
         lambda x: ('{0:.'+str(decimals)+'f}').format(x).lstrip('0')
         if x >= minshown else '0')
     print('Median of similarities for', item_type, '=', midpoint)
     matrix_size = len(self.similarity[item_type])
     if matrix_size > self.MAX_MATRIX_SIZE:
         # Too many crawls: walk them in reverse sorted order and drop the
         # rows of every crawl after the leading ones.
         # NOTE(review): `n > MAX_MATRIX_SIZE` keeps MAX_MATRIX_SIZE + 1
         # crawls (n = 0..MAX_MATRIX_SIZE) -- confirm whether `>=` was meant.
         n = 0
         for crawl1 in sorted(self.similarity[item_type], reverse=True):
             short_name = MonthlyCrawl.short_name(crawl1)
             if n > self.MAX_MATRIX_SIZE:
                 data = data[data['crawl1'] != short_name]
                 data = data[data['crawl2'] != short_name]
             n += 1
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl2', y='crawl1',
                              fill='similarity', label='sim_rounded') \
         + ggplot2.geom_tile(color="white") \
         + ggplot2.scale_fill_gradient2(low="red", high="blue", mid="white",
                                        midpoint=midpoint, space="Lab") \
         + GGPLOT2_THEME \
         + ggplot2.coord_fixed() \
         + ggplot2.theme(**{'axis.text.x':
                            ggplot2.element_text(angle=45,
                                                 vjust=1, hjust=1)}) \
         + ggplot2.labs(title=title, x='', y='') \
         + ggplot2.geom_text(color='black', size=textsize)
     img_path = os.path.join(PLOTDIR, image_file)
     p.save(img_path)
     return p
예제 #49
0
    def plot_all_errors(self, path):
        """Draw every column of ``self.df`` against ``iteration`` on a PDF.

        The frame is melted so that each non-``iteration`` column becomes a
        colored series of points and lines.  A 14x8 PDF device writing to
        ``path`` is opened through R's ``pdf``; this method makes no
        ``dev.off`` call, so the device is still open when it returns.
        """
        open_pdf = robjects.r["pdf"]
        open_pdf(path, width=14, height=8)

        long_frame = pandas.melt(self.df, id_vars="iteration")
        r_frame = convert_to_r_dataframe(long_frame, strings_as_factors=True)
        chart = ggplot2.ggplot(r_frame)
        for layer in (ggplot2.aes_string(x="iteration", y="value",
                                         color="variable"),
                      ggplot2.geom_point(size=2),
                      ggplot2.geom_line()):
            chart += layer
        chart.plot()
예제 #50
0
def plot_coef(feat_mat_dir,
              model_dir,
              expt_names,
              pref,
              outfile=None,
              height=120,
              fsize=12):
    """Box-plot classification and regression coefficients across experiments.

    For every experiment name the feature matrix
    ``<feat_mat_dir>/<name>_feat_mat.npz`` and the model
    ``<model_dir>/<pref><name>_model.pkl`` are read; the coefficient arrays
    of all models are concatenated column-wise.

    Args:
        feat_mat_dir: directory holding the feature-matrix files.
        model_dir: directory holding the pickled models.
        expt_names: iterable of experiment names (at least one).
        pref: prefix of the model file names.
        outfile: image path for ggsave; when None the plot is drawn on the
            current device instead.
        height: image height in mm.
        fsize: font size of the axis tick labels.

    Returns:
        The wide pandas DataFrame (feature, Classification, Regression).

    Raises:
        ValueError: if ``expt_names`` is empty.
    """
    feat_names = clf_coef = reg_coef = None
    nexpt = 0
    for ex in expt_names:
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        (tmp_feat, tmp_y, tmp_feat_names,
         tmp_gene_names) = read_feat_mat(feat_mat_file)

        if nexpt == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            # Every experiment must use the same features in the same order.
            assert (all(f[0] == f[1] for f in zip(feat_names, tmp_feat_names)))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)
        nexpt += 1

    if nexpt == 0:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError('expt_names must contain at least one experiment')

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size, )),
        'Regression': np.reshape(reg_coef, (reg_coef.size, ))
    })

    df2 = pd.melt(df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    # Image width in mm grows with the number of experiments, minimum 80.
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        # `units` is ggsave's documented argument name; the former `unit`
        # only worked through R's partial argument matching.
        ro.r.ggsave(filename=outfile,
                    plot=gp,
                    width=w,
                    height=height,
                    units='mm')
    return df
def plotStats(data,
              outFolder,
              tiles,
              prop="qual",
              prefix="",
              high="yellow",
              low="blue",
              pdf=False,
              detail=True):
    """Save per-tile scatter plots of ``prop`` at the x/y coordinates.

    An overview PNG with one facet per tile is written to ``outFolder``.
    With ``detail`` set, one PNG per entry of ``tiles`` (plus a PDF when
    ``pdf`` is set) is written to ``<outFolder>/detailPlots``, which is
    created if it does not exist.

    Args:
        data: R data frame with columns ``x``, ``y``, ``tile`` and ``prop``.
        outFolder: directory receiving the overview plot.
        tiles: integer tile ids to draw detail plots for.
        prop: name of the column mapped to the point color.
        prefix: optional file-name prefix.
        high: color of the high end of the gradient.
        low: color of the low end of the gradient.
        pdf: also save each detail plot as PDF.
        detail: whether to draw the per-tile detail plots.
    """
    #overview plot
    p = ggplot.ggplot(data)
    p = p + ggplot.aes_string(x="x", y="y", col=prop) \
        + ggplot.geom_point(size=0.1) \
        + ggplot.facet_wrap(robjects.Formula("~ tile")) \
        + ggplot.scale_colour_gradient(high=high, low=low) \
        + ggplot.ggtitle("Overview %s" % (prop))
    if prefix:
        fileName = "%s_overview_%s.png" % (prefix, prop)
    else:
        fileName = "overview_%s.png" % (prop)
    p.save(os.path.join(outFolder, fileName), scale=2)

    #detail plots
    if detail:
        detailFolder = os.path.join(outFolder, "detailPlots")
        # Saving into a missing directory fails, so create it on demand.
        if not os.path.isdir(detailFolder):
            os.makedirs(detailFolder)
        for t in tiles:
            # Keep only the rows belonging to tile t
            p = ggplot.ggplot(data.rx(data.rx2("tile").ro == t, True))
            p = p + ggplot.aes_string(x="x", y="y", col=prop) \
                + ggplot.geom_point(size=1) \
                + ggplot.facet_wrap(robjects.Formula("~ tile")) \
                + ggplot.scale_colour_gradient(high=high, low=low) \
                + ggplot.ggtitle("%i %s" % (t, prop))
            if prefix:
                fileName = "%s_%i_%s.png" % (prefix, t, prop)
            else:
                fileName = "%i_%s.png" % (t, prop)
            p.save(os.path.join(detailFolder, fileName), scale=2)
            if pdf:
                # NOTE(review): unlike the PNG name there is no "_" between
                # prefix and tile id here -- kept as is, confirm intent.
                fileName = "%s%i_%s.pdf" % (prefix, t, prop)
                p.save(os.path.join(detailFolder, fileName), scale=2)
예제 #52
0
def plot_cv_r2(pandas_df, outfile, fsize = 10, height = 120, max_width = 50, xlab = 'Parameters'):
    """Save box plots of the ``r2`` column grouped by the ``title`` column.

    The image is written to ``outfile`` with ggsave; its width is 5 mm per
    distinct title, but never less than ``max_width``; its height is
    ``height`` mm.  ``fsize`` sets the tick-label font size and ``xlab`` the
    x-axis title.
    """
    distinct_titles = set(pandas_df['title'])
    width_mm = max(5 * len(distinct_titles), max_width)

    tick_theme = {
        'axis.text.x': ggplot2.element_text(size=fsize, angle=65, vjust=1, hjust=1),
        'axis.text.y': ggplot2.element_text(size=fsize),
    }
    figure = ggplot2.ggplot(com.convert_to_r_dataframe(pandas_df))
    for layer in (ggplot2.aes_string(x='factor(title)', y='r2'),
                  ggplot2.geom_boxplot(),
                  ggplot2.scale_y_continuous('R-squared'),
                  ggplot2.scale_x_discrete(xlab),
                  ggplot2.theme_bw(),
                  ggplot2.theme(**tick_theme)):
        figure = figure + layer
    ro.r.ggsave(filename=outfile, plot=figure, width=width_mm, height=height, unit='mm')
def runBoruta():
    """Run the R ``findRelevant`` routine and box-plot the selected z-scores.

    Loads ``Rcode/zscores.RData`` and sources the Boruta relevance script
    from a fixed path, calls ``findRelevant`` on the ``massallfeatures`` and
    ``nonmassallfeatures`` R objects, then draws the ``masszscore_selected``
    element of its result.
    """
    base.load("Rcode/zscores.RData")
    base.source('Z:/Cristina/MassNonmass/codeProject/codeBase/trainClassifier/Rcode/borutaRelevance.R')
    find_relevant = globalenv['findRelevant']
    relevance = find_relevant(globalenv['massallfeatures'],
                              globalenv['nonmassallfeatures'])

    # generate boxplot comparison of relevant mass features vs. the same non-mass feature
    selected = relevance.rx2("masszscore_selected")
    mapping = ggplot2.aes_string(x='MorN', y='zscores', fill='factor(MorN)')
    labels = ggplot2.opts(title="Comparison of Z-scores for Mass confirmed features",
                          y="Z-scores")
    figure = ggplot2.ggplot(selected) + mapping + ggplot2.geom_boxplot() + labels
    figure.plot()
예제 #54
0
def single_locus_net_informativeness(locus_table, net_pi_table, locus):
    """Build a scatter plot of ``pi`` against ``time`` for a single locus.

    Rows are fetched through the R-side connection ``con`` by joining
    ``locus_table`` and ``net_pi_table`` on ``id`` and filtering on
    ``locus``, which also becomes the plot title.

    Returns the ggplot object; nothing is drawn or saved.
    """
    # The SQL text is wrapped in double quotes because it is pasted into R
    # source as a string literal.
    query_template = ('"SELECT {0}.locus, time, pi FROM {0}, {1} \n'
                      '    WHERE {0}.id = {1}.id AND locus = \'{2}\'"')
    sql = query_template.format(locus_table, net_pi_table, locus)
    rows = robjects.r('''dbGetQuery(con, {})'''.format(sql))

    figure = ggplot2.ggplot(rows)
    for layer in (ggplot2.aes_string(x='time', y='pi'),
                  ggplot2.geom_point(size=3, alpha=0.4),
                  ggplot2.scale_x_reverse('years ago'),
                  ggplot2.scale_y_continuous('phylogenetic informativeness'),
                  ggplot2.opts(title=locus)):
        figure = figure + layer
    return figure
예제 #55
0
파일: wblanes.py 프로젝트: etlapale/wblanes
    def compute(x0, y0, x1, y1):
        """Fit two Gaussians to the profile of an image region and plot it.

        The region ``img[y0:y1, x0:x1]`` (``img`` comes from the enclosing
        scope) is inverted and averaged into a one-dimensional profile.  R's
        ``nls`` then fits an offset plus two Gaussians to it; the data, the
        full fit and each single Gaussian are plotted, and the ratio
        ``c1*sg1 / (c1*sg1 + c2*sg2)`` is shown in the title.
        """
        # Selected square
        # Inverted (255 - value) after averaging over axis 2
        # (presumably the color channels -- confirm).
        sel = 255 - average(img[y0:y1,x0:x1],2)
        # Average across x (axis 1)
        ysel = average(sel,1)
        line = ysel
        # Integer positions 0 .. len(line) - 1 used as the x values
        xs = mgrid[0:line.shape[0]]

        # Pass the data to R
        rxs = robjects.FloatVector(xs)
        rys = robjects.FloatVector(line)
        rdf = robjects.DataFrame({'x': rxs, 'y': rys})
        # The R snippets below refer to these by name (`xs`, `df`)
        robjects.globalenv['xs'] = rxs
        robjects.globalenv['df'] = rdf
        #print(rys.r_repr())

        # Fit an R model: offset + two Gaussians, with fixed starting values
        robjects.r('''fit <- nls(y ~ (off + c1 * exp(-(x-mu1)**2/(2*sg1**2))
                                    + c2 * exp(-(x-mu2)**2/(2*sg2**2))),
                               data=df,
                               start=list(off = 90, c1=120, mu1=30, sg1=10,
                                   c2=120, mu2=60, sg2=10),
                               algorithm='port')''')

        # Get fit results
        robjects.r('''k <- coef(fit)
                      fitdat <- data.frame(x=xs)
                      fitdat$y <- predict(fit, newdata=fitdat)
                      ## Independent Gaussians
                      fitg1 <- data.frame(x=xs)
                      fitg1$y <- k[['off']] + k[['c1']] * exp(-(xs-k[['mu1']])**2/(2*k[['sg1']]**2))
                      fitg2 <- data.frame(x=xs)
                      fitg2$y <- k[['off']] + k[['c2']] * exp(-(xs-k[['mu2']])**2/(2*k[['sg2']]**2))''')
        
        # Plot R fits
        fitdat = robjects.globalenv['fitdat']
        fitg1 = robjects.globalenv['fitg1']
        fitg2 = robjects.globalenv['fitg2']
        # Data points, the combined fit (thicker line) and each Gaussian
        pp = ggplot2.ggplot(rdf) \
             + ggplot2.aes_string(x='x', y='y') \
             + ggplot2.geom_point() \
             + ggplot2.geom_smooth(data=fitdat, stat="identity", size=1.5) \
             + ggplot2.geom_smooth(data=fitg1, stat="identity") \
             + ggplot2.geom_smooth(data=fitg2, stat="identity")
        pp.plot()

        # Compute the ratio of the Gaussian integrals
        # (amplitude * sigma of the first Gaussian over the sum of both)
        ratio = robjects.r('''k[['c1']]*k[['sg1']]/(k[['c1']]*k[['sg1']]+k[['c2']]*k[['sg2']])''')
        
        title('ratio='+str(ratio))
        show()
예제 #56
0
def bargraph_variation_diff():
    """Write bar charts of the coding-time ratio of expert vs. standard runs.

    For each (standard, expert) variation pair -- ('seq', 'expertseq') and
    ('par', 'expertpar') -- one PDF named
    ``bargraph-codingtime-diff-<standard>.pdf`` is written.  For every
    language/problem combination the plotted value is
    ``(time_expert + time) / time - 1``, read from the module-level
    ``result``; it is 0 when either time is missing.
    """
    r = robjects.r

    for (standard, expert) in [('seq', 'expertseq'), ('par', 'expertpar')]:
        langs = []
        probs = []
        diffs = []
        for lang in languages:
            for prob in problems:
                try:
                    base_time = result[lang][prob][standard]
                    expert_time = result[lang][prob][expert]
                except KeyError:
                    # Either measurement is missing: plot a zero bar.
                    diff = 0
                else:
                    diff = (float(expert_time + base_time) / float(base_time) - 1)

                langs.append(pretty_langs[lang])
                probs.append(prob)
                diffs.append(diff)

        r.pdf('bargraph-codingtime-diff-' + standard + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        try:
            df = robjects.DataFrame({
                'Language': StrVector(langs),
                'Problem': StrVector(probs),
                'Difference': FloatVector(diffs),
            })

            pp = ggplot2.ggplot(df) + \
                ggplot2.aes_string (x='Problem', y='Difference', fill='Language') + \
                ggplot2.geom_bar (position='dodge', stat='identity') + \
                ggplot2_options () + \
                ggplot2_colors () + \
                robjects.r('ylab("Coding time difference (in percent)")') +\
                robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') +\
                robjects.r('scale_y_continuous(labels = percent_format())')
            pp.plot()
        finally:
            # Close the PDF device even if building or drawing the plot fails.
            r['dev.off']()