コード例 #1
0
ファイル: squiggle.py プロジェクト: JohnUrban/poretools
def plot_squiggle(args, filename, start_times, mean_signals):
	"""
	Use rpy2 to create a squiggle plot of the read.

	The events are drawn as a step plot of mean signal against time
	(relative to the first event) and stacked into at most
	``args.num_facets`` facets.

	args         -- options object; reads ``num_facets``, ``theme_bw`` and
	                ``saveas`` (None to plot on screen, else "pdf" or "png").
	filename     -- name of the read; used in the plot title and, when
	                saving, as the stem of the output file name.
	start_times  -- start time of each event (labelled as seconds on the axis).
	mean_signals -- mean signal of each event, parallel to start_times.

	Raises Exception if the output plot file already exists and ValueError
	if ``args.saveas`` is set to anything other than "pdf" or "png".
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]
	total_time = start_times[-1] - t_0
	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
	r_mean_signals = robjects.FloatVector(mean_signals)

	# infer the appropriate number of events given the number of facets.
	# Floor division is required: with true division (Python 3) every event
	# would get its own fractional facet value.
	num_events = len(r_mean_signals)
	events_per_facet = (num_events // args.num_facets) + 1
	# dummy variable to control faceting
	facet_category = robjects.FloatVector(
		[(i // events_per_facet) + 1 for i in range(len(start_times))])

	# make a data frame of the start times and mean signals
	d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
	df = robjects.DataFrame(d)

	pp = ggplot2.ggplot(df) \
		+ ggplot2.aes_string(x='start', y='mean') \
		+ ggplot2.geom_step(size=0.25) \
		+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
		+ ggplot2.scale_x_continuous('Time (seconds)') \
		+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
		+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
		+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
	if args.theme_bw:
		# appended last, exactly as in the original layer order
		pp = pp + ggplot2.theme_bw()

	if args.saveas is not None:
		if args.saveas not in ("pdf", "png"):
			# previously no device was opened and no file was written
			raise ValueError('Unsupported plot format: %s' % args.saveas)
		plot_file = os.path.basename(filename) + "." + args.saveas
		if os.path.isfile(plot_file):
			raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
		if args.saveas == "pdf":
			grdevices.pdf(plot_file, width = 8.5, height = 11)
		else:
			grdevices.png(plot_file, width = 8.5, height = 11,
				units = "in", res = 300)
		try:
			pp.plot()
		finally:
			# always close the device so the file is flushed and not leaked
			grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		try:
			wait_for_enter = raw_input  # Python 2
		except NameError:
			wait_for_enter = input  # Python 3
		wait_for_enter()
コード例 #2
0
def _plt_distr(dat,
               col,
               title='',
               splitBy_pfill=True,
               pfill='label',
               independentpdf=False,
               fname='xdistr.pdf'):
    """Plot the distribution of column `col` as a count and a density figure.

    Rows whose `pfill` column equals 'NA' are dropped first. Two ggplot2
    figures are drawn: a histogram of counts, and a density curve overlaid
    with a histogram on the density scale. When `splitBy_pfill` is true
    both are filled by the `pfill` column. With `independentpdf` the two
    figures are written to the PDF `fname`; otherwise they are drawn on
    the current graphics device.
    """
    valid = dat[dat[pfill] != 'NA']  # remove invalid pairs
    total = len(valid)
    rframe = robjects.DataFrame({
        col: robjects.FloatVector(list(valid[col])),
        pfill: robjects.StrVector(list(valid[pfill]))
    })

    base_plot = ggplot2.ggplot(rframe) + \
        ggplot2.ggtitle('%s [Total = %s]' % (title, total))

    # Aesthetics shared by both figures; the density figure adds y.
    count_aes = {'x': col}
    if splitBy_pfill:
        count_aes['fill'] = pfill
    density_aes = dict(count_aes)
    density_aes['y'] = '..density..'

    def _histogram(alpha):
        # 'distance' gets fixed 1000-wide bins; other columns use defaults
        if col == 'distance':
            return ggplot2.geom_histogram(binwidth=1000, alpha=alpha,
                                          position='identity', origin=-500)
        return ggplot2.geom_histogram(alpha=alpha, position='identity')

    ## Plot1: counts
    counts = base_plot + ggplot2.aes_string(**count_aes)
    ## Plot2: density
    density = base_plot + ggplot2.aes_string(**density_aes)
    density = density + ggplot2.geom_density(alpha=.5, origin=-500)

    counts = counts + _histogram(.5)
    density = density + _histogram(.33)

    # Fixed x ranges for the two columns that have one
    if col == 'distance':
        limits = (-1000, 51000)
    elif col == 'correlation':
        limits = (-1.1, 1.1)
    else:
        limits = None
    if limits is not None:
        counts = counts + ggplot2.xlim(*limits)
        density = density + ggplot2.xlim(*limits)

    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(file=fname)
    counts.plot()
    density.plot()
    if independentpdf:
        grdevices.dev_off()
    return
コード例 #3
0
def plotStats(data,
              outFolder,
              tiles,
              prop="qual",
              prefix="",
              high="yellow",
              low="blue",
              pdf=False,
              detail=True):
    """Save an overview scatter plot and, optionally, one plot per tile.

    Points are drawn at (x, y), coloured by column `prop` on a `low`-to-`high`
    gradient and faceted by the `tile` column. The overview PNG goes to
    `outFolder`; with `detail` set, one PNG (and a PDF when `pdf` is true)
    per entry of `tiles` goes to `outFolder`/detailPlots. `prefix`, when
    non-empty, is prepended to the file names.
    """

    def _scatter(frame, point_size, title):
        # shared layer stack for the overview and the per-tile plots
        plot = ggplot.ggplot(frame)
        return plot + ggplot.aes_string(x="x", y="y", col=prop) \
            + ggplot.geom_point(size=point_size) \
            + ggplot.facet_wrap(robjects.Formula("~ tile")) \
            + ggplot.scale_colour_gradient(high=high, low=low) \
            + ggplot.ggtitle(title)

    # overview plot
    overview = _scatter(data, 0.1, "Overview %s" % (prop))
    fileName = ("%s_overview_%s.png" % (prefix, prop)) if prefix \
        else ("overview_%s.png" % (prop))
    overview.save(os.path.join(outFolder, fileName), scale=2)

    if not detail:
        return

    # detail plots
    detailFolder = os.path.join(outFolder, "detailPlots")
    for t in tiles:
        # keep only the rows belonging to tile t
        tile_rows = data.rx(data.rx2("tile").ro == t, True)
        p = _scatter(tile_rows, 1, "%i %s" % (t, prop))
        fileName = ("%s_%i_%s.png" % (prefix, t, prop)) if prefix \
            else ("%i_%s.png" % (t, prop))
        p.save(os.path.join(detailFolder, fileName), scale=2)
        if pdf:
            fileName = "%s%i_%s.pdf" % (prefix, t, prop)
            p.save(os.path.join(detailFolder, fileName), scale=2)
コード例 #4
0
ファイル: boxes.py プロジェクト: escott55/me_variome
def makeDistanceBox(alldata, figurename, feature="distance"):
    """Write a box plot of `feature` per continent to the PNG `figurename`.

    alldata    -- pandas DataFrame with `het`, `hom` and `continent` columns.
                  NOTE: a `distance` column (het + hom) is added to it in place.
    figurename -- path of the PNG file to create.
    feature    -- column plotted on the y axis (default: the derived
                  `distance` column).
    """
    # in-place on purpose: callers may rely on the added column
    alldata["distance"] = alldata.het + alldata.hom

    r_dataframe = com.convert_to_r_dataframe(alldata)
    p = ggplot2.ggplot(r_dataframe) + \
        ggplot2.aes_string(x="factor(continent)", y=feature) + \
        ggplot2.geom_boxplot() + \
        ggplot2.ggtitle("Distance from Reference by Continent") + \
        ggplot2.theme(**mytheme)

    grdevices.png(figurename)
    try:
        p.plot()
    finally:
        # close the device even if rendering fails, so it is not leaked
        grdevices.dev_off()
コード例 #5
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Save a box plot of read counts per barcode, grouped by match status.

    The barcode, count and matched vectors come from `get_vectors`. Counts
    are shown in millions with jittered points on top of the boxes, and the
    figure is written to `directory`/`expt_id`.png (4 x 5 in, 300 dpi).
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes, 'count': counts, 'matched': matches})
    p = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='factor(matched)', y='count / 1000000') + \
        ggplot2.geom_boxplot(outlier_size = 0) + \
        ggplot2.geom_jitter() + \
        ggplot2.ggtitle(label = expt_id) + \
        ggplot2.ggplot2.xlab(label = "") + \
        ggplot2.scale_y_continuous(name = "Count\n(million reads)")

    filename = "{0}/{1}.png".format(directory, expt_id)
    # `units` is the real argument name of R's png(); `unit` only worked
    # through R's partial argument matching.
    grdevices.png(filename=filename, width=4, height=5, units='in', res=300)
    try:
        p.plot()
    finally:
        # close the device even when rendering fails
        grdevices.dev_off()
コード例 #6
0
ファイル: box_bar_plot.py プロジェクト: xxz19900/hominid
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Write a box plot of abundance by genotype to the PDF `plot_file_path`.

    data           -- R data frame with `genotype` and `abundance` columns.
    plot_file_path -- path of the PDF file to create.
    title          -- plot title.
    xlabel, ylabel -- axis labels.
    """
    # Build the plot before opening the device, so a failure while
    # assembling it does not leave an open device or an empty PDF behind.
    pp = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='genotype', y='abundance') \
        + ggplot2.geom_boxplot() \
        + ggplot2.ggtitle(title) \
        + ggplot2.labs(x=xlabel, y=ylabel) \
        + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1)) \
        + ggplot2.geom_point()

    grdevices.pdf(file=plot_file_path)
    try:
        pp.plot()
    finally:
        # always close the PDF device, even if rendering raises
        grdevices.dev_off()
コード例 #7
0
ファイル: convertAPTresults.py プロジェクト: sofiakp/rnapred
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir = None):
    """Make correlation plots between CEL files for the same cell type.

    For every cell type in `expt_names`, each pair of its CEL files is
    compared: the Pearson correlation of the two columns of `expr` is
    computed and a scatter plot of one column against the other is drawn.

    expr          -- 2-D array; columns correspond to CEL files.
    expt_names    -- iterable of cell type names.
    expt_name_idx -- mapping from cell type name to the column indices of
                     `expr` (and of `cel_names`) that belong to it.
    cel_names     -- CEL file names; underscores are shown as dots on the axes.
    outdir        -- if None the plots are drawn on the current device;
                     otherwise each plot is saved as <type>-<n>.png in
                     `outdir` (created when missing) together with a
                     tab-separated summary, cor_summary.txt.

    Returns a pandas DataFrame with columns name1, name2 and cor, indexed
    by "<type>-<n>".
    """

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front. Previously it was only
    # created while saving a plot, so with no pairs to plot the summary
    # could not be written into a directory that did not exist yet.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for i, col_1 in enumerate(tmp_idx):
            name1 = cel_names[col_1].replace('_', '.')
            for col_2 in tmp_idx[i + 1:]:
                name2 = cel_names[col_2].replace('_', '.')
                plot_idx += 1
                cor = np.corrcoef(expr[:, col_1], expr[:, col_2])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))

                df = ro.DataFrame({'x':ro.FloatVector(expr[:, col_1]),
                                   'y':ro.FloatVector(expr[:, col_2])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                    ggplot2.geom_point(size = 1) + \
                    ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                    ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                    ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                     'axis.title.x':ggplot2.element_text(size = 8),
                                     'axis.text.y':ggplot2.element_text(size = fsize),
                                     'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                     'plot.title':ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, ex + '-' + str(plot_idx) + '.png')
                    # `units` is ggsave's real argument name (`unit` relied
                    # on R partial matching)
                    ro.r.ggsave(filename = outfile, plot = gp, width = 85, height = 85, units = 'mm')

    df = pd.DataFrame({'name1':names_1, 'name2':names_2, 'cor':cors}, index = titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep = '\t')
    return df
コード例 #8
0
ファイル: linear.py プロジェクト: mdelhey/kaggle-galaxy
def ridge_cv_plot(val_err, lam_range):
    """
    Draw a box plot of validation error for each value of lambda.

    `val_err` is loaded into a pandas DataFrame with one column per entry
    of `lam_range`, melted to long form (`variable`, `value`) and plotted
    with ggplot2 on the current graphics device.

    Source: http://rpy.sourceforge.net/rpy2/doc-2.3/html/graphics.html
    """
    df = pd.melt(pd.DataFrame(val_err, columns = lam_range))
    df_r = com.convert_to_r_dataframe(df)
    # Create boxplot
    pp = ggplot2.ggplot(df_r) + \
         ggplot2.aes_string(x='factor(variable)', y='value') + \
         ggplot2.geom_boxplot() + \
         ggplot2.ggtitle("Validation Error by Lambda")
    pp.plot()
コード例 #9
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Save a box plot of read counts per barcode, grouped by match status.

    Vectors of barcodes, counts and match flags are obtained from
    `get_vectors`. Counts are plotted in millions of reads, with jittered
    points over the boxes, and saved as `directory`/`expt_id`.png
    (4 x 5 in, 300 dpi).
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes,
                    'count': counts,
                    'matched': matches})
    p = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='factor(matched)', y='count / 1000000') + \
        ggplot2.geom_boxplot(outlier_size = 0) + \
        ggplot2.geom_jitter() + \
        ggplot2.ggtitle(label = expt_id) + \
        ggplot2.ggplot2.xlab(label = "") + \
        ggplot2.scale_y_continuous(name = "Count\n(million reads)")

    filename = "{0}/{1}.png".format(directory, expt_id)
    # use png()'s full argument name `units` rather than relying on R's
    # partial matching of `unit`
    grdevices.png(filename=filename, width=4, height=5, units='in', res=300)
    try:
        p.plot()
    finally:
        # make sure the PNG device is closed even if plotting raises
        grdevices.dev_off()
コード例 #10
0
##text_log+="average: "+str(rmean(test23)[0])+end
##text_log+="sum: "+str(rsum(test23)[0])+end
#
#roughbin= round(ma[0]/100)
#bins=round(roughbin/100)*100


#ma2=rmax(ed)

#dataf_subset = dataf.rx(dataf.rx2("contig").ro >= 18, true)

# Histogram of the 'Length' column of `dataf`, drawn with ggplot2 and then
# saved with R's ggsave().
scales = importr('scales')

gp = ggplot2.ggplot(dataf)

# NOTE(review): `bins` only appears in the x-axis label; geom_histogram()
# below is called with its defaults -- confirm this is intended.
bins=10
# rotate the x tick labels by 90 degrees
teest3=robjects.r('theme(axis.text.x=element_text(angle=90))')

pp = (
    gp
    + ggplot2.aes_string(x='Length')
    + ggplot2.geom_histogram()
    + ggplot2.ggtitle("Found IS fragment lengths")
    + ggplot2.scale_x_continuous(name="fragment lengths, bin="+str(bins),
                                 breaks=scales.pretty_breaks(20))
    + ggplot2.scale_y_continuous(labels=scales.comma, name="Count",
                                 breaks=scales.pretty_breaks(10))
    + teest3
)
pp.plot()
robjects.r.ggsave("/Users/security/science/dna_subj_hist.pdf")
コード例 #11
0
ファイル: gui.py プロジェクト: kenziD/A-Wonderful-Life
def show1():
	"""
	Draw a bar chart of time per project from day1.csv.

	Calls open1(), sources head1.r in R, loads day1.csv into an R data
	frame and plots `time` against `project` (bars filled by project).
	"""
	open1()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')
	pp = (
		ggplot2.ggplot(data)
		+ ggplot2.aes_string(x='project', y='time', fill='project')
		+ ggplot2.geom_bar(stat='identity')
		+ ggplot2.ggtitle("今日项目时间分布图")
		+ ggplot2.labs(x='项目', y='时间 (min)')
		# tilt the x tick labels by 45 degrees
		+ ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)})
	)
	pp.plot()
コード例 #12
0

# Evaluate ECR for building orientations 0, 10, ..., 360 (37 values) and
# keep the heat demand (element [2][0] of each result).
heat_demand = np.zeros(37)
Bdim = robjects.FloatVector([12,6])
for i,BO in enumerate(range(0,361,10)):
    res = ECR(Building_Orientation = BO,
                Building_Dim = Bdim)
    heat_demand[i] = res[2][0]

# Transform to R data types
hd = robjects.FloatVector(list(heat_demand))
bo = robjects.FloatVector(list(range(0,361,10)))

# Create a python dictionary
p_datadic = {'Heat_Demand': hd,
             'Building_Orientation': bo}

# Create R data.frame
r_dataf = robjects.DataFrame(p_datadic)

# plot with ggplot2: heat demand against orientation in polar coordinates
gp = ggplot2.ggplot(r_dataf)
pp = (
    gp
    + ggplot2.aes_string(y= 'Heat_Demand', x= 'Building_Orientation')
    + ggplot2.geom_line(colour = "red", size = 1)
    + ggplot2.coord_polar(direction = -1, start = -pi/2)
    + ggplot2.ggtitle("Heat demand for all possible buildimg orientations")
    + ggplot2.scale_x_continuous(breaks=robjects.FloatVector(range(0, 360, 15)))
)

pp.plot()
grdevices.dev_off()
コード例 #13
0
def plot_collectors_curve(args, start_times, read_lengths):
	"""
	Use rpy2 to create a collectors curve of the run.

	The cumulative number of reads or base pairs is drawn as a step plot
	against time (in hours) since the first read.

	args         -- options object; reads ``plot_type`` ('reads' or
	                'basepairs'), ``savedf`` (path to dump the data frame,
	                or falsy), ``extrapolate`` (upper x limit for the fitted
	                curve, or falsy), ``theme_bw`` and ``saveas`` (None to
	                plot on screen, else a .pdf or .png path).
	start_times  -- start time of each read.
	read_lengths -- length of each read, parallel to start_times.

	Raises ValueError for an unknown ``args.plot_type``. Logs an error and
	exits with status 1 when ``args.saveas`` has an unrecognized extension.
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]

	# adjust times to be relative to t_0, converted to hours
	# NOTE(review): the 1e-8 offset presumably keeps x strictly positive for
	# the power-law fit used when extrapolating -- confirm.
	r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001
		for t in start_times])
	r_read_lengths = robjects.IntVector(read_lengths)

	# compute the cumulative based on reads or total base pairs
	if args.plot_type == 'reads':
		y_label = "Total reads"
		cumulative = \
			r.cumsum(robjects.IntVector([1] * len(start_times)))
	elif args.plot_type == 'basepairs':
		y_label = "Total base pairs"
		cumulative = r.cumsum(r_read_lengths)
	else:
		# previously fell through and failed later with an unbound local
		raise ValueError("Unknown plot type: %s" % (args.plot_type,))

	# make a data frame of the lists
	d = {'start': r_start_times,
		'lengths': r_read_lengths,
		'cumul': cumulative}
	df = robjects.DataFrame(d)

	if args.savedf:
		robjects.r("write.table")(df, file=args.savedf, sep="\t")

	# title
	total_reads = len(read_lengths)
	total_bp = sum(read_lengths)
	plot_title = "Yield: " \
		+ str(total_reads) + " reads and " \
		+ str(total_bp) + " base pairs."

	# plot
	gp = ggplot2.ggplot(df)
	pp = gp + ggplot2.aes_string(x='start', y='cumul') \
		+ ggplot2.geom_step(size=2) \
		+ ggplot2.scale_x_continuous('Time (hours)') \
		+ ggplot2.scale_y_continuous(y_label) \
		+ ggplot2.ggtitle(plot_title)

	# extrapolation: nls fit of y ~ a * (seconds)^b, drawn up to the limit
	if args.extrapolate:
		start = robjects.ListVector({'a': 1, 'b': 1})
		pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
				formula='y~a*I((x*3600)^b)',
				se='FALSE', start=start) \
			+ ggplot2.xlim(0, float(args.extrapolate))

	if args.theme_bw:
		pp = pp + ggplot2.theme_bw()

	if args.saveas is not None:
		plot_file = args.saveas
		if plot_file.endswith(".pdf"):
			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
		elif plot_file.endswith(".png"):
			grdevices.png(plot_file, width = 8.5, height = 8.5,
				units = "in", res = 300)
		else:
			logger.error("Unrecognized extension for %s!" % (plot_file))
			# non-zero status so callers can detect the failure
			sys.exit(1)

		try:
			pp.plot()
		finally:
			# always close the device so the file is flushed and not leaked
			grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		try:
			wait_for_enter = raw_input  # Python 2
		except NameError:
			wait_for_enter = input  # Python 3
		wait_for_enter()
コード例 #14
0
ファイル: runDE.py プロジェクト: wqhf/flair
def main():
    '''
    Run a DESeq2 differential expression analysis through rpy2.

    Reads the count matrix and the sample table ("formula" file) named on
    the command line and fits DESeq2 with the design ``~ condition``, or
    ``~ batch + condition`` when the sample table has a ``batch`` column.
    Writes into ``outDir`` (relative to the current directory):

    * a multi-page QC PDF (PCA plot, two MA plots, p-value QQ plot,
      p-value histogram, dispersion estimates);
    * two tab-separated result tables (plain results and LFC-shrunken).
    '''

    # Command Line Stuff...
    myCommandLine = CommandLine()

    outdir     = myCommandLine.args['outDir']
    group1     = myCommandLine.args['group1']
    group2     = myCommandLine.args['group2']
    matrix     = myCommandLine.args['matrix']
    prefix     = myCommandLine.args['prefix']
    formula    = myCommandLine.args['formula']

    print("running DESEQ2 %s" % prefix, file=sys.stderr)

    # make the quant DF
    quantDF  = pd.read_table(matrix, header=0, sep='\t', index_col=0)
    df = pandas2ri.py2ri(quantDF)

    # import formula
    formulaDF     = pd.read_csv(formula,header=0, sep="\t",index_col=0)
    sampleTable = pandas2ri.py2ri(formulaDF)

    # a `batch` column switches on the batch term in the design and the
    # batch-aware PCA plot below
    has_batch = "batch" in list(formulaDF)
    if has_batch:
        design = Formula("~ batch + condition")
    else:
        design = Formula("~ condition")

    # import DESeq2
    from rpy2.robjects.packages import importr
    import rpy2.robjects.lib.ggplot2 as ggplot2
    # methods, DESeq2 and qqman are imported only to load them into the
    # R session; their functions are reached through R(...) / robjects.r[...]
    importr('methods')
    importr('DESeq2')
    grdevices = importr('grDevices')
    importr('qqman')

    ### RUN DESEQ2 ###
    R.assign('df', df)
    R.assign('sampleTable', sampleTable)
    R.assign('design',design)
    R('dds <- DESeqDataSetFromMatrix(countData = df, colData = sampleTable, design = design)')
    R('dds <- DESeq(dds)')
    R('name <- grep("condition", resultsNames(dds), value=TRUE)')

    # Get Results and shrinkage values
    res    = R('results(dds, name=name)')
    resLFC = R('lfcShrink(dds, coef=name)')
    vsd    = R('vst(dds,blind=FALSE)')
    resdf  = robjects.r['as.data.frame'](res)
    reslfc = robjects.r['as.data.frame'](resLFC)
    dds    = R('dds')

    ### Plotting section ###
    # plot MA and PC stats for the user
    plotMA    = robjects.r['plotMA']
    plotDisp  = robjects.r['plotDispEsts']
    plotPCA   = robjects.r['plotPCA']
    plotQQ    = robjects.r['qq']

    # get pca data
    if has_batch:
        intgroup = robjects.StrVector(("condition", "batch"))
    else:
        # NOTE(review): looks like leftover debug output; kept so stdout
        # is unchanged
        print(vsd)
        intgroup = "condition"
    pcaData    = plotPCA(vsd, intgroup=intgroup, returnData=robjects.r['T'])
    percentVar = robjects.r['attr'](pcaData, "percentVar")

    data_folder = os.path.join(os.getcwd(), outdir)
    qcOut = os.path.join(data_folder, "%s_QCplots_%s_v_%s.pdf"  % (prefix,group1,group2))

    # The "%%" must be inside the formatted string: previously it was
    # concatenated after formatting, so the labels showed a literal "%%".
    x = "PC1: %s%% variance" % int(percentVar[0]*100)
    y = "PC2: %s%% variance" % int(percentVar[1]*100)

    # batch, when present, is mapped to the point shape
    aes_args = {"x": "PC1", "y": "PC2", "color": "condition"}
    if has_batch:
        aes_args["shape"] = "batch"
    pp = ggplot2.ggplot(pcaData) + \
            ggplot2.aes_string(**aes_args) + \
            ggplot2.geom_point(size=3) + \
            robjects.r['xlab'](x) + \
            robjects.r['ylab'](y) + \
            ggplot2.theme_classic() + \
            ggplot2.coord_fixed()

    grdevices.pdf(file=qcOut)
    try:
        pp.plot()
        plotMA(res, ylim=robjects.IntVector((-3,3)), main="MA-plot results")
        plotMA(resLFC, ylim=robjects.IntVector((-3,3)), main="MA-plot LFCSrhinkage")
        plotQQ(reslfc.rx2('pvalue'), main="LFCSrhinkage pvalue QQ")
        hh = ggplot2.ggplot(resdf) + \
                ggplot2.aes_string(x="pvalue") + \
                ggplot2.geom_histogram() + \
                ggplot2.theme_classic() + \
                ggplot2.ggtitle("pvalue distribution")
        hh.plot()
        plotDisp(dds, main="Dispersion Estimates")
    finally:
        # close the PDF even if one of the plots fails
        grdevices.dev_off()

    lfcOut = os.path.join(data_folder, "%s_%s_v_%s_deseq2_results_shrinkage.tsv"  % (prefix,group1,group2))
    resOut = os.path.join(data_folder, "%s_%s_v_%s_deseq2_results.tsv"  % (prefix,group1,group2))

    robjects.r['write.table'](reslfc, file=lfcOut, quote=False, sep="\t")
    robjects.r['write.table'](resdf, file=resOut, quote=False, sep="\t")
コード例 #15
0
# Compute cv() for every row (peak) of `dataf`, skipping the first two
# columns, append the values to the table as a 'CV' column, write the table
# to CSV and plot CV against the peak number.
number_of_peaks = len(dataf[0])
peak_numbers = range(1, number_of_peaks + 1)  # R rows are 1-based

cvI = []
newRow = []
for i in peak_numbers:
    row = dataf.rx(i, True)
    rowA = np.array(row)
    values = rowA[2:]
    newRow.append(values)
    cvI.append(cv(values))

cv_r = robjects.conversion.py2ri(cvI)
df_cv = {'CV': cv_r}
dataf_cv = robjects.DataFrame(df_cv)
dtf_cv = robjects.r.melt(dataf_cv)

# bind the melted 'value' column onto the original table and rename it
d = dataf.cbind(dtf_cv.rx(2))
value_position = tuple(d.colnames).index('value')
d.names[value_position] = 'CV'
utilis.write_csv(d, options.csv_output)

dc = dtf_cv.cbind(n_peak=robjects.IntVector(peak_numbers))
gp = ggplot2.ggplot(dc)
pp = (
    gp
    + ggplot2.aes_string(x='n_peak', y='value')
    + ggplot2.geom_point()
    + ggplot2.theme_bw()
    + ggplot2.ggtitle('Coefficient of Variation')
    + ggplot2.scale_x_continuous("Number of Peaks")
    + ggplot2.scale_y_continuous("CV")
)

# open an X11 window and draw the plot in it
r.X11()
pp.plot()

コード例 #16
0
    def _plot_with_rpy2(self, regions, filename):
        """Plot the first `self.num_regs` regions into `filename`.pdf.

        For every region three stacked line plots are drawn on one page,
        covering `self.num_bins` bins on either side of the middle bin: the
        weighted values (log2 y scale), -log10 of the p-values, and -log10
        of the corrected p-values with a dashed line at
        -log10(`self.threshold`). Regions without any data are skipped with
        a warning. The process exits with status 1 if a region has fewer
        bins than requested.
        """
        from rpy2 import robjects
        import rpy2.robjects.lib.ggplot2 as ggplot2
        from rpy2.robjects.lib import grid
        from rpy2.robjects.packages import importr
        grdevices = importr('grDevices')
        base = importr('base')
        grdevices.pdf(file=filename + '.pdf')

        t = list(range(-self.num_bins, self.num_bins + 1))
        try:
            for region in regions[:self.num_regs]:
                if not np.any(region.weighted):
                    logger.warning(
                        "Warning: No data for region located on bin " + str(region.bin) + ". Not plotting this one.")
                    continue
                # floor division: the result is used as a slice index, which
                # must be an int (true division would give a float on Python 3)
                middle = (len(region.weighted[0]) - 1) // 2
                if middle < self.num_bins:
                    logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                    sys.exit(1)
                # window of bins centred on the middle bin
                lo = middle - self.num_bins
                hi = middle + self.num_bins + 1
                n_maps = len(region.weighted)
                d = {
                    # one label per row of `weighted`, repeated once per plotted bin
                    'map': robjects.StrVector([str(m) for m in range(n_maps) for _ in t]),
                    't': robjects.FloatVector(t * n_maps),
                    'e': robjects.FloatVector(
                        [i for sublist in region.weighted for i in sublist[lo:hi]]),
                    'p': robjects.FloatVector(
                        [-np.log10(x) for sublist in region.pvalues for x in sublist[lo:hi]]),
                    'c': robjects.FloatVector(
                        [-np.log10(x) for sublist in region.corrected_pvalues for x in sublist[lo:hi]])}
                dataf = robjects.DataFrame(d)
                gp = ggplot2.ggplot(dataf)  # first yellow second red
                p1 = gp \
                    + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='e', group='map', colour='map'),
                                        alpha=0.8) \
                    + ggplot2.scale_y_continuous(trans='log2') \
                    + ggplot2.ggtitle(
                        "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions)))) \
                    + ggplot2.labs(y="log Intensity") \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(
                        **{'axis.title.x': ggplot2.element_blank(), 'axis.text.y': ggplot2.element_text(angle=45),
                           'axis.text.x': ggplot2.element_blank(),
                           'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                p2 = gp \
                    + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='p', group='map', colour='map'),
                                        alpha=0.8) \
                    + ggplot2.labs(y="-log10(p-value)") \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(
                        **{'axis.title.x': ggplot2.element_blank(), 'axis.text.x': ggplot2.element_blank(),
                           'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                p3 = gp \
                    + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='c', group='map', colour='map'),
                                        alpha=0.8) \
                    + ggplot2.labs(y="-log10(q-value)",
                                   x='bins (' + str(self.bin_res) + ' bp each)') \
                    + ggplot2.geom_hline(mapping=ggplot2.aes_string(yintercept=str(-np.log10(self.threshold))),
                                         colour='black', alpha=0.8, linetype='dashed') \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                # stack the three panels vertically and draw them on one page
                g1 = ggplot2.ggplot2.ggplotGrob(p1)
                g2 = ggplot2.ggplot2.ggplotGrob(p2)
                g3 = ggplot2.ggplot2.ggplotGrob(p3)
                robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
                robjects.r("grid::grid.draw(g)")
                grid.newpage()
                logger.debug('Plotted region ' + str(region.bin))
        finally:
            # close the PDF device on every exit path (including sys.exit)
            grdevices.dev_off()
コード例 #17
0
ファイル: yield_plot.py プロジェクト: monkollek/poretools
def plot_collectors_curve(args, start_times, read_lengths):
	"""
	Use rpy2 to create a collectors curve of the run.

	The cumulative number of reads or base pairs is plotted (points joined
	by a line) against time in hours since the first read.

	args         -- options object; reads ``plot_type`` ('reads' or
	                'basepairs') and ``saveas`` (None to plot on screen,
	                else a .pdf or .png path).
	start_times  -- start time of each read.
	read_lengths -- length of each read, parallel to start_times.

	Raises ValueError for an unknown ``args.plot_type``. Writes a message
	to stderr and exits with status 1 when ``args.saveas`` has an
	unrecognized extension.
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]

	# adjust times to be relative to t_0, converted to hours
	r_start_times = robjects.FloatVector([float(t - t_0) / float(3600)
		for t in start_times])
	r_read_lengths = robjects.IntVector(read_lengths)

	# compute the cumulative based on reads or total base pairs
	if args.plot_type == 'reads':
		y_label = "Total reads"
		cumulative = \
			r.cumsum(robjects.IntVector([1] * len(start_times)))
	elif args.plot_type == 'basepairs':
		y_label = "Total base pairs"
		cumulative = r.cumsum(r_read_lengths)
	else:
		# previously fell through and failed later with an unbound local
		raise ValueError("Unknown plot type: %s" % (args.plot_type,))

	# make a data frame of the lists
	d = {'start': r_start_times,
		'lengths': r_read_lengths,
		'cumul': cumulative}
	df = robjects.DataFrame(d)

	# title
	total_reads = len(read_lengths)
	total_bp = sum(read_lengths)
	plot_title = "Yield: " \
		+ str(total_reads) + " reads and " \
		+ str(total_bp) + " base pairs."

	# plot
	gp = ggplot2.ggplot(df)
	pp = gp + ggplot2.aes_string(x='start', y='cumul') \
		+ ggplot2.geom_point() \
		+ ggplot2.geom_line() \
		+ ggplot2.scale_x_continuous('Time (hours)') \
		+ ggplot2.scale_y_continuous(y_label) \
		+ ggplot2.ggtitle(plot_title)

	if args.saveas is not None:
		plot_file = args.saveas
		if plot_file.endswith(".pdf"):
			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
		elif plot_file.endswith(".png"):
			grdevices.png(plot_file, width = 8.5, height = 8.5,
				units = "in", res = 300)
		else:
			# sys.stderr.write works on Python 2 and 3, unlike the
			# `print >>sys.stderr` statement
			sys.stderr.write("Unrecognized extension for %s!" % (plot_file) + "\n")
			# non-zero status so callers can detect the failure
			sys.exit(1)

		try:
			pp.plot()
		finally:
			# always close the device so the file is flushed and not leaked
			grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		try:
			wait_for_enter = raw_input  # Python 2
		except NameError:
			wait_for_enter = input  # Python 3
		wait_for_enter()
コード例 #18
0
ファイル: makehistogram.py プロジェクト: TheodenS/ISsuite
#text_log+="sum: "+str(rsum(test23)[0])+end

# Derive a bin size from the maximum (`ma[0]`), rounded to a multiple of 100.
# NOTE(review): `bins` only appears in the x-axis label; geom_histogram()
# below is called with its defaults -- confirm this is intended.
roughbin = round(ma[0]/100)
bins = round(roughbin/100)*100

scales = importr('scales')

gp = ggplot2.ggplot(dataf)

# rotate the x tick labels by 90 degrees
theme = robjects.r('theme(axis.text.x=element_text(angle=90))')

# Histogram of the 'Length' column. The second ggtitle() (args.title) is
# added after the fixed one, as in the original layer order.
pp = (
    gp
    + ggplot2.aes_string(x='Length')
    + ggplot2.geom_histogram()
    + ggplot2.ggtitle("Found IS fragment lengths")
    + ggplot2.scale_x_continuous(name="fragment lengths, bin="+str(bins),
                                 breaks=scales.pretty_breaks(20))
    + ggplot2.scale_y_continuous(labels=scales.comma, name="Count",
                                 breaks=scales.pretty_breaks(10))
    + ggplot2.ggtitle(args.title)
    + theme
)
pp.plot()
robjects.r.ggsave(args.out)
コード例 #19
0
ファイル: gui.py プロジェクト: kenziD/A-Wonderful-Life
def show4():
	"""
	Draw a dodged bar chart comparing time per day for the projects in project2.csv.

	Calls open4(), sources end.R in R, loads project2.csv into an R data
	frame and plots `time` against `day`, with bars filled by project.
	"""
	open4()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')
	pp = (
		ggplot2.ggplot(data)
		+ ggplot2.aes_string(x='day', y='time', fill='factor(project)')
		+ ggplot2.geom_bar(stat='identity', position='dodge')
		+ ggplot2.ggtitle("两项目时间对比图")
		+ ggplot2.labs(x='日期', y='时间 (min)')
		# tilt the x tick labels by 45 degrees
		+ ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)})
	)
	pp.plot()
コード例 #20
0
def _plt_percountr(dat, independentpdf=False, fname='xpercount.pdf'):
    """Plot how many miRNA each TSS has and how many TSS each miRNA has.

    Rows whose `label` equals 'NA' are dropped. Six figures are drawn in
    order: counts and density of miRNA per TSS, then counts and density of
    TSS per miRNA, first unsplit and then filled by `label`. With
    `independentpdf` they are written to the PDF `fname`; otherwise they
    are drawn on the current graphics device.
    """

    def _filt_dat(dat, item, getlabel=True):
        # Occurrences of each value of `item`, optionally with the label of
        # the first matching row; also returns the row count and max count.
        tally = pd.DataFrame(dat[item].value_counts())
        tally.columns = ['count']
        if getlabel:
            tally['label'] = [
                list(dat[dat[item] == key]['label'])[0] for key in tally.index
            ]
        return tally, len(tally), max(tally['count'])

    dat = dat[dat['label'] != 'NA']

    ## NUMBER OF MIRNA PER TSS
    tally, n, mx = _filt_dat(dat, 'tss', False)
    tss_frame = robjects.DataFrame({'count': robjects.IntVector(tally['count'])})

    pt = ggplot2.ggplot(tss_frame) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5, position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.aes_string(x='count') + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    pt_den = ggplot2.ggplot(tss_frame) + \
        ggplot2.aes_string(x='count', y='..density..') + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity', origin=-.5) + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    ## NUMBER OF TSS PER MIRNA
    tally, n, mx = _filt_dat(dat, 'mirna')
    mirna_frame = robjects.DataFrame({
        'count': robjects.IntVector(tally['count']),
        'label': robjects.StrVector(tally['label'])
    })

    # bases without aesthetics; the aesthetics decide split / not split
    counts_base = ggplot2.ggplot(mirna_frame) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5, position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    density_base = ggplot2.ggplot(mirna_frame) + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity', origin=-.5) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    ## x label (needs to be added after aes_string)
    x_label = ggplot2.labs(x='Number of TSS per miRNA (max = %s)' % mx)

    ## not split by label
    pm = counts_base + ggplot2.aes_string(x='count') + x_label
    pm_den = density_base + ggplot2.aes_string(x='count', y='..density..') + x_label

    ## split by label
    pms = counts_base + ggplot2.aes_string(x='count', fill='label') + x_label
    pm_dens = density_base + ggplot2.aes_string(
        x='count', fill='label', y='..density..') + x_label

    figures = (pt, pt_den, pm, pm_den, pms, pm_dens)

    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(fname)
    for figure in figures:
        figure.plot()
    if independentpdf:
        grdevices.dev_off()
    return
コード例 #21
0
ファイル: graphics.py プロジェクト: hansenrl/cs249-2
#-- ggplot2mtcars-end
# Close the graphics device that is current at this point.
grdevices.dev_off()

# NOTE(review): the "#-- <name>-begin" / "#-- <name>-end" comments look like
# markers used to extract snippets into documentation -- keep the code
# between them unchanged; confirm against the docs build.

# Open a 1000x350 PNG and push a grid layout of 1 row x 3 columns; each plot
# below is drawn into one cell of that layout through its own viewport.
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width = 1000, height = 350, antialias="subpixel", type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# viewport for column 1: geom_bin2d
vp = grid.viewport(**{'layout.pos.col':1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
     ggplot2.aes_string(x='value', y='other_value') + \
     ggplot2.geom_bin2d() + \
     ggplot2.ggtitle('geom_bin2d')
pp.plot(vp = vp)
#-- ggplot2geombin2d-end

# viewport for column 2: geom_density2d
vp = grid.viewport(**{'layout.pos.col':2, 'layout.pos.row': 1})
#-- ggplot2geomdensity2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
     ggplot2.aes_string(x='value', y='other_value') + \
     ggplot2.geom_density2d() + \
     ggplot2.ggtitle('geom_density2d')
pp.plot(vp = vp)
#-- ggplot2geomdensity2d-end

# viewport for column 3
vp = grid.viewport(**{'layout.pos.col':3, 'layout.pos.row': 1})
コード例 #22
0
ファイル: yield_plot.py プロジェクト: tmfarrell/ont_dap
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve (cumulative yield over time)
    of the run.

    Parameters:
        args: options object. The attributes read here are:
            plot_type   -- 'reads' (count reads) or 'basepairs' (sum lengths)
            skip        -- keep every skip-th read in the plotted data frame
            savedf      -- if truthy, path the data frame is written to
                           (tab-separated, via R's write.table)
            extrapolate -- if truthy, upper x limit (hours) for an nls
                           power-law fit drawn over the curve
            theme_bw    -- if truthy, apply ggplot2's theme_bw
            saveas      -- output file ending in '.pdf' or '.png', or None
                           to plot on screen and wait for Enter
        start_times: read start times. They are divided by 3600 and
            labelled as hours, so they are presumably seconds; the first
            element is taken as time zero (assumes the sequence is sorted
            -- TODO confirm against the caller).
        read_lengths: integer read lengths, parallel to start_times.

    Raises:
        ValueError: if args.plot_type is not 'reads' or 'basepairs', or if
            start_times is empty.

    Exits the process with status 1 if args.saveas has an unrecognized
    extension.
    """
    # Fail fast, before any R call, on input the rest of the function
    # cannot handle. Previously an unknown plot_type surfaced much later
    # as an unbound-variable error.
    if args.plot_type not in ('reads', 'basepairs'):
        raise ValueError(
            "Unknown plot_type %r: expected 'reads' or 'basepairs'"
            % (args.plot_type,))
    if len(start_times) == 0:
        raise ValueError("Cannot plot a collectors curve without any reads")

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0, in hours. The tiny offset keeps
    # the first x value strictly positive (presumably for the power-law
    # fit used when extrapolating).
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) + 0.00000001 for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    else:
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)

    step = args.skip

    def every_nth(vector):
        # `range` (not `xrange`) so this runs on both Python 2 and 3.
        return [vector[n] for n in range(0, len(vector), step)]

    # make a data frame of the (sub-sampled) lists
    d = {
        'start': robjects.FloatVector(every_nth(r_start_times)),
        'lengths': robjects.IntVector(every_nth(r_read_lengths)),
        'cumul': robjects.IntVector(every_nth(cumulative)),
    }
    df = robjects.DataFrame(d)

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_step(size=2) \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    # extrapolation
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se='FALSE', start=start) \
                + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file,
                          width=8.5,
                          height=8.5,
                          units="in",
                          res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # Non-zero status: this is a failure, not a clean exit.
            sys.exit(1)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
コード例 #23
0
ファイル: convertAPTresults.py プロジェクト: sofiakp/rnapred
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type.

    For every cell type in ``expt_names``, each pair of its CEL files is
    drawn as a scatter plot titled with the Pearson correlation, and the
    correlations are collected into a summary table.

    Args:
        expr: 2-D array indexed as ``expr[:, column]``; one column per CEL
            file.
        expt_names: iterable of cell-type names.
        expt_name_idx: mapping from cell-type name to the sequence of
            column indices of ``expr`` belonging to that cell type.
        cel_names: CEL file names, indexed by column. Underscores are
            replaced by dots for the axis labels.
        outdir: if None, plots are drawn on the current R device.
            Otherwise each plot is saved as ``<cell type>-<n>.png`` in this
            directory (created if missing) and the summary is written to
            ``cor_summary.txt`` there.

    Returns:
        pandas.DataFrame with columns ``name1``, ``name2`` and ``cor``,
        indexed by ``<cell type>-<n>``.
    """

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front. Doing it inside the pair
    # loop meant it was never created when no cell type had two CEL files,
    # and writing the summary below then failed.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for pos, col_1 in enumerate(tmp_idx):
            name1 = cel_names[col_1].replace('_', '.')
            # Only columns after the current one: each unordered pair once.
            for col_2 in tmp_idx[pos + 1:]:
                name2 = cel_names[col_2].replace('_', '.')
                plot_idx += 1
                title = ex + '-' + str(plot_idx)
                cor = np.corrcoef(expr[:, col_1], expr[:, col_2])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(title)

                df = ro.DataFrame({
                    'x': ro.FloatVector(expr[:, col_1]),
                    'y': ro.FloatVector(expr[:, col_2])
                })
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                ggplot2.geom_point(size = 1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                 'axis.title.x':ggplot2.element_text(size = 8),
                                 'axis.text.y':ggplot2.element_text(size = fsize),
                                 'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                 'plot.title':ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, title + '.png')
                    # `units` is ggsave's actual argument name; `unit` only
                    # worked through R's partial argument matching.
                    ro.r.ggsave(filename=outfile,
                                plot=gp,
                                width=85,
                                height=85,
                                units='mm')
    df = pd.DataFrame({
        'name1': names_1,
        'name2': names_2,
        'cor': cors
    },
                      index=titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
コード例 #24
0
ファイル: r_environgraph.py プロジェクト: TheodenS/ISsuite
# Debug aid (disabled): print onlysurf

# Colour ramp for the gradient scale. Alternatives left disabled by the
# author: grdevices.topo_colors(10), grdevices.rainbow(20),
# ggplot2.rainbow(54); a `bins=10` setting was also tried.
colours2 = grdevices.cm_colors(10)
# Debug aid (disabled): print colours2

# Build the plot from `onlysurf` (`onlyfilts` is the disabled alternative):
# stations positioned by Lon/Lat, coloured by Temp, labelled by Station.
gp = (
    ggplot2.ggplot(onlysurf)
    + ggplot2.aes_string(x="Lon", y="Lat", col="Temp", label="Station")
    + ggplot2.scale_colour_gradientn(colours=colours2)
    + ggplot2.geom_text(col="black", offset=10)
    + ggplot2.geom_point(position="jitter")
    + ggplot2.ggtitle(graphtitle)
)

# Run the map set-up directly in the embedded R session. Note that the
# value returned for `ggmap(map)` is not used here.
for _r_statement in (
        'library(ggmap)',
        'library(mapproj)',
        'map <- get_map(location = "Europe", zoom = 4)',
        'ggmap(map)'):
    robjects.r(_r_statement)

# Disabled alternative based on the `maps` package:
#robjects.r('library(maps)')
#robjects.r('map("world", interior = FALSE)')
#robjects.r('map("state", boundary = FALSE, col="gray", add = TRUE)')
#gp.plot()

'''
pp = gp + \
コード例 #25
0
ファイル: plotter.py プロジェクト: hijinks/gis-data-plotter
r_sq_lab = "R^{2}~" + r_sq

# Axis titles, built as R expression() objects.
y_lab = r("expression(Discharge (m^{3}/s))")
x_lab = r("expression(Area (km^{2}))")

# Red text block: anchored 30 x-units left of the largest area.
_red_x = str(max(areas) - 30)
annotate1 = r('annotate("text", x = ' + _red_x
              + ', y = 0.5, color = "red", label = "Mean Annual", parse=FALSE)')
annotate2 = r('annotate("text", x = ' + _red_x
              + ', y = 0.42, label = "' + r_sq_lab + '", color = "red", parse=TRUE)')
annotate3 = r('annotate("text", x = ' + _red_x
              + ', y = 0.34, label = "slope~' + sl + '", color = "red", parse=TRUE)')

# Blue text block ("LGM"): anchored 150 x-units left of the largest area.
_blue_x = str(max(areas) - 150)
annotate4 = r('annotate("text", x = ' + _blue_x
              + ', y = 0.7, color = "blue", label = "LGM", parse=FALSE)')
annotate5 = r('annotate("text", x = ' + _blue_x
              + ', y = 0.6, color = "blue", label = "' + r_sq_lab_lgm + '", parse=TRUE)')
annotate6 = r('annotate("text", x = ' + _blue_x
              + ', y = 0.5, color = "blue", label = "slope~' + sl_lgm + '", parse=TRUE)')

# Log-log scatter of discharge against area with a linear fit; the points
# and fit from `dat_frame2` are overlaid in red.
pp = (
    ggplot2.ggplot(dat_frame)
    + ggplot2.aes_string(y='discharge', x='areas')
    + ggplot2.ggtitle('Area vs. Sediment Flux')
    + ggplot2.scale_x_log10(x_lab)
    + ggplot2.theme_bw()
    + ggplot2.stat_smooth(method="lm", formula='y ~ x')
    + ggplot2.scale_y_log10(y_lab)
    + annotate1
    + annotate2
    + annotate3
    + annotate4
    + annotate5
    + annotate6
    + ggplot2.geom_point(color='blue')
    + ggplot2.geom_errorbar(ggplot2.aes_string(ymin='min', ymax='max'),
                            data=dat_frame, width=.02, alpha=.3)
    + ggplot2.geom_point(data=dat_frame2, color='red', show_guide='FALSE')
    + ggplot2.stat_smooth(data=dat_frame2, method="lm", formula='y ~ x',
                          color='red')
)
コード例 #26
0
ファイル: graphics.py プロジェクト: realmichaelzyy/cs249-2
# Open a PNG device and split the page into one row of three panels.
grdevices.png(
    '../../_static/graphics_ggplot2geombin2d.png',
    width=1000, height=350, antialias="subpixel", type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# Panel 1 (row 1, column 1): 2-D binning.
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = (gp
      + ggplot2.aes_string(x='value', y='other_value')
      + ggplot2.geom_bin2d()
      + ggplot2.ggtitle('geom_bin2d'))
pp.plot(vp=vp)
#-- ggplot2geombin2d-end

# Panel 2 (row 1, column 2): 2-D density contours.
vp = grid.viewport(**{'layout.pos.col': 2, 'layout.pos.row': 1})
#-- ggplot2geomdensity2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = (gp
      + ggplot2.aes_string(x='value', y='other_value')
      + ggplot2.geom_density2d()
      + ggplot2.ggtitle('geom_density2d'))
pp.plot(vp=vp)
#-- ggplot2geomdensity2d-end

# Panel 3 (row 1, column 3): viewport for the plot that follows.
vp = grid.viewport(**{'layout.pos.col': 3, 'layout.pos.row': 1})
コード例 #27
0
ファイル: squiggle.py プロジェクト: tmfarrell/ont_dap
def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot (mean signal over time) of the read.

    Parameters:
        args: options object. The attributes read here are:
            num_facets -- number of stacked panels the read is split into
            theme_bw   -- if truthy, apply ggplot2's theme_bw
            saveas     -- 'pdf' or 'png' to write
                          ``<basename of filename>.<saveas>`` in the current
                          directory, or None to plot on screen and wait
                          for Enter
        filename: name of the read's file; used in the title and to derive
            the output file name.
        start_times: event start times (labelled as seconds on the plot);
            the first element is taken as time zero.
        mean_signals: mean signal per event (labelled as picoamps),
            parallel to start_times.

    Raises:
        ValueError: if args.saveas is not None, 'pdf' or 'png', or if
            start_times is empty.
        Exception: if the output plot file already exists.
    """
    # Fail fast, before any R call. Previously an unsupported format opened
    # no device but still plotted and called dev_off().
    if args.saveas not in (None, "pdf", "png"):
        raise ValueError(
            "Unsupported plot format %r: expected 'pdf' or 'png'"
            % (args.saveas,))
    if len(start_times) == 0:
        raise ValueError(
            'Cannot create plot for %s: no events to plot' % filename)

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - start_times[0]
    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets.
    # Floor division is required: with true division (Python 3) the facet
    # label below becomes fractional and nearly every event gets its own
    # facet.
    num_events = len(r_mean_signals)
    events_per_facet = (num_events // args.num_facets) + 1
    # dummy variable to control faceting: 1 for the first
    # events_per_facet events, 2 for the next, and so on
    facet_category = robjects.FloatVector([(i // events_per_facet) + 1
                                           for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
    df = robjects.DataFrame(d)

    title = 'Squiggle plot for read: ' + filename \
        + "\nTotal time (sec): " + str(total_time)
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='mean') \
        + ggplot2.geom_step(size=0.25) \
        + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
        + ggplot2.scale_x_continuous('Time (seconds)') \
        + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
        + ggplot2.ggtitle(title) \
        + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    if args.theme_bw:
        # Kept after theme(...) to match the existing layer order.
        # NOTE(review): a complete theme added last may override the title
        # size set above -- confirm whether that is intended.
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists' %
                (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        else:
            grdevices.png(plot_file, width=8.5, height=11, units="in", res=300)
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()