예제 #1
0
def plot_squiggle(args, filename, start_times, mean_signals):
	"""
	Use rpy2 to create a squiggle plot of the read
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]
	total_time = start_times[-1] - start_times[0]
	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
	r_mean_signals = robjects.FloatVector(mean_signals)
	
	# infer the appropriate number of events given the number of facets
	num_events = len(r_mean_signals)
	events_per_facet = (num_events / args.num_facets) + 1
	# dummy variable to control faceting
	facet_category = robjects.FloatVector([(i / events_per_facet) + 1 for i in range(len(start_times))])

	# make a data frame of the start times and mean signals
	d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
	df = robjects.DataFrame(d)

	gp = ggplot2.ggplot(df)
	if not args.theme_bw:
		pp = gp + ggplot2.aes_string(x='start', y='mean') \
			+ ggplot2.geom_step(size=0.25) \
			+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
			+ ggplot2.scale_x_continuous('Time (seconds)') \
			+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
			+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
			+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
	else:
		pp = gp + ggplot2.aes_string(x='start', y='mean') \
			+ ggplot2.geom_step(size=0.25) \
			+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
			+ ggplot2.scale_x_continuous('Time (seconds)') \
			+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
			+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
			+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)}) \
			+ ggplot2.theme_bw()

	if args.saveas is not None:
		plot_file = os.path.basename(filename) + "." + args.saveas
		if os.path.isfile(plot_file):
			raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
		if args.saveas == "pdf":
			grdevices.pdf(plot_file, width = 8.5, height = 11)
		elif args.saveas == "png":
			grdevices.png(plot_file, width = 8.5, height = 11, 
				units = "in", res = 300)
		pp.plot()
		grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		raw_input()
예제 #2
0
def make_output(tss_cov, out_prefix, upstream, downstream):
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-upstream,downstream+1):
        print >> raw_out, '%d\t%e' % (i, tss_cov[upstream+i])
    raw_out.close()

    # make plot data structures
    tss_i = ro.IntVector(range(-upstream,downstream+1))
    cov = ro.FloatVector(tss_cov)
    df = ro.DataFrame({'tss_i':tss_i, 'cov':cov})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_full.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()

    # construct zoomed plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index',limits=ro.IntVector([-1000,1000])) + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_zoom.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #3
0
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    qry = get_interval_query(intervals, loci, locus_table, interval_table)
    frame = robjects.r('''data <- dbGetQuery(con, {})'''.format(qry))
    # because we're sorting by interval, which is a factor, we need to
    # explicitly re-sort the data by the first integer value
    # of the interval.  This is a bit cumbersome, because sorting
    # in R is less than pleasant.
    sort_string = '''data$interval <- factor(data$interval, {})'''.format(order_intervals(frame[1]))
    robjects.r(sort_string)
    gg_frame = ggplot2.ggplot(robjects.r('''data'''))
    if boxplot:
        plot = gg_frame + ggplot2.aes_string(x = 'interval', y = 'pi') + \
                ggplot2.geom_boxplot(**{
                    'outlier.size':0, 
                    'alpha':0.3
                    }
                ) + \
                ggplot2.geom_jitter(ggplot2.aes_string(color = 'locus'), size = 3, \
                alpha = 0.6, position=ggplot2.position_jitter(width=0.25)) + \
                ggplot2.scale_y_continuous('phylogenetic informativeness') + \
                ggplot2.scale_x_discrete('interval (years ago)')

    else:
        plot = gg_frame + ggplot2.aes_string(x = 'interval', y = 'pi',
                fill='locus') + ggplot2.geom_bar() + \
                ggplot2.facet_wrap(robjects.Formula('~ locus')) + \
                ggplot2.opts(**{
                    'axis.text.x':ggplot2.theme_text(angle = -90,  hjust = 0),
                    'legend.position':'none'
                    }) + \
                ggplot2.scale_y_continuous('phylogenetic informativeness') + \
                ggplot2.scale_x_discrete('interval (years ago)')
    return plot
예제 #4
0
def main():
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int', help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss', help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int', help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float', help='Y-coordinate limit [Default: %default]')
    (options,args) = parser.parse_args()
    
    if len(args) != 1:
        parser.error('Must provide raw file')
    else:
        raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    for line in open(raw_file):
        a = line.split()
        coords.append(int(a[0]))
        main_cov.append(float(a[1]))
        control_cov.append(float(a[2]))

    # data structures
    tss_i = ro.IntVector(range(-options.upstream,options.downstream+1))
    labels = ro.StrVector(['Main']*(options.upstream+options.downstream+1)+['Control']*(options.upstream+options.downstream+1))
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

    # plot
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_colour_discrete('')
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax == None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0,options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #5
0
def make_output_and(cov, control_cov, out_prefix, window):
    # dump raw counts to file
    raw_out = open("%s_raw.txt" % out_prefix, "w")
    for i in range(-window / 2, window / 2 + 1):
        print >> raw_out, "%d\t%e\t%e" % (i, cov[window / 2 + i], control_cov[window / 2 + i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(2 * range(-window / 2, window / 2 + 1))
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(["Main"] * len(cov) + ["Control"] * len(control_cov))
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r, "label": labels})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov", colour="label")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
        + ggplot2.scale_colour_discrete("")
    )

    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #6
0
def make_output_and(cov, control_cov, out_prefix, window):
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-window/2,window/2+1):
        print >> raw_out, '%d\t%e\t%e' % (i, cov[window/2+i], control_cov[window/2+i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(2*range(-window/2,window/2+1))
    cov_r = ro.FloatVector(cov+control_cov)
    labels = ro.StrVector(['Main']*len(cov)+['Control']*len(control_cov))
    df = ro.DataFrame({'splice_i':splice_i, 'cov':cov_r, 'label':labels})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage') + \
        ggplot2.scale_colour_discrete('')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #7
0
def main():
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    #parser.add_option()
    (options,args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide BAM file')
    else:
        bam_file = args[0]

    align_lengths = {}
    for aligned_read in pysam.Samfile(bam_file, 'rb'):
        align_lengths[aligned_read.qlen] = align_lengths.get(aligned_read.qlen,0) + 1

    min_len = min(align_lengths.keys())
    max_len = max(align_lengths.keys())

    # construct data frame
    len_r = ro.IntVector(range(min_len,max_len+1))
    counts_r = ro.IntVector([align_lengths.get(l,0) for l in range(min_len,max_len+1)])
    
    df = ro.DataFrame({'length':len_r, 'counts':counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file
    grdevices.pdf(file='align_lengths.pdf')
    gp.plot()
    grdevices.dev_off()
예제 #8
0
def compare_sum_barplot(locus_table, interval_table, intervals, loci, names,
        rows):
    frame = get_r_data_by_top(locus_table, interval_table, intervals, names,
            rows)
    #pdb.set_trace()
    frame2 = robjects.r('''agg_data <- aggregate(pi ~ interval + db, data = data, sum)''')
    if len(intervals) > 1:
        sort_string = '''agg_data$interval <- factor(agg_data$interval,{})'''.format(order_intervals(frame2[0]))
        robjects.r(sort_string)
    gg_frame = ggplot2.ggplot(robjects.r('''agg_data'''))
    plot = gg_frame + \
        ggplot2.aes_string(
                x = 'interval', 
                y = 'pi',
                fill='factor(db)'
            ) + \
        ggplot2.geom_bar(**{
            'position':'dodge',
            'colour':'#767676',
            'alpha':0.6
            }
        ) + \
        ggplot2.scale_y_continuous('net phylogenetic informativeness') + \
        ggplot2.scale_x_discrete('interval (years ago)') + \
        ggplot2.scale_fill_brewer("database", palette="Blues")
    return plot
예제 #9
0
def make_output(cov, out_prefix, window):
    # dump raw counts to file
    raw_out = open("%s_raw.txt" % out_prefix, "w")
    for i in range(-window / 2, window / 2 + 1):
        print >> raw_out, "%d\t%e" % (i, cov[window / 2 + i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(range(-window / 2, window / 2 + 1))
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
    )

    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #10
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream,
                    downstream):
    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file
    for te in te_tss_cov:
        if te[0] in [
                'n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'
        ] and te[1] in [
                'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                'LTR/ERVK', 'DNA/TcMar-Tigger'
        ]:
            raw_out = open(
                '%s_raw/%s_%s.txt' %
                (out_prefix, te[0].replace('/', '_'), te[1].replace('/', '_')),
                'w')
            for i in range(-upstream, downstream + 1):
                print >> raw_out, '%d\t%e\t%e' % (i, te_tss_cov[te][
                    upstream + i], control_te_tss_cov[te][upstream + i])
            raw_out.close()

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures
    tss_i = ro.IntVector(2 * range(-upstream, downstream + 1))
    labels = ro.StrVector(['Main'] * (upstream + downstream + 1) +
                          ['Control'] * (upstream + downstream + 1))
    for te in te_tss_cov:
        if te[0] in [
                'n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'
        ] and te[1] in [
                'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                'LTR/ERVK', 'DNA/TcMar-Tigger'
        ]:
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(
                file='%s_plot/%s_%s.pdf' %
                (out_prefix, te[0].replace('/', '_'), te[1].replace('/', '_')))
            gp.plot()
            grdevices.dev_off()
예제 #11
0
def plot(data, x, y, ylabel, color, filename):
    gp = ggplot2.ggplot(data=data)
    gp = gp + \
    ggplot2.geom_line(ggplot2.aes_string(x=x, y=y), color=color) + \
    ggplot2.theme(**{'axis.text.x' : ggplot2.element_text(angle = 90, hjust = 1),
                      'strip.text.y' : ggplot2.element_text(size = 6, angle=90)})  + \
    ggplot2.scale_y_continuous(ylabel) 
    ggplot2.ggplot2.ggsave(filename, gp)
예제 #12
0
    def _plot_with_rpy2(self, regions, filename):
        from rpy2 import robjects
        import rpy2.robjects.lib.ggplot2 as ggplot2
        from rpy2.robjects.lib import grid
        from rpy2.robjects.packages import importr
        grdevices = importr('grDevices')
        base = importr('base')
        grdevices.pdf(file=filename + '.pdf')

        t = [x for x in range(-self.num_bins, self.num_bins + 1)]
        for region in regions[:self.num_regs]:
            if not np.any(region.weighted):
                logger.warning(
                    "Warning: No data for region located on bin " + str(region.bin) + ". Not plotting this one.")
                continue
            middle = (len(region.weighted[0]) - 1) / 2
            if middle < self.num_bins:
                logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                sys.exit(1)
            d = {'map': robjects.StrVector(
                [str(m) for sublist in [[x] * len(t) for x in range(len(region.weighted))] for m in sublist]),
                't': robjects.FloatVector(t * len(region.weighted)),
                'e': robjects.FloatVector([i for sublist in region.weighted for i in
                                           sublist[middle - self.num_bins:middle + self.num_bins + 1]]),
                'p': robjects.FloatVector([-np.log10(x) for sublist in region.pvalues for x in
                                           sublist[middle - self.num_bins:middle + self.num_bins + 1]]),
                'c': robjects.FloatVector([-np.log10(x) for sublist in region.corrected_pvalues for x in
                                           sublist[middle - self.num_bins:middle + self.num_bins + 1]])}
            dataf = robjects.DataFrame(d)
            gp = ggplot2.ggplot(dataf)  # first yellow second red
            p1 = gp + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='e', group='map', colour='map'),
                                        alpha=0.8) + ggplot2.scale_y_continuous(trans='log2') + ggplot2.ggtitle(
                "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions)))) + ggplot2.labs(
                y="log Intensity") + ggplot2.theme_classic() + ggplot2.theme(
                **{'axis.title.x': ggplot2.element_blank(), 'axis.text.y': ggplot2.element_text(angle=45),
                   'axis.text.x': ggplot2.element_blank(),
                   'legend.position': 'none'}) + ggplot2.scale_colour_brewer(palette="Set1")
            p2 = gp + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='p', group='map', colour='map'),
                                        alpha=0.8) + ggplot2.labs(
                y="-log10(p-value)") + ggplot2.theme_classic() + ggplot2.theme(
                **{'axis.title.x': ggplot2.element_blank(), 'axis.text.x': ggplot2.element_blank(),
                   'legend.position': 'none'}) + ggplot2.scale_colour_brewer(palette="Set1")
            p3 = gp + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='c', group='map', colour='map'),
                                        alpha=0.8) + ggplot2.labs(y="-log10(q-value)",
                                                                  x='bins (' + str(self.bin_res) + ' bp each)') + \
                 ggplot2.geom_hline(mapping=ggplot2.aes_string(yintercept=str(-np.log10(self.threshold))),
                                    colour='black', alpha=0.8, linetype='dashed') + ggplot2.theme_classic() + \
                 ggplot2.theme(**{'legend.position': 'none'}) + ggplot2.scale_colour_brewer(palette="Set1")
            g1 = ggplot2.ggplot2.ggplotGrob(p1)
            g2 = ggplot2.ggplot2.ggplotGrob(p2)
            g3 = ggplot2.ggplot2.ggplotGrob(p3)
            robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
            robjects.r("grid::grid.draw(g)")
            grid.newpage()
            logger.debug('Plotted region ' + str(region.bin))

        grdevices.dev_off()
예제 #13
0
def plot_coef(feat_mat_dir,
              model_dir,
              expt_names,
              pref,
              outfile=None,
              height=120,
              fsize=12):

    for expt_idx, ex in enumerate(expt_names):
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        (tmp_feat, tmp_y, tmp_feat_names,
         tmp_gene_names) = read_feat_mat(feat_mat_file)

        if expt_idx == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            assert (all(f[0] == f[1] for f in zip(feat_names, tmp_feat_names)))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)

    nexpt = expt_idx + 1

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size, )),
        'Regression': np.reshape(reg_coef, (reg_coef.size, ))
    })

    df2 = pd.melt(df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename=outfile,
                    plot=gp,
                    width=w,
                    height=height,
                    unit='mm')
    return df
예제 #14
0
def plot_cv_r2(pandas_df, outfile, fsize = 10, height = 120, max_width = 50, xlab = 'Parameters'):
    """Makes boxplots of cross-validation results for different parameter settings"""

    ncv = len(set(list(pandas_df['title'])))
    r_df = com.convert_to_r_dataframe(pandas_df)
    
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(title)', y = 'r2') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('R-squared') + \
        ggplot2.scale_x_discrete(xlab) + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize)})
    w = max(5 * ncv, max_width) 
    ro.r.ggsave(filename = outfile, plot = gp, width = w, height = height, unit = 'mm')
예제 #15
0
def single_locus_net_informativeness(locus_table, net_pi_table, locus):
    qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} 
    WHERE {0}.id = {1}.id AND locus = '{2}'"'''.format(locus_table,
            net_pi_table, locus)
    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y='pi') + \
            ggplot2.geom_point(size = 3, alpha = 0.4) + \
            ggplot2.scale_x_reverse('years ago') + \
            ggplot2.scale_y_continuous('phylogenetic informativeness') + \
            ggplot2.opts(title = locus)

    return plot
예제 #16
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes, 'count': counts, 'matched': matches})
    p = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='factor(matched)', y='count / 1000000') + \
        ggplot2.geom_boxplot(outlier_size = 0) + \
        ggplot2.geom_jitter() + \
        ggplot2.ggtitle(label = expt_id) + \
        ggplot2.ggplot2.xlab(label = "") + \
        ggplot2.scale_y_continuous(name = "Count\n(million reads)")

    filename = "{0}/{1}.png".format(directory, expt_id)
    grdevices.png(filename=filename, width=4, height=5, unit='in', res=300)
    p.plot()
    grdevices.dev_off()
예제 #17
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir = None):
    """Makes correlation plots between CEL files for the same cell type"""

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []
    
    for ex_idx, ex in enumerate(expt_names):
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0
        
        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                plot_idx += 1
                cor = np.corrcoef(expr[:, tmp_idx[i]], expr[:, tmp_idx[j]])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))
                
                df = ro.DataFrame({'x':ro.FloatVector(expr[:, tmp_idx[i]]), 
                                   'y':ro.FloatVector(expr[:, tmp_idx[j]])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                ggplot2.geom_point(size = 1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                 'axis.title.x':ggplot2.element_text(size = 8),
                                 'axis.text.y':ggplot2.element_text(size = fsize),
                                 'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                 'plot.title':ggplot2.element_text(size = fsize)})
                
                if outdir is None:
                    gp.plot()
                else:
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    outfile = os.path.join(outdir, ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename = outfile, plot = gp, width = 85, height = 85, unit = 'mm')
    df = pd.DataFrame({'name1':names_1, 'name2':names_2, 'cor':cors}, index = titles)
    if not outdir is None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep = '\t')
    return df
예제 #18
0
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    columns_to_data = {'subgroup': [], tname: [], 'count': []}
    max_count = 0
    for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items():
        for ss, n_count in sses_to_n_count.items():
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data[tname].append(ss)
            columns_to_data['count'].append(n_count)
            if n_count > max_count:
                max_count = n_count
    r_columns_to_data = {
        'subgroup':
        ro.FactorVector(columns_to_data['subgroup'],
                        levels=ro.StrVector(
                            _sort_subgroup(set(columns_to_data['subgroup'])))),
        tname:
        ro.StrVector(columns_to_data[tname]),
        'count':
        ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    max_count = int(max_count / 1000 * 1000 + 1000)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))

    grdevices.png(file=histogram_file_path, width=1200, height=800)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.aes_string(x='subgroup', y='count', fill=tname) + \
         ggplot2.geom_bar(position="dodge",width=0.8, stat="identity") + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)}) + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40,angle=45)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)}) + \
         ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                    limits=ro.IntVector([0, max_count])) + \
         ggplot2.geom_text(ggplot2.aes_string(label='count'), size=6, angle=35, hjust=-0.1,
                           position=ggplot2.position_dodge(width=0.8),
                           vjust=-0.2)

    pp.plot()
    logging.info(str.format("Output step3 file {}", histogram_file_path))
    grdevices.dev_off()
예제 #19
0
def multiple_locus_net_informativeness_scatterplot(locus_table, net_pi_table,
        loci):
    if loci[0].lower() != 'all':
        qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} 
            WHERE {0}.id = {1}.id and locus in {2}"'''.format(locus_table,
            net_pi_table, tuple(loci))
    else:
        qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} 
            WHERE {0}.id = {1}.id"'''.format(locus_table,
            net_pi_table)
    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y = 'pi') + \
            ggplot2.geom_point(ggplot2.aes_string(colour = 'locus'), \
            size = 3, alpha = 0.4) + ggplot2.scale_x_reverse('years ago') + \
            ggplot2.scale_y_continuous('phylogenetic informativeness')
    return plot
예제 #20
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes,
                    'count': counts,
                    'matched': matches})
    p = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='factor(matched)', y='count / 1000000') + \
        ggplot2.geom_boxplot(outlier_size = 0) + \
        ggplot2.geom_jitter() + \
        ggplot2.ggtitle(label = expt_id) + \
        ggplot2.ggplot2.xlab(label = "") + \
        ggplot2.scale_y_continuous(name = "Count\n(million reads)")

    filename = "{0}/{1}.png".format(directory, expt_id)
    grdevices.png(filename=filename, width=4, height=5, unit='in', res=300)
    p.plot()
    grdevices.dev_off()
예제 #21
0
def plot_thresh_distr(motif_names, thresh, out_dir, width = 350):
    """Creates boxplots of the thresholds used with each feature."""

    df = pd.DataFrame({'motif':motif_names, 'thresh':thresh})
    df = df[df['thresh'] > 1]

    df.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep = '\t', index = False)
    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh') + \
            ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70])) + \
            ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip() + \
            ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                             'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                             'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    for ext in ['.pdf', '.png']:
        ro.r.ggsave(filename = os.path.join(out_dir, 'count_thresh_bar' + ext),
                    plot = gp, width = width, height = 300, unit = 'mm')
예제 #22
0
def plot_cv_r2(pandas_df,
               outfile,
               fsize=10,
               height=120,
               max_width=50,
               xlab='Parameters'):
    """Makes boxplots of cross-validation results for different parameter settings"""

    ncv = len(set(list(pandas_df['title'])))
    r_df = com.convert_to_r_dataframe(pandas_df)

    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(title)', y = 'r2') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('R-squared') + \
        ggplot2.scale_x_discrete(xlab) + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize)})
    w = max(5 * ncv, max_width)
    ro.r.ggsave(filename=outfile, plot=gp, width=w, height=height, unit='mm')
예제 #23
0
def compare_mean_boxplot(locus_table, interval_table, intervals, loci, names, rows):
    frame = get_r_data_by_top(locus_table, interval_table, intervals, names,
            rows)
    if len(intervals) > 1:
        sort_string = '''data$interval <- factor(data$interval, {})'''.format(order_intervals(frame[1]))
        robjects.r(sort_string)
    gg_frame = ggplot2.ggplot(robjects.r('''data'''))
    plot = gg_frame + ggplot2.aes_string(x = 'interval', y = 'pi') + \
                ggplot2.geom_boxplot(ggplot2.aes_string(fill = 'factor(db)'), **{
                    'outlier.size':3,
                    'outlier.colour':'#767676',
                    'outlier.alpha':0.3,
                    'alpha':0.6
                    }
                ) + \
                ggplot2.scale_y_continuous('mean phylogenetic informativeness') + \
                ggplot2.scale_x_discrete('interval (years ago)') + \
                ggplot2.scale_fill_brewer("database", palette='Blues')
    return plot
예제 #24
0
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count,
                                       file_name):
    columns_to_data = {'place': [], 'pos': [], 'count': []}
    max_count = 0
    for place_pos_type, n_count in place_type_pos_type_to_count.items():
        place_type, pos_type = place_pos_type.split('_')
        columns_to_data['place'].append(place_type)
        columns_to_data['pos'].append(pos_type)
        columns_to_data['count'].append(n_count)
        if n_count > max_count:
            max_count = n_count
    r_columns_to_data = {
        'place': ro.StrVector(columns_to_data['place']),
        'pos': ro.StrVector(columns_to_data['pos']),
        'count': ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    if max_count > 1000:
        max_count = int(max_count / 1000 * 1000 + 1000)
    else:
        max_count = int(max_count / 100 * 100 + 100)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))
    grdevices.png(file=histogram_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.aes_string(x='pos', y='count', fill='place') + \
         ggplot2.geom_bar(position="dodge", stat="identity") + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                    limits=ro.IntVector([0, max_count])) + \
         ggplot2.geom_text(ggplot2.aes_string(label='count'),
                           position=ggplot2.position_dodge(width=0.8), size=10, angle=35, hjust=-0.2,
                           vjust=-0.5)
    pp.plot()
    logging.info(str.format("Output step3 file {}", histogram_file_path))
    grdevices.dev_off()
예제 #25
0
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Creates boxplots of the thresholds used with each feature."""

    df = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    df = df[df['thresh'] > 1]

    df.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep='\t', index=False)
    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh') + \
            ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70])) + \
            ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip() + \
            ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                             'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                             'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    for ext in ['.pdf', '.png']:
        ro.r.ggsave(filename=os.path.join(out_dir, 'count_thresh_bar' + ext),
                    plot=gp,
                    width=width,
                    height=300,
                    unit='mm')
예제 #26
0
def plot_coef(feat_mat_dir, model_dir, expt_names, pref, outfile = None, height = 120, fsize = 12):
    
    for expt_idx, ex in enumerate(expt_names):
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        (tmp_feat, tmp_y, tmp_feat_names, tmp_gene_names) = read_feat_mat(feat_mat_file)
        
        if expt_idx == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            assert(all(f[0] == f[1] for f in zip(feat_names, tmp_feat_names)))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis = 1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis = 1)
    
    nexpt = expt_idx + 1
    
    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({'feature':np.repeat(feat_names, nexpt),
                       'Classification':np.reshape(clf_coef, (clf_coef.size,)),
                       'Regression':np.reshape(reg_coef, (reg_coef.size,))})

    df2 = pd.melt(df, id_vars = 'feature', var_name = 'fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename = outfile, plot = gp, width = w, height = height, unit = 'mm')
    return df
예제 #27
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file
    for te in te_tss_cov:
        if te[0] in ['n','*','HERVH-int','L2a','AluSx','AluJb','MIRb','LTR7'] and te[1] in ['n','*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger']:
            raw_out = open('%s_raw/%s_%s.txt' % (out_prefix,te[0].replace('/','_'),te[1].replace('/','_')),'w')
            for i in range(-upstream,downstream+1):
                print >> raw_out, '%d\t%e\t%e' % (i, te_tss_cov[te][upstream+i], control_te_tss_cov[te][upstream+i])
            raw_out.close()

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures
    tss_i = ro.IntVector(2*range(-upstream,downstream+1))
    labels = ro.StrVector(['Main']*(upstream+downstream+1)+['Control']*(upstream+downstream+1))
    for te in te_tss_cov:
        if te[0] in ['n','*','HERVH-int','L2a','AluSx','AluJb','MIRb','LTR7'] and te[1] in ['n','*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger']:
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(file='%s_plot/%s_%s.pdf' % (out_prefix,te[0].replace('/','_'),te[1].replace('/','_')))
            gp.plot()
            grdevices.dev_off()
예제 #28
0
def make_output(cov, out_prefix, window):
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-window/2,window/2+1):
        print >> raw_out, '%d\t%e' % (i, cov[window/2+i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(range(-window/2,window/2+1))
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({'splice_i':splice_i, 'cov':cov})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #29
0
def main():
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    #parser.add_option()
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide BAM file')
    else:
        bam_file = args[0]

    align_lengths = {}
    for aligned_read in pysam.Samfile(bam_file, 'rb'):
        align_lengths[aligned_read.qlen] = align_lengths.get(
            aligned_read.qlen, 0) + 1

    min_len = min(align_lengths.keys())
    max_len = max(align_lengths.keys())

    # construct data frame
    len_r = ro.IntVector(range(min_len, max_len + 1))
    counts_r = ro.IntVector(
        [align_lengths.get(l, 0) for l in range(min_len, max_len + 1)])

    df = ro.DataFrame({'length': len_r, 'counts': counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file
    grdevices.pdf(file='align_lengths.pdf')
    gp.plot()
    grdevices.dev_off()
예제 #30
0
def plot_collectors_curve(args, start_times, read_lengths):
	"""
	Use rpy2 to create a collectors curve of the run
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]

	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) \
		for t in start_times])
	r_read_lengths = robjects.IntVector(read_lengths)

	# compute the cumulative based on reads or total base pairs
	if args.plot_type == 'reads':
		y_label = "Total reads"
		cumulative = \
			r.cumsum(robjects.IntVector([1] * len(start_times)))
	elif args.plot_type == 'basepairs':
		y_label = "Total base pairs"
		cumulative = r.cumsum(r_read_lengths)
	
	# make a data frame of the lists
	d = {'start': r_start_times, 
		'lengths': r_read_lengths,
		'cumul': cumulative}
	df = robjects.DataFrame(d)


	# title
	total_reads = len(read_lengths)
	total_bp = sum(read_lengths)
	plot_title = "Yield: " \
		+ str(total_reads) + " reads and " \
		+ str(total_bp) + " base pairs."

	# plot
	gp = ggplot2.ggplot(df)
	pp = gp + ggplot2.aes_string(x='start', y='cumul') \
		+ ggplot2.geom_point() \
		+ ggplot2.geom_line() \
		+ ggplot2.scale_x_continuous('Time (hours)') \
		+ ggplot2.scale_y_continuous(y_label) \
		+ ggplot2.ggtitle(plot_title)

	if args.saveas is not None:
		plot_file = args.saveas
		if plot_file.endswith(".pdf"):
			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
		elif plot_file.endswith(".png"):
			grdevices.png(plot_file, width = 8.5, height = 8.5, 
				units = "in", res = 300)
		else:
			print >>sys.stderr, "Unrecognized extension for %s!" % (plot_file)
			sys.exit()

		pp.plot()
		grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		raw_input()
예제 #31
0
파일: benchmarks.py 프로젝트: ktargows/rpy2
d['n_loop']    = IntVector([x[-1] for x in combos]) + IntVector([x[3] for x in combos_r])
d['group'] = StrVector([d['code'][x] + ':' + d['sequence'][x] for x in range(len(d['n_loop']))])
dataf = DataFrame(d)



from rpy2.robjects.lib import ggplot2
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop", 
                                         y="time",
                                         colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop", 
                                          y="time",
                                          colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.labs(title = "Benchmark (running time)")


from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png',
              width = 712, height = 512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")
예제 #32
0
def plot_squiggle(args, filename, start_times, mean_signals):
    """
	Use rpy2 to create a squiggle plot of the read
	"""
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - start_times[0]
    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets
    num_events = len(r_mean_signals)
    events_per_facet = (num_events / args.num_facets) + 1
    # dummy variable to control faceting
    facet_category = robjects.FloatVector([(i / events_per_facet) + 1
                                           for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
    df = robjects.DataFrame(d)

    gp = ggplot2.ggplot(df)
    if not args.theme_bw:
        pp = gp + ggplot2.aes_string(x='start', y='mean') \
         + ggplot2.geom_step(size=0.25) \
         + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
         + ggplot2.scale_x_continuous('Time (seconds)') \
         + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
         + ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
         + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    else:
        pp = gp + ggplot2.aes_string(x='start', y='mean') \
         + ggplot2.geom_step(size=0.25) \
         + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
         + ggplot2.scale_x_continuous('Time (seconds)') \
         + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
         + ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
         + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)}) \
         + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists' %
                (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11, units="in", res=300)
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
예제 #33
0
파일: test.py 프로젝트: dvu4/udacity
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \
                     'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \
                     'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \
                     'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \
                     'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \
                     'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \
                     'axis.text.y': ggplot2.theme_blank()} ) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()
 
p_map.plot()
 
## add the scatterplot
## define layout of subplot with viewports

vp_sub = grid.viewport(x = 0.19, y = 0.2, width = 0.32, height = 0.4)
 
p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
    ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \
    ggplot2.stat_smooth(color="black") + \
    ggplot2.opts(**{'legend.position': 'none'}) + \
    ggplot2.scale_x_continuous("Obama Vote Share") + \
    ggplot2.scale_y_continuous("Distance to nearest Railroad")
 
p_sub.plot(vp=vp_sub)

grdevices.dev_off()
예제 #34
0
파일: r_plot.py 프로젝트: yarden/biorpy
def plot_qc_reads(qc_df):
    """
    Plot number of reads part of a pipeline QC file.
    """
    # Record NA values as 0
    qc_df = qc_df.fillna(0)#.set_index("sample")
    cols = ["sample",
            "num_reads",
            "num_mapped",
            "num_unique_mapped",
            "num_junctions"]
    qc_df = qc_df[cols]
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    labels = tuple(["num_reads",
                    "num_mapped",
                    "num_unique_mapped",
                    "num_junctions"])
    labels = robj.StrVector(labels)
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels = labels)
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    r_opts = r.options(scipen=4)
    p = ggplot2.ggplot(qc_r) + \
        ggplot2.geom_point(aes_string(x="sample", y="value")) + \
        ggplot2.scale_y_continuous(trans=scales.log10_trans(),
                                   breaks=scales.trans_breaks("log10",
                                                              robj.r('function(x) 10^x')),
                                   labels=scales.trans_format("log10",
                                                              robj.r('math_format(10^.x)'))) + \
        r.xlab("CLIP-Seq samples") + \
        r.ylab("No. reads") + \
        ggplot2.coord_flip() + \
        ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
        theme(**{"panel.grid.major.x": element_blank(),
                 "panel.grid.minor.x": element_blank(),
                 "panel.grid.major.y": theme_line(size=0.5,colour="grey66",linetype=3)})
    p.plot()

    return
    r.par(mfrow=np.array([1,2]))
    num_samples = len(qc_df.num_reads)
    r.par(bty="n", lwd=1.7, lty=2)
    r_opts = r.options(scipen=4)
    r.options(r_opts)
    r.dotchart(convert_to_r_matrix(qc_df[["num_reads",
                                          "num_mapped",
                                          "num_unique_mapped"]]),
               xlab="No. reads",
               lcolor="black",
               pch=19,
               gcolor="darkblue",
               cex=0.8)
    r.par(bty="n")
    r.dotchart(convert_to_r_matrix(qc_df[["num_ribosub_mapped",
                                          "num_ribo",
                                          "num_junctions"]]),
               xlab="No. reads",
               lcolor="black",
               pch=19,
               gcolor="darkblue",
               cex=0.8)
예제 #35
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type"""

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    for ex_idx, ex in enumerate(expt_names):
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                plot_idx += 1
                cor = np.corrcoef(expr[:, tmp_idx[i]], expr[:, tmp_idx[j]])[0,
                                                                            1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))

                df = ro.DataFrame({
                    'x': ro.FloatVector(expr[:, tmp_idx[i]]),
                    'y': ro.FloatVector(expr[:, tmp_idx[j]])
                })
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                ggplot2.geom_point(size = 1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                 'axis.title.x':ggplot2.element_text(size = 8),
                                 'axis.text.y':ggplot2.element_text(size = fsize),
                                 'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                 'plot.title':ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    outfile = os.path.join(outdir,
                                           ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename=outfile,
                                plot=gp,
                                width=85,
                                height=85,
                                unit='mm')
    df = pd.DataFrame({
        'name1': names_1,
        'name2': names_2,
        'cor': cors
    },
                      index=titles)
    if not outdir is None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
예제 #36
0
파일: test.py 프로젝트: sg1845/udacity
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \
                     'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \
                     'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \
                     'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \
                     'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \
                     'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \
                     'axis.text.y': ggplot2.theme_blank()} ) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()

p_map.plot()

## add the scatterplot
## define layout of subplot with viewports

vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4)

p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
    ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \
    ggplot2.stat_smooth(color="black") + \
    ggplot2.opts(**{'legend.position': 'none'}) + \
    ggplot2.scale_x_continuous("Obama Vote Share") + \
    ggplot2.scale_y_continuous("Distance to nearest Railroad")

p_sub.plot(vp=vp_sub)

grdevices.dev_off()
예제 #37
0
number_of_peaks = len(dataf[0])


cvI = []
newRow = []
for i in range(1,number_of_peaks+1):
    row = dataf.rx(i,True)
    rowA = np.array(row)
    newRow.append(rowA[2:])
    cvI.append(cv(rowA[2:]))
#cv.append(rowA[2:].std()/rowA[2:].mean())
cv_r=robjects.conversion.py2ri(cvI)
df_cv = {'CV' : cv_r}
dataf_cv = robjects.DataFrame(df_cv)
dtf_cv = robjects.r.melt(dataf_cv)
d=dataf.cbind(dtf_cv.rx(2))
d.names[tuple(d.colnames).index('value')] = 'CV'
#d = base.merge_data_frame(dataf,dtf_cv.rx(2))
utilis.write_csv(d, options.csv_output)


dc = dtf_cv.cbind(n_peak = robjects.IntVector(range(1,number_of_peaks+1)))
#n_peak = robjects.IntVector(1,number_of_peaks)
gp = ggplot2.ggplot(dc)
pp=gp+ggplot2.aes_string(x='n_peak',y='value') + ggplot2.geom_point()+ggplot2.theme_bw()+ ggplot2.ggtitle('Coefficient of Variation')+ \
ggplot2.scale_x_continuous("Number of Peaks")+ ggplot2.scale_y_continuous("CV")

r.X11()
pp.plot()

예제 #38
0
def plot_collectors_curve(args, start_times, read_lengths):
	"""
	Use rpy2 to create a collectors curve of the run
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]

	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001 \
		for t in start_times])
	r_read_lengths = robjects.IntVector(read_lengths)

	# compute the cumulative based on reads or total base pairs
	if args.plot_type == 'reads':
		y_label = "Total reads"
		cumulative = \
			r.cumsum(robjects.IntVector([1] * len(start_times)))
	elif args.plot_type == 'basepairs':
		y_label = "Total base pairs"
		cumulative = r.cumsum(r_read_lengths)

	# make a data frame of the lists
	d = {'start': r_start_times, 
		'lengths': r_read_lengths,
		'cumul': cumulative}
	df = robjects.DataFrame(d)

	if args.savedf:
		robjects.r("write.table")(df, file=args.savedf, sep="\t")

	# title
	total_reads = len(read_lengths)
	total_bp = sum(read_lengths)
	plot_title = "Yield: " \
		+ str(total_reads) + " reads and " \
		+ str(total_bp) + " base pairs."

	# plot
	gp = ggplot2.ggplot(df)
	pp = gp + ggplot2.aes_string(x='start', y='cumul') \
		+ ggplot2.geom_step(size=2) \
		+ ggplot2.scale_x_continuous('Time (hours)') \
		+ ggplot2.scale_y_continuous(y_label) \
		+ ggplot2.ggtitle(plot_title)

        # extrapolation
	if args.extrapolate:
		start = robjects.ListVector({'a': 1, 'b': 1})
                pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                              formula='y~a*I((x*3600)^b)',
                                              se='FALSE', start=start) \
                        + ggplot2.xlim(0, float(args.extrapolate))

	if args.theme_bw:
		pp = pp + ggplot2.theme_bw()	

	if args.saveas is not None:
		plot_file = args.saveas
		if plot_file.endswith(".pdf"):
			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
		elif plot_file.endswith(".png"):
			grdevices.png(plot_file, width = 8.5, height = 8.5, 
				units = "in", res = 300)
		else:
			logger.error("Unrecognized extension for %s!" % (plot_file))
			sys.exit()

		pp.plot()
		grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		raw_input()
예제 #39
0
def main():
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d',
                      dest='downstream',
                      default=2000,
                      type='int',
                      help='TSS downstream [Default: %default]')
    parser.add_option('-o',
                      dest='out_prefix',
                      default='tss',
                      help='Output prefix [Default: %default]')
    parser.add_option('-u',
                      dest='upstream',
                      default=5000,
                      type='int',
                      help='TSS upstream [Default: %default]')
    parser.add_option('--ymax',
                      dest='ymax',
                      default=None,
                      type='float',
                      help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    else:
        raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    for line in open(raw_file):
        a = line.split()
        coords.append(int(a[0]))
        main_cov.append(float(a[1]))
        control_cov.append(float(a[2]))

    # data structures
    tss_i = ro.IntVector(range(-options.upstream, options.downstream + 1))
    labels = ro.StrVector(['Main'] *
                          (options.upstream + options.downstream + 1) +
                          ['Control'] *
                          (options.upstream + options.downstream + 1))
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_colour_discrete('')
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax == None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage',
                                         limits=ro.FloatVector(
                                             [0, options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
예제 #40
0
def plot_collectors_curve(args, start_times, read_lengths):
    """
	Use rpy2 to create a collectors curve of the run
	"""
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001 \
     for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = \
         r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)

    step = args.skip
    # make a data frame of the lists
    d = {
        'start':
        robjects.FloatVector(
            [r_start_times[n] for n in xrange(0, len(r_start_times), step)]),
        'lengths':
        robjects.IntVector(
            [r_read_lengths[n] for n in xrange(0, len(r_read_lengths), step)]),
        'cumul':
        robjects.IntVector(
            [cumulative[n] for n in xrange(0, len(cumulative), step)])
    }
    df = robjects.DataFrame(d)

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
     + str(total_reads) + " reads and " \
     + str(total_bp) + " base pairs."

    # plot
    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='start', y='cumul') \
     + ggplot2.geom_step(size=2) \
     + ggplot2.scale_x_continuous('Time (hours)') \
     + ggplot2.scale_y_continuous(y_label) \
     + ggplot2.ggtitle(plot_title)

    # extrapolation
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se='FALSE', start=start) \
                + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file,
                          width=8.5,
                          height=8.5,
                          units="in",
                          res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
예제 #41
0
d['n_loop'] = IntVector([x[-1] for x in combos]) + IntVector(
    [x[1] for x in combos_r])
d['group'] = StrVector(
    [d['code'][x] + ':' + d['sequence'][x] for x in xrange(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop",
                                         y="time",
                                         colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop",
                                          y="time",
                                          colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.opts(title = "Benchmark (running time)")

from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")
fit = nlme.lmList(Formula('time ~ n_loop | group'),
                  data=dataf,
def plot_volcano_with_r(
    data,
    xlabel='Estimated effect (change in H/L ratio)',
    title='',
    max_labels=20,
    color_background='#737373',
    color_significant='#252525',
    color_significant_muted='#252525',
    label_only_large_fc=False,
    special_labels=None,
    special_palette=None,
    base_size=12,
    label_size=3,
    x='logFC',
    y='neg_log10_p_adjust',
    special_labels_mode='all',
    xlim=None,
    skip_labels=None,
    nudges=None,
):

    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)

    plot = r_ggplot2.ggplot(r_data)
    plot += r_ggplot2.theme_minimal(base_size=base_size)
    plot += r_ggplot2.theme(
        **{
            'panel.grid.major':
            r_ggplot2.element_blank(),
            'panel.grid.minor':
            r_ggplot2.element_blank(),
            'panel.border':
            r_ggplot2.element_rect(fill=robjects.rinterface.NA, color="black")
        })
    plot += r_ggplot2.theme(
        text=r_ggplot2.element_text(family='Helvetica', face='plain'))
    plot += r_ggplot2.theme(
        **{
            'plot.title': r_ggplot2.element_text(hjust=0.5),
            #                               'axis.title.y': r_ggplot2.element_text((t = 0, r = 20, b = 0, l = 0)),
        })

    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    scale_points = r_ggplot2.scale_colour_manual(
        aes_points,
        values=r_label_palette(
            r_like_data,
            special_palette,
            color_background=color_background,
            color_significant=color_significant,
            color_significant_muted=color_significant_muted))

    plot += aes_points
    plot += scale_points

    if xlim is not None:
        plot += r_ggplot2.scale_x_continuous(
            labels=r_custom.formatterFunTwoDigits, limits=robjects.r.c(*xlim))
    else:
        plot += r_ggplot2.scale_x_continuous(
            labels=r_custom.formatterFunTwoDigits)

    plot += r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit)

    plot += r_ggplot2.geom_hline(
        yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)),
        color='#BDBDBD',
        alpha=.3)
    plot += r_ggplot2.geom_vline(xintercept=float(FC_THRESHOLD_RESPONSE),
                                 color='#BDBDBD',
                                 alpha=.3)
    plot += r_ggplot2.geom_vline(xintercept=-float(FC_THRESHOLD_RESPONSE),
                                 color='#BDBDBD',
                                 alpha=.3)

    plot += r_ggplot2.geom_point(**{'show.legend': False})

    aes_text = r_ggplot2.aes_string(label='label')
    plot += aes_text
    plot += r_ggrepel.geom_text_repel(
        aes_text,
        nudge_x=r_dollar(r_data, 'nudgex'),
        nudge_y=r_dollar(r_data, 'nudgey'),
        size=label_size,
        family='Helvetica',
        **{
            'show.legend': False,
            'point.padding': 0.25,
            'min.segment.length': 0,
            #'max.iter':0,
            'segment.color': '#BDBDBD'
        },
    )

    plot += r_ggplot2.labs(x=xlabel,
                           y='Adjusted p value (-log10)',
                           title=title)

    plot.plot()
예제 #43
0
##text_log+="average: "+str(rmean(test23)[0])+end
##text_log+="sum: "+str(rsum(test23)[0])+end
#
#roughbin= round(ma[0]/100)
#bins=round(roughbin/100)*100


#ma2=rmax(ed)

#dataf_subset = dataf.rx(dataf.rx2("contig").ro >= 18, true)

scales = importr('scales')

gp = ggplot2.ggplot(dataf)
	#geom_histogram(aes(y = ..density..))
	#   ggplot2.geom_density()+\

	    # pp = gp + ggplot2.aes_string(x='%s(contrrr)') +  ggplot2.geom_histogram()+ggplot2.scale_y_sqrt()
bins=10
teest3=robjects.r('theme(axis.text.x=element_text(angle=90))')

pp = gp + \
ggplot2.aes_string(x='Length') +  \
ggplot2.geom_histogram()+\
ggplot2.ggtitle("Found IS fragment lengths")+ \
ggplot2.scale_x_continuous(name="fragment lengths, bin="+str(bins),breaks=scales.pretty_breaks(20)) +\
ggplot2.scale_y_continuous(labels=scales.comma,name="Count",breaks=scales.pretty_breaks(10))+ \
teest3
pp.plot()
robjects.r.ggsave("/Users/security/science/dna_subj_hist.pdf")