Beispiel #1
0
def make_output(tss_cov, out_prefix, upstream, downstream):
    """Write per-position TSS coverage to a text file and plot it twice.

    Three files named after ``out_prefix`` are produced:
    ``<out_prefix>_raw.txt`` (tab-separated index and coverage),
    ``<out_prefix>_full.pdf`` (point plot over the whole window) and
    ``<out_prefix>_zoom.pdf`` (same plot, x axis limited to [-1000, 1000]).

    Args:
        tss_cov: sequence of coverage values; element ``upstream + i`` is
            the value for TSS-relative index ``i``.
        out_prefix: prefix for the output file names.
        upstream: number of positions before the TSS.
        downstream: number of positions after the TSS.
    """
    positions = list(range(-upstream, downstream + 1))

    # dump raw counts to file; the with-block closes the handle even when
    # indexing or formatting fails part-way through
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\n' % (i, tss_cov[upstream + i]))

    # make plot data structures
    df = ro.DataFrame({
        'tss_i': ro.IntVector(positions),
        'cov': ro.FloatVector(tss_cov),
    })

    # the two plots differ only in their x scale and file suffix
    x_scales = (
        ('full', ggplot2.scale_x_continuous('TSS index')),
        ('zoom', ggplot2.scale_x_continuous(
            'TSS index', limits=ro.IntVector([-1000, 1000]))),
    )
    for suffix, x_scale in x_scales:
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov') + \
            ggplot2.geom_point() + \
            x_scale + \
            ggplot2.scale_y_continuous('Coverage')

        # plot to file
        grdevices.pdf(file='%s_%s.pdf' % (out_prefix, suffix))
        gp.plot()
        grdevices.dev_off()
Beispiel #2
0
def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot of the read.

    The events are drawn as a step plot of mean signal against time
    (relative to the first event), split over at most ``args.num_facets``
    stacked facets.

    Args:
        args: options object; ``num_facets``, ``theme_bw`` and ``saveas``
            are read from it.
        filename: name of the read's file; used in the plot title and,
            when saving, as the base name of the output file.
        start_times: event start times; the first entry becomes time zero.
        mean_signals: mean signal of each event.

    Raises:
        Exception: if ``args.saveas`` is set and the target plot file
            already exists.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - t_0
    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets;
    # floor division is explicit because the facet id must be a whole number
    num_events = len(r_mean_signals)
    events_per_facet = (num_events // args.num_facets) + 1
    # dummy variable to control faceting
    facet_category = robjects.FloatVector(
        [(i // events_per_facet) + 1 for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
    df = robjects.DataFrame(d)

    title = ('Squiggle plot for read: ' + filename
             + "\nTotal time (sec): " + str(total_time))
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='mean') \
        + ggplot2.geom_step(size=0.25) \
        + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
        + ggplot2.scale_x_continuous('Time (seconds)') \
        + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
        + ggplot2.ggtitle(title) \
        + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    if args.theme_bw:
        # NOTE(review): theme_bw() is added last, exactly as before; a
        # complete theme added after theme() may reset the title size.
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11,
                          units="in", res=300)
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
def make_output_and(cov, control_cov, out_prefix, window):
    """Write main and control splice-site coverage to file and plot both.

    Creates ``<out_prefix>_raw.txt`` (tab-separated position, main coverage,
    control coverage) and ``<out_prefix>.pdf`` (point plot of both series,
    coloured by the labels 'Main' and 'Control').

    Args:
        cov: list of main coverage values; element ``window // 2 + i`` is
            the value at relative position ``i``.
        control_cov: list of control coverage values, indexed like ``cov``.
        out_prefix: prefix for the output file names.
        window: total window width around the splice site.
    """
    # Floor division is spelled out so the bounds stay integers whatever
    # the interpreter's "/" semantics are.
    center = window // 2
    positions = list(range(-window // 2, center + 1))

    # dump raw counts to file; the with-block guarantees the handle is closed
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\t%e\n' %
                          (i, cov[center + i], control_cov[center + i]))

    # make plot data structures: positions repeated once per series
    splice_i = ro.IntVector(2 * positions)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(['Main'] * len(cov) + ['Control'] * len(control_cov))
    df = ro.DataFrame({'splice_i': splice_i, 'cov': cov_r, 'label': labels})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage') + \
        ggplot2.scale_colour_discrete('')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
Beispiel #4
0
def main():
    """Plot the distribution of read alignment lengths in a BAM file.

    Takes the BAM path as the single command-line argument, tallies the
    ``qlen`` of every read, and writes a bar chart of count per length to
    ``align_lengths.pdf`` in the current directory.  Exits through
    ``parser.error`` when the argument is missing or the file holds no reads.
    """
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide BAM file')
    bam_file = args[0]

    # tally alignment lengths; close the BAM handle even if iteration fails
    align_lengths = {}
    bam_in = pysam.Samfile(bam_file, 'rb')
    try:
        for aligned_read in bam_in:
            align_lengths[aligned_read.qlen] = align_lengths.get(aligned_read.qlen, 0) + 1
    finally:
        bam_in.close()

    # min()/max() below would fail with an opaque ValueError on empty input
    if not align_lengths:
        parser.error('No alignments found in %s' % bam_file)

    min_len = min(align_lengths)
    max_len = max(align_lengths)

    # construct data frame; lengths never observed get a count of zero
    lengths = list(range(min_len, max_len + 1))
    len_r = ro.IntVector(lengths)
    counts_r = ro.IntVector([align_lengths.get(l, 0) for l in lengths])

    df = ro.DataFrame({'length': len_r, 'counts': counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file
    grdevices.pdf(file='align_lengths.pdf')
    gp.plot()
    grdevices.dev_off()
Beispiel #5
0
def make_output_and(cov, control_cov, out_prefix, window):
    """Dump and plot splice-site coverage for a main and a control series.

    Writes ``<out_prefix>_raw.txt`` (tab-separated position, main coverage,
    control coverage) and ``<out_prefix>.pdf`` (point plot of both series,
    coloured by the labels "Main" and "Control").

    Args:
        cov: list of main coverage values; element ``window // 2 + i`` is
            the value at relative position ``i``.
        control_cov: list of control coverage values, indexed like ``cov``.
        out_prefix: prefix for the output file names.
        window: total window width around the splice site.
    """
    # Explicit floor division keeps the range bounds and indices integral.
    center = window // 2
    positions = list(range(-window // 2, center + 1))

    # dump raw counts to file; the handle is closed even if a lookup fails
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for i in positions:
            raw_out.write(
                "%d\t%e\t%e\n" % (i, cov[center + i], control_cov[center + i])
            )

    # make plot data structures: positions repeated once per series
    splice_i = ro.IntVector(2 * positions)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(["Main"] * len(cov) + ["Control"] * len(control_cov))
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r, "label": labels})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov", colour="label")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
        + ggplot2.scale_colour_discrete("")
    )

    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
Beispiel #6
0
def make_output(cov, out_prefix, window):
    """Dump splice-site coverage to a text file and plot it.

    Writes ``<out_prefix>_raw.txt`` (tab-separated position and coverage)
    and ``<out_prefix>.pdf`` (point plot of coverage against position).

    Args:
        cov: sequence of coverage values; element ``window // 2 + i`` is
            the value at relative position ``i``.
        out_prefix: prefix for the output file names.
        window: total window width around the splice site.
    """
    # Explicit floor division keeps the range bounds and indices integral.
    center = window // 2
    positions = list(range(-window // 2, center + 1))

    # dump raw counts to file; the handle is closed even if a lookup fails
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for i in positions:
            raw_out.write("%d\t%e\n" % (i, cov[center + i]))

    # make plot data structures
    df = ro.DataFrame({
        "splice_i": ro.IntVector(positions),
        "cov": ro.FloatVector(cov),
    })

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
    )

    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
Beispiel #7
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream,
                    downstream):
    """Write and plot main vs. control TSS coverage for selected TE keys.

    Recreates the directories ``<out_prefix>_raw`` and ``<out_prefix>_plot``
    (any existing content is deleted) and, for every key of ``te_tss_cov``
    that passes the hard-coded filter below, writes one tab-separated text
    file and one PDF point plot named after the key.

    Args:
        te_tss_cov: dict mapping a two-element key to a list of coverage
            values; element ``upstream + i`` is the value at index ``i``.
        control_te_tss_cov: dict with the same keys holding control values.
        out_prefix: prefix for the two output directories.
        upstream: number of positions before the TSS.
        downstream: number of positions after the TSS.
    """
    # Accepted values for the first and second key element (presumably
    # repeat name and repeat family -- verify against the caller).
    te_names = ('n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7')
    te_families = (
        'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
        'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
        'LTR/ERVK', 'DNA/TcMar-Tigger'
    )
    selected = [
        te for te in te_tss_cov
        if te[0] in te_names and te[1] in te_families
    ]
    positions = list(range(-upstream, downstream + 1))

    # clean raw counts dir
    raw_dir = '%s_raw' % out_prefix
    if os.path.isdir(raw_dir):
        shutil.rmtree(raw_dir)
    os.mkdir(raw_dir)

    # dump raw counts to file; each handle is closed even if a lookup fails
    for te in selected:
        raw_path = '%s/%s_%s.txt' % (
            raw_dir, te[0].replace('/', '_'), te[1].replace('/', '_'))
        with open(raw_path, 'w') as raw_out:
            for i in positions:
                raw_out.write('%d\t%e\t%e\n' % (
                    i, te_tss_cov[te][upstream + i],
                    control_te_tss_cov[te][upstream + i]))

    # clean plot dirs
    plot_dir = '%s_plot' % out_prefix
    if os.path.isdir(plot_dir):
        shutil.rmtree(plot_dir)
    os.mkdir(plot_dir)

    # make data structures: positions and labels repeated once per series
    tss_i = ro.IntVector(2 * positions)
    labels = ro.StrVector(['Main'] * len(positions) +
                          ['Control'] * len(positions))
    for te in selected:
        cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
        df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

        # construct full plot
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
            ggplot2.geom_point() + \
            ggplot2.scale_x_continuous('TSS index') + \
            ggplot2.scale_y_continuous('Coverage') + \
            ggplot2.scale_colour_discrete('')

        # plot to file
        grdevices.pdf(
            file='%s/%s_%s.pdf' %
            (plot_dir, te[0].replace('/', '_'), te[1].replace('/', '_')))
        gp.plot()
        grdevices.dev_off()
Beispiel #8
0
def gray_plot(data, min=0, max=1, name=""):
    """Build a grayscale tile plot of ``data`` and return it undrawn.

    ``data`` is melted with R's ``reshape.melt``; the melted columns ``L1``
    and ``L2`` become x and y, and ``value`` drives a black-to-white fill
    gradient limited to ``[min, max]``.  ``name`` is used for the x scale.
    """
    melted = importr('reshape').melt(data, id_var=['x', 'y'])
    plot = ggplot2.ggplot(melted)
    plot = plot + ggplot2.aes_string(x='L1', y='L2')
    plot = plot + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    plot = plot + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=FloatVector((min, max)))
    plot = plot + ggplot2.coord_equal()
    plot = plot + ggplot2.scale_x_continuous(name)
    return plot
def gray_plot(data, min=0, max=1, name=""):
    """Return a ggplot tile plot of ``data`` shaded from black to white.

    The input is reshaped with R's ``reshape.melt``; ``L1``/``L2`` of the
    melted frame are the x/y aesthetics and ``value`` is the fill, with the
    gradient limits fixed to ``(min, max)``.  ``name`` is passed to the x
    scale.  The plot object is returned without being drawn.
    """
    reshape_pkg = importr('reshape')
    long_form = reshape_pkg.melt(data, id_var=['x', 'y'])
    tiles = ggplot2.ggplot(long_form)
    tiles = tiles + ggplot2.aes_string(x='L1', y='L2')
    tiles = tiles + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))
    fill_limits = FloatVector((min, max))
    tiles = tiles + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=fill_limits)
    tiles = tiles + ggplot2.coord_equal()
    return tiles + ggplot2.scale_x_continuous(name)
Beispiel #10
0
def main():
    """Plot smoothed main and control TSS coverage from a raw counts file.

    The single positional argument is a whitespace-separated file whose
    second and third columns are main and control coverage.  The x axis is
    rebuilt from the ``-u``/``-d`` options rather than read from the file.
    A loess-smoothed plot of both series is written to
    ``<out_prefix>_and.pdf``.
    """
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int', help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss', help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int', help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float', help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    raw_file = args[0]

    # collect data; the first column (coordinate) is not needed because the
    # positions are regenerated from the options below
    main_cov = []
    control_cov = []
    with open(raw_file) as raw_in:
        for line in raw_in:
            a = line.split()
            main_cov.append(float(a[1]))
            control_cov.append(float(a[2]))

    # data structures; positions are repeated explicitly for the two series
    # instead of relying on R to recycle the shorter column
    positions = list(range(-options.upstream, options.downstream + 1))
    tss_i = ro.IntVector(2 * positions)
    labels = ro.StrVector(['Main'] * len(positions) + ['Control'] * len(positions))
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    # only clamp the y axis when the user asked for it
    if options.ymax is None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0, options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
Beispiel #11
0
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean,
                                          prefix=''):
    """Write per-subgroup LRR counts and draw the acc20 line chart.

    ``subgroups_to_lrrs_acc20mean`` maps each subgroup to a pair
    ``(acc20 means, count)``.  The counts go to
    ``<prefix>step3_5_lrr_count.txt`` and the means are plotted, one line
    per subgroup against 1-based position, into
    ``<prefix>step3_5_lrr_acc20_line.png``; both files live in OUTPUT_PATH.
    """
    pandas2ri.activate()

    lrr_counts = {}
    subgroup_col, pos_col, acc20_col = [], [], []
    for subgroup, pair in subgroups_to_lrrs_acc20mean.items():
        acc20means, acc20_count = pair
        lrr_counts[subgroup] = acc20_count
        # positions are 1-based
        for pos, acc20mean in enumerate(acc20means, 1):
            subgroup_col.append(subgroup)
            pos_col.append(pos)
            acc20_col.append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    count_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt")
    with open(count_path, 'w') as count_file:
        for subgroup, lrr_count in lrr_counts.items():
            count_file.write("{}: {}\n".format(subgroup, lrr_count))

    # Generate the line chart file
    df = ro.DataFrame({
        'subgroup': ro.StrVector(subgroup_col),
        'pos': ro.IntVector(pos_col),
        'acc20': ro.FloatVector(acc20_col),
    })

    line_chart_file_path = os.path.join(
        OUTPUT_PATH, prefix + "step3_5_lrr_acc20_line.png")
    logging.debug("The Data Frame for file {}: \n{}".format(
        line_chart_file_path, df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)

    # add the layers one at a time, in the same order as a single chain
    pp = ggplot2.ggplot(df)
    pp = pp + ggplot2.theme_bw()
    pp = pp + ggplot2.theme_classic()
    pp = pp + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)})
    pp = pp + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)})
    pp = pp + ggplot2.aes_string(
        x='pos', y='acc20', group='subgroup', colour='subgroup')
    pp = pp + ggplot2.geom_point(size=4, shape=20)
    pp = pp + ggplot2.geom_line(size=3)
    pp = pp + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
    pp = pp + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)})
    # one x tick per position, labelled with the letters of the motif string
    pp = pp + ggplot2.scale_x_continuous(
        breaks=ro.IntVector(range(1, 25)),
        labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))
    pp.plot()
    logging.info("Output step3 file {}".format(line_chart_file_path))
    grdevices.dev_off()
Beispiel #12
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir = None):
    """Make correlation plots between CEL files for the same cell type.

    For every experiment name, each pair of its CEL columns in ``expr`` is
    correlated and drawn as a scatter plot.  Plots are shown on the current
    device when ``outdir`` is None, otherwise saved as
    ``<outdir>/<experiment>-<n>.png`` together with a tab-separated
    ``cor_summary.txt``.

    Args:
        expr: 2-D array; columns are CEL files.
        expt_names: iterable of experiment (cell type) names.
        expt_name_idx: mapping from experiment name to the column indices
            of its CEL files.
        cel_names: CEL file names, indexed by column.
        outdir: output directory, created if missing; None to plot
            interactively and skip the summary file.

    Returns:
        pandas DataFrame with columns ``name1``, ``name2`` and ``cor``,
        indexed by ``<experiment>-<n>``.
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front: the summary file below
    # must be writable even when no experiment has a pair of CEL files.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for i, col_1 in enumerate(tmp_idx):
            name1 = cel_names[col_1].replace('_', '.')
            for col_2 in tmp_idx[i + 1:]:
                name2 = cel_names[col_2].replace('_', '.')
                plot_idx += 1
                title = ex + '-' + str(plot_idx)
                cor = np.corrcoef(expr[:, col_1], expr[:, col_2])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(title)

                df = ro.DataFrame({'x': ro.FloatVector(expr[:, col_1]),
                                   'y': ro.FloatVector(expr[:, col_2])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                    ggplot2.geom_point(size = 1) + \
                    ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                    ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                    ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size = fsize),
                                     'axis.title.x': ggplot2.element_text(size = 8),
                                     'axis.text.y': ggplot2.element_text(size = fsize),
                                     'axis.title.y': ggplot2.element_text(size = 8, angle = 90),
                                     'plot.title': ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, title + '.png')
                    ro.r.ggsave(filename = outfile, plot = gp, width = 85, height = 85, unit = 'mm')

    df = pd.DataFrame({'name1': names_1, 'name2': names_2, 'cor': cors}, index = titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep = '\t')
    return df
Beispiel #13
0
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname,
                                          line_chart_file_path):
    """Draw a line chart of mean acc20 per site for each type.

    ``ts_to_acc20s`` is reduced with ``calc_acc20mean_by_types``; each
    resulting list of means is plotted against sites starting at -5, one
    line per type, into the PNG at ``line_chart_file_path``.  ``tname`` is
    the column name used for the type (grouping and colour).
    """
    logging.debug("Begin to generate {}, data {}".format(
        line_chart_file_path, ts_to_acc20s))
    means_by_type = calc_acc20mean_by_types(ts_to_acc20s)

    columns = {tname: [], 'site': [], 'acc20': []}
    for type_name, acc20means in means_by_type.items():
        # the first mean belongs to site -5
        for site, acc20mean in enumerate(acc20means, -5):
            columns[tname].append(type_name)
            columns['site'].append(site)
            columns['acc20'].append(acc20mean)

    # Generate the line chart file
    df = ro.DataFrame({
        tname: ro.StrVector(columns[tname]),
        'site': ro.IntVector(columns['site']),
        'acc20': ro.FloatVector(columns['acc20']),
    })

    logging.debug("The Data Frame for file {}: \n{}".format(
        line_chart_file_path, df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)

    # add the layers one at a time, in the same order as a single chain
    pp = ggplot2.ggplot(df)
    pp = pp + ggplot2.theme_bw()
    pp = pp + ggplot2.theme_classic()
    pp = pp + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)})
    pp = pp + ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)})
    pp = pp + ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname)
    pp = pp + ggplot2.geom_point(size=4, shape=20)
    pp = pp + ggplot2.geom_line(size=3)
    pp = pp + ggplot2.theme(**{'legend.title': ggplot2.element_blank()})
    pp = pp + ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)})
    # ticks from -5 to 5, with site 0 shown as 'N'
    pp = pp + ggplot2.scale_x_continuous(
        breaks=ro.IntVector(list(range(-5, 6))),
        labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N', '1', '2', '3', '4', '5']))
    pp.plot()
    logging.info("Output step3 file {}".format(line_chart_file_path))
    grdevices.dev_off()
Beispiel #14
0
def singleTablePlot_gg(parser, args):
    ''' Draw a bar chart of the k-mer counts in args.table1 with ggplot2.

        Counts come from kmercount_in_table(args.table1) and are plotted in
        sorted k-mer order, one bar per k-mer, with the k-mers as x labels.
        The plot is shown on the current R device and the function then
        waits for the user to press enter.

        parser is not used by this function.'''
    r = robjects.r
    r.library("ggplot2")
    kmerdict = kmercount_in_table(args.table1)
    kmers = sorted(kmerdict.keys())
    positions = list(range(1, len(kmers) + 1))

    # R vectors are built explicitly: aes_string needs column *names*, and
    # the data frame needs R-typed columns rather than plain Python lists.
    # assumes the k-mer keys are strings -- TODO confirm
    df = robjects.DataFrame({
        'pos': robjects.IntVector(positions),
        'kmers': robjects.StrVector(kmers),
        'counts': robjects.FloatVector([kmerdict[k] for k in kmers]),
    })
    # breaks keep the half-unit offset the original expression asked for
    label_breaks = robjects.FloatVector([0.5 + p for p in positions])
    pp = ggplot2.ggplot(df) \
         + ggplot2.aes_string(x='pos', y='counts') \
         + ggplot2.geom_bar(stat="identity") \
         + ggplot2.scale_x_continuous(name="kmer", breaks=label_breaks,
                                      labels=robjects.StrVector(kmers))
    pp.plot()
    # keep the plot open until the user hits enter
    print('Type enter to exit.')
    raw_input()
Beispiel #15
0
def singleTablePlot_gg(parser, args):
    ''' Plot the k-mer counts found in args.table1 as a ggplot2 bar chart.

        kmercount_in_table(args.table1) supplies the counts; bars appear in
        sorted k-mer order and the x axis is labelled with the k-mers.  The
        plot is drawn on the current R device, after which the function
        blocks until the user presses enter.

        parser is not used by this function.'''
    r = robjects.r
    r.library("ggplot2")
    kmerdict = kmercount_in_table(args.table1)
    kmers = sorted(kmerdict.keys())
    numKmers = len(kmers)
    positions = list(range(1, numKmers + 1))

    # Columns are converted to R vectors and referred to by name below;
    # aes_string takes column names, not the data itself.
    # assumes the k-mer keys are strings -- TODO confirm
    df = robjects.DataFrame({
        'pos': robjects.IntVector(positions),
        'kmers': robjects.StrVector(kmers),
        'counts': robjects.FloatVector([kmerdict[k] for k in kmers]),
    })
    gp = ggplot2.ggplot(df)
    # breaks keep the half-unit offset the original expression asked for
    pp = gp + ggplot2.aes_string(x='pos', y='counts') \
         + ggplot2.geom_bar(stat="identity") \
         + ggplot2.scale_x_continuous(
             name="kmer",
             breaks=robjects.FloatVector([0.5 + p for p in positions]),
             labels=robjects.StrVector(kmers))
    pp.plot()
    # keep the plot open until the user hits enter
    print('Type enter to exit.')
    raw_input()
Beispiel #16
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    """Dump and plot main vs. control TSS coverage for selected TE keys.

    The directories ``<out_prefix>_raw`` and ``<out_prefix>_plot`` are
    recreated from scratch (existing content is deleted).  For every key of
    ``te_tss_cov`` that passes the hard-coded filter below, one
    tab-separated text file and one PDF point plot are written, both named
    after the key.

    Args:
        te_tss_cov: dict mapping a two-element key to a list of coverage
            values; element ``upstream + i`` is the value at index ``i``.
        control_te_tss_cov: dict with the same keys holding control values.
        out_prefix: prefix for the two output directories.
        upstream: number of positions before the TSS.
        downstream: number of positions after the TSS.
    """
    # Accepted values for the first and second key element (presumably
    # repeat name and repeat family -- verify against the caller).
    keep_first = ('n','*','HERVH-int','L2a','AluSx','AluJb','MIRb','LTR7')
    keep_second = ('n','*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger')

    def wanted(te):
        # single definition of the filter shared by the dump and plot passes
        return te[0] in keep_first and te[1] in keep_second

    def file_stem(te):
        # '/' would create sub-directories, so it is replaced in file names
        return '%s_%s' % (te[0].replace('/','_'), te[1].replace('/','_'))

    positions = list(range(-upstream, downstream+1))

    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file; each handle is closed even if a lookup fails
    for te in te_tss_cov:
        if wanted(te):
            with open('%s_raw/%s.txt' % (out_prefix, file_stem(te)), 'w') as raw_out:
                for i in positions:
                    raw_out.write('%d\t%e\t%e\n' % (i, te_tss_cov[te][upstream+i], control_te_tss_cov[te][upstream+i]))

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures: positions and labels repeated once per series
    tss_i = ro.IntVector(2*positions)
    labels = ro.StrVector(['Main']*len(positions)+['Control']*len(positions))
    for te in te_tss_cov:
        if wanted(te):
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(file='%s_plot/%s.pdf' % (out_prefix, file_stem(te)))
            gp.plot()
            grdevices.dev_off()
Beispiel #17
0
def make_output(cov, out_prefix, window):
    """Write splice-site coverage to a text file and plot it.

    Creates ``<out_prefix>_raw.txt`` (tab-separated position and coverage)
    and ``<out_prefix>.pdf`` (point plot of coverage against position).

    Args:
        cov: sequence of coverage values; element ``window // 2 + i`` is
            the value at relative position ``i``.
        out_prefix: prefix for the output file names.
        window: total window width around the splice site.
    """
    # Floor division is spelled out so the bounds stay integers whatever
    # the interpreter's "/" semantics are.
    center = window // 2
    positions = list(range(-window // 2, center + 1))

    # dump raw counts to file; the with-block guarantees the handle is closed
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\n' % (i, cov[center + i]))

    # make plot data structures
    splice_i = ro.IntVector(positions)
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({'splice_i': splice_i, 'cov': cov})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
Beispiel #18
0
def main():
    """Plot how often each read alignment length occurs in a BAM file.

    The BAM path is the single command-line argument.  The ``qlen`` of
    every read is tallied and a bar chart of count per length is written to
    ``align_lengths.pdf`` in the current directory.  Exits through
    ``parser.error`` when the argument is missing or the file holds no reads.
    """
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide BAM file')
    bam_file = args[0]

    # tally alignment lengths; close the BAM handle even if iteration fails
    align_lengths = {}
    bam_in = pysam.Samfile(bam_file, 'rb')
    try:
        for aligned_read in bam_in:
            align_lengths[aligned_read.qlen] = align_lengths.get(
                aligned_read.qlen, 0) + 1
    finally:
        bam_in.close()

    # min()/max() below would fail with an opaque ValueError on empty input
    if not align_lengths:
        parser.error('No alignments found in %s' % bam_file)

    min_len = min(align_lengths)
    max_len = max(align_lengths)

    # construct data frame; lengths never observed get a count of zero
    lengths = list(range(min_len, max_len + 1))
    len_r = ro.IntVector(lengths)
    counts_r = ro.IntVector([align_lengths.get(l, 0) for l in lengths])

    df = ro.DataFrame({'length': len_r, 'counts': counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file
    grdevices.pdf(file='align_lengths.pdf')
    gp.plot()
    grdevices.dev_off()
Beispiel #19
0
# Add a 'group' column holding one "<code>:<sequence>" string per row
# (the row count is taken from d['n_loop']), then build the R data frame.
d['group'] = StrVector([d['code'][x] + ':' + d['sequence'][x] for x in range(len(d['n_loop']))])
dataf = DataFrame(d)



from rpy2.robjects.lib import ggplot2
# Line-and-point plot of time against n_loop, coloured by code, with one
# panel per sequence.
# NOTE(review): scale_x_continuous(...) and xlim(...) both define the x
# scale; the later one may replace the earlier and drop the
# 'repeated n times' label -- confirm against the rendered figure.
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop", 
                                         y="time",
                                         colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop", 
                                          y="time",
                                          colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.labs(title = "Benchmark (running time)")


from rpy2.robjects.packages import importr
# Render the plot into a PNG file through R's grDevices; dev_off() closes
# the device once the plot has been drawn.
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png',
              width = 712, height = 512)
p.plot()
grdevices.dev_off()

#base = importr("base")
# R packages imported for the statements that follow this plot.
stats = importr('stats')
nlme = importr("nlme")
fit = nlme.lmList(Formula('time ~ n_loop | group'), data = dataf, 
Beispiel #20
0
pp = gp + \
ggplot2.aes_string(x="Lon", y="Lat", col="Temp",label="Station") + \
ggplot2.scale_colour_gradientn(colours=colours2)+ \
ggplot2.geom_text(col="black")+ \
ggplot2.geom_point()

ggplot2.ggtitle(graphtitle)
pp.plot()
'''
#robjects.r.ggsave((str(args.out).replace(".pdf",""))+"map.pdf")

# Run R code that keeps only the rows of `allfilters` whose filtersize is
# one of "0p1", "0p8" or "3p0"; the subset is stored in R as `onl`.
onlyfiltxxx=robjects.r('onl<-subset(allfilters,filtersize %in% c("0p1","0p8","3p0"))')
#print "here is"
#print robjects.r('print(datass1)')
#print "there was"
'''
ggplot2.scale_x_continuous(name=xlabel,breaks=scales.pretty_breaks(20)) +\
ggplot2.scale_y_continuous(labels=scales.comma,name=ylabel,breaks=scales.pretty_breaks(10))+ ggplot2.theme(title=ggplot2.element_text(colour="blue",face="bold"))''' 

# Build a dodged bar chart in R (stored there as `p`) from `onl`, with one
# bar group per mystation, filled by filtersize.  The y variable is
# `istranscripts` when args.plotfraction is the string "False" and `ratio`
# when it is the string "True".
# NOTE(review): plotfraction is compared as text; with any other value
# neither branch runs and barsdata is never assigned here.
if args.plotfraction=="False":
    barsdata=robjects.r('p<-ggplot(data=onl, aes(x=mystation, y=istranscripts,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
if args.plotfraction=="True":
    barsdata=robjects.r('p<-ggplot(data=onl, aes(x=mystation, y=ratio,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
#barsdata=robjects.r('p<-ggplot(data=datass1, aes(x=mystation, y=ratio,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
#teest=robjects.r('p<-p+coord_fixed(ratio=400)')
#barsg = ggplot2.ggplot(onlysurf)
#barsg=barsg+ggplot2.aes_string(x="mystation", y="ratio")
#barsg=barsg+ggplot2.geom_bar(stat="identity")
#bbb=robjects.r('p<-ggplot(data=smalls11, aes(x=mystation, y=ratio,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
#barsg=barsg+ggplot2.geom_bar(stat="identity",position=ggplot2.position_dodge())
#barsg=barsg+ggplot2.theme(axis.text.x=ggplot2.element_text(angle=90))
Beispiel #21
0
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \
                     'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \
                     'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \
                     'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \
                     'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \
                     'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \
                     'axis.text.y': ggplot2.theme_blank()} ) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()
 
# Draw the map plot first; the scatterplot is drawn on top of it below.
p_map.plot()

# Viewport that positions and sizes the scatterplot sub-plot.
vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4)

# Scatterplot of NEAR_DIST against OBAMA_SHAR with a black smoother and
# no legend.
p_sub = (
    ggplot2.ggplot(RR_distance)
    + ggplot2.aes_string(x='OBAMA_SHAR', y='NEAR_DIST')
    + ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR'))
    + ggplot2.stat_smooth(color="black")
    + ggplot2.opts(**{'legend.position': 'none'})
    + ggplot2.scale_x_continuous("Obama Vote Share")
    + ggplot2.scale_y_continuous("Distance to nearest Railroad")
)

# Render the sub-plot inside its viewport, then close the graphics device.
p_sub.plot(vp=vp_sub)

grdevices.dev_off()
Beispiel #22
0
def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot of the read.

    The events are drawn as a step plot of mean signal against start time
    (shifted so the first event is at 0) and spread over stacked facets.

    Parameters:
        args: options object; ``num_facets``, ``theme_bw`` and ``saveas``
            are read from it.
        filename: name of the read; shown in the title and, when saving,
            its basename plus ``"." + args.saveas`` is the output file.
        start_times: sequence of event start times (must be non-empty).
        mean_signals: sequence of mean signal values, one per event.

    Raises:
        Exception: if ``args.saveas`` is set and the target plot file
            already exists.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - t_0
    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets.
    # Floor division keeps the bucket size (and the facet ids below) integral
    # whether or not "/" performs true division in the running interpreter.
    num_events = len(r_mean_signals)
    events_per_facet = (num_events // args.num_facets) + 1
    # dummy variable to control faceting: consecutive runs of
    # events_per_facet events share one 1-based facet id
    facet_category = robjects.FloatVector(
        [(i // events_per_facet) + 1 for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    df = robjects.DataFrame({'start': r_start_times,
                             'mean': r_mean_signals,
                             'cat': facet_category})

    title = ('Squiggle plot for read: ' + filename
             + "\nTotal time (sec): " + str(total_time))
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='mean') \
        + ggplot2.geom_step(size=0.25) \
        + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
        + ggplot2.scale_x_continuous('Time (seconds)') \
        + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
        + ggplot2.ggtitle(title) \
        + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    if args.theme_bw:
        # Added after theme(), exactly as before.
        # NOTE(review): a complete theme added last may reset the title size
        # set just above -- confirm this ordering is intended.
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists' %
                (filename, plot_file))
        # NOTE(review): any saveas value other than "pdf"/"png" opens no
        # device here, so plot_file is not produced -- confirm the caller
        # restricts the choices.
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11, units="in", res=300)
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
Beispiel #23
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type.

    For every name in ``expt_names`` each unordered pair of its columns in
    ``expr`` is scatter-plotted and their correlation coefficient
    (``np.corrcoef``) is recorded.

    Parameters:
        expr: 2-D array indexed as ``expr[:, column]``.
        expt_names: iterable of experiment / cell-type names.
        expt_name_idx: mapping from each name to the sequence of column
            indices of ``expr`` (and of ``cel_names``) that belong to it.
        cel_names: CEL file names by column index; underscores are shown
            as dots in the axis labels and in the summary.
        outdir: if ``None`` the plots are drawn on the active R device;
            otherwise the directory is created when missing, each plot is
            saved there as ``<name>-<n>.png`` and the summary table is
            written to ``cor_summary.txt``.

    Returns:
        pandas.DataFrame indexed by ``<name>-<n>`` with the columns
        ``name1``, ``name2`` and ``cor``.
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front: the summary below is
    # written even when no pair of columns exists to be plotted.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for pos, col_i in enumerate(tmp_idx):
            name1 = cel_names[col_i].replace('_', '.')
            x_vals = expr[:, col_i]
            for col_j in tmp_idx[pos + 1:]:
                name2 = cel_names[col_j].replace('_', '.')
                y_vals = expr[:, col_j]
                plot_idx += 1
                title = ex + '-' + str(plot_idx)
                cor = np.corrcoef(x_vals, y_vals)[0, 1]

                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(title)

                pair_df = ro.DataFrame({
                    'x': ro.FloatVector(x_vals),
                    'y': ro.FloatVector(y_vals)
                })
                gp = ggplot2.ggplot(pair_df) + ggplot2.aes_string(x='x', y='y') + \
                    ggplot2.geom_point(size=1) + \
                    ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                    ggplot2.theme_bw() + \
                    ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                    ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=fsize),
                                     'axis.title.x': ggplot2.element_text(size=8),
                                     'axis.text.y': ggplot2.element_text(size=fsize),
                                     'axis.title.y': ggplot2.element_text(size=8, angle=90),
                                     'plot.title': ggplot2.element_text(size=fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    # "units" is ggsave's actual argument name; the former
                    # "unit" only worked through R's partial matching.
                    ro.r.ggsave(filename=os.path.join(outdir, title + '.png'),
                                plot=gp,
                                width=85,
                                height=85,
                                units='mm')

    df = pd.DataFrame({'name1': names_1, 'name2': names_2, 'cor': cors},
                      index=titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
def plot_volcano_with_r(
    data,
    xlabel='Estimated effect (change in H/L ratio)',
    title='',
    max_labels=20,
    color_background='#737373',
    color_significant='#252525',
    color_significant_muted='#252525',
    label_only_large_fc=False,
    special_labels=None,
    special_palette=None,
    base_size=12,
    label_size=3,
    x='logFC',
    y='neg_log10_p_adjust',
    special_labels_mode='all',
    xlim=None,
    skip_labels=None,
    nudges=None,
):
    """Draw a volcano plot of ``data`` with R's ggplot2 and ggrepel.

    ``data`` is converted by ``transform_data_for_ggplot`` (which receives
    the labelling options ``label_only_large_fc``, ``special_labels``,
    ``max_labels``, ``special_labels_mode``, ``skip_labels`` and
    ``nudges``).  Points are plotted with columns ``x`` / ``y`` and coloured
    by the ``group`` column using the palette from ``r_label_palette``
    (built from ``special_palette`` and the three ``color_*`` arguments).
    Grey guide lines mark the FDR threshold and +/- the fold-change
    threshold, and labels from the ``label`` column are placed with
    ``geom_text_repel``.

    ``base_size`` is the base size of the minimal theme, ``label_size`` the
    label text size, ``xlim`` an optional sequence of x-axis limits, and
    ``xlabel`` / ``title`` the axis label and plot title.

    The plot is drawn via ``plot.plot()``; nothing is returned.
    """
    labelling_options = {
        'label_only_large_fc': label_only_large_fc,
        'special_labels': special_labels,
        'max_labels': max_labels,
        'special_labels_mode': special_labels_mode,
        'skip_labels': skip_labels,
        'nudges': nudges,
    }
    r_data, r_like_data = transform_data_for_ggplot(data, **labelling_options)

    # --- base plot and theme -------------------------------------------
    plot = r_ggplot2.ggplot(r_data)
    plot += r_ggplot2.theme_minimal(base_size=base_size)

    boxed_panel_without_grid = {
        'panel.grid.major': r_ggplot2.element_blank(),
        'panel.grid.minor': r_ggplot2.element_blank(),
        'panel.border': r_ggplot2.element_rect(fill=robjects.rinterface.NA,
                                               color="black"),
    }
    plot += r_ggplot2.theme(**boxed_panel_without_grid)

    plain_helvetica = r_ggplot2.element_text(family='Helvetica', face='plain')
    plot += r_ggplot2.theme(text=plain_helvetica)

    centred_title = {'plot.title': r_ggplot2.element_text(hjust=0.5)}
    plot += r_ggplot2.theme(**centred_title)

    # --- point aesthetics and colour scale -----------------------------
    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    palette = r_label_palette(
        r_like_data,
        special_palette,
        color_background=color_background,
        color_significant=color_significant,
        color_significant_muted=color_significant_muted)
    scale_points = r_ggplot2.scale_colour_manual(aes_points, values=palette)

    plot += aes_points
    plot += scale_points

    # --- axes: limits are only passed when the caller supplied them ----
    x_scale_options = {'labels': r_custom.formatterFunTwoDigits}
    if xlim is not None:
        x_scale_options['limits'] = robjects.r.c(*xlim)
    plot += r_ggplot2.scale_x_continuous(**x_scale_options)
    plot += r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit)

    # --- threshold guide lines (one horizontal, two vertical) ----------
    guide_style = {'color': '#BDBDBD', 'alpha': .3}
    plot += r_ggplot2.geom_hline(
        yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)), **guide_style)
    for x_cut in (float(FC_THRESHOLD_RESPONSE), -float(FC_THRESHOLD_RESPONSE)):
        plot += r_ggplot2.geom_vline(xintercept=x_cut, **guide_style)

    # --- points and repelled text labels --------------------------------
    plot += r_ggplot2.geom_point(**{'show.legend': False})

    aes_text = r_ggplot2.aes_string(label='label')
    plot += aes_text
    repel_options = {
        'nudge_x': r_dollar(r_data, 'nudgex'),
        'nudge_y': r_dollar(r_data, 'nudgey'),
        'size': label_size,
        'family': 'Helvetica',
        'show.legend': False,
        'point.padding': 0.25,
        'min.segment.length': 0,
        'segment.color': '#BDBDBD',
    }
    plot += r_ggrepel.geom_text_repel(aes_text, **repel_options)

    plot += r_ggplot2.labs(x=xlabel,
                           y='Adjusted p value (-log10)',
                           title=title)

    plot.plot()
Beispiel #25
0
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \
                     'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \
                     'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \
                     'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \
                     'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \
                     'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \
                     'axis.text.y': ggplot2.theme_blank()} ) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()

# Render the map plot assembled above.
p_map.plot()

## add the scatterplot
## define layout of subplot with viewports

# Viewport that places the inset at x=0.19, y=0.2 with width 0.32 and
# height 0.4 (grid's default units and justification apply).
vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4)

# Scatterplot of NEAR_DIST against OBAMA_SHAR, points coloured by OBAMA_SHAR,
# with a black smoother on top and the legend hidden.
# NOTE(review): ggplot2.opts looks like the legacy theming call (newer ggplot2
# releases use theme()) -- confirm against the installed ggplot2/rpy2 versions.
p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
    ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \
    ggplot2.stat_smooth(color="black") + \
    ggplot2.opts(**{'legend.position': 'none'}) + \
    ggplot2.scale_x_continuous("Obama Vote Share") + \
    ggplot2.scale_y_continuous("Distance to nearest Railroad")

# Draw the scatterplot inside the inset viewport, on top of the map.
p_sub.plot(vp=vp_sub)

# Close the graphics device (presumably opened earlier in the script, outside
# this excerpt) so the output is flushed.
grdevices.dev_off()
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run.

    Plots the cumulative number of reads or base pairs against the time
    elapsed since the first read, as a step curve.

    Parameters:
        args: options object; ``plot_type`` ('reads' or 'basepairs'),
            ``savedf``, ``extrapolate``, ``theme_bw`` and ``saveas`` are
            read from it.
        start_times: sequence of read start times (must be non-empty);
            differences are divided by 3600 for the "Time (hours)" axis.
        read_lengths: sequence of integer read lengths, one per read.

    Raises:
        ValueError: if ``args.plot_type`` is not a supported value.

    Exits the process with status 1 when ``args.saveas`` has an extension
    other than ``.pdf`` or ``.png``.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0.
    # NOTE(review): the tiny offset presumably keeps x strictly positive for
    # the power-law fit used by the extrapolation below -- confirm.
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) + 0.00000001 for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail here with an explicit message instead.
        raise ValueError("Unknown plot type: %s" % (args.plot_type,))

    # make a data frame of the lists
    df = robjects.DataFrame({'start': r_start_times,
                             'lengths': r_read_lengths,
                             'cumul': cumulative})

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_step(size=2) \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    # extrapolation: nls fit of y ~ a * (seconds)^b, drawn over the x range
    # 0 .. args.extrapolate
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se='FALSE', start=start) \
                + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # non-zero status so callers and shells see the failure
            sys.exit(1)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
# Length of the first column of dataf; used below as the number of rows.
# NOTE(review): assumes dataf is an rpy2 data frame with one row per peak,
# created earlier in the script (outside this excerpt) -- confirm.
number_of_peaks = len(dataf[0])


# Per-row coefficient of variation (cvI) and the row values it was computed
# from (newRow; not read again within this excerpt).
cvI = []
newRow = []
# R indexing is 1-based, hence the 1..number_of_peaks range.
for i in range(1,number_of_peaks+1):
    # Select row i with all columns.
    row = dataf.rx(i,True)
    rowA = np.array(row)
    # The first two entries of each row are skipped.
    # NOTE(review): presumably identifier columns rather than measurements -- confirm.
    newRow.append(rowA[2:])
    # cv() is defined elsewhere in the file.
    cvI.append(cv(rowA[2:]))
#cv.append(rowA[2:].std()/rowA[2:].mean())
# Convert the Python list to an R object and wrap it in a one-column data frame.
cv_r=robjects.conversion.py2ri(cvI)
df_cv = {'CV' : cv_r}
dataf_cv = robjects.DataFrame(df_cv)
# melt() reshapes to long format; the code below relies on the result having
# a 'value' column as its second column.
dtf_cv = robjects.r.melt(dataf_cv)
# Append that second column to the original table and rename it from
# 'value' to 'CV' before writing the CSV.
d=dataf.cbind(dtf_cv.rx(2))
d.names[tuple(d.colnames).index('value')] = 'CV'
#d = base.merge_data_frame(dataf,dtf_cv.rx(2))
utilis.write_csv(d, options.csv_output)


# Plot data: the melted CV values plus a running index n_peak = 1..number_of_peaks.
dc = dtf_cv.cbind(n_peak = robjects.IntVector(range(1,number_of_peaks+1)))
#n_peak = robjects.IntVector(1,number_of_peaks)
# Scatter of CV ('value') against the running index, black-and-white theme.
gp = ggplot2.ggplot(dc)
pp=gp+ggplot2.aes_string(x='n_peak',y='value') + ggplot2.geom_point()+ggplot2.theme_bw()+ ggplot2.ggtitle('Coefficient of Variation')+ \
ggplot2.scale_x_continuous("Number of Peaks")+ ggplot2.scale_y_continuous("CV")

# Open an X11 graphics window and draw the plot in it.
r.X11()
pp.plot()

Beispiel #28
0
def main():
    """Plot smoothed main vs. control coverage around the TSS.

    Command line: ``[options] <raw file>``.  Each non-blank line of the raw
    file is split on whitespace; column 0 is parsed as an integer
    coordinate, column 1 as the main coverage and column 2 as the control
    coverage.  The two coverage series are drawn as loess-smoothed curves
    coloured by label and written to ``<out_prefix>_and.pdf``.

    Options:
        -d      TSS downstream extent (default 2000)
        -o      output prefix (default 'tss')
        -u      TSS upstream extent (default 5000)
        --ymax  optional upper limit of the y axis

    Exits through ``parser.error`` when the raw file argument is missing.
    """
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d',
                      dest='downstream',
                      default=2000,
                      type='int',
                      help='TSS downstream [Default: %default]')
    parser.add_option('-o',
                      dest='out_prefix',
                      default='tss',
                      help='Output prefix [Default: %default]')
    parser.add_option('-u',
                      dest='upstream',
                      default=5000,
                      type='int',
                      help='TSS upstream [Default: %default]')
    parser.add_option('--ymax',
                      dest='ymax',
                      default=None,
                      type='float',
                      help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    raw_file = args[0]

    # collect data; the context manager closes the file even if a line
    # fails to parse
    coords = []
    main_cov = []
    control_cov = []
    with open(raw_file) as raw_in:
        for line in raw_in:
            a = line.split()
            if not a:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            # coords is parsed (so a malformed column still raises) but the
            # x axis below is rebuilt from the -u/-d options
            coords.append(int(a[0]))
            main_cov.append(float(a[1]))
            control_cov.append(float(a[2]))

    # data structures
    # NOTE(review): tss_i has one entry per position while cov/labels hold
    # two series; this presumably relies on R recycling tss_i, and on the
    # file having exactly upstream + downstream + 1 rows -- confirm.
    n_positions = options.upstream + options.downstream + 1
    tss_i = ro.IntVector(range(-options.upstream, options.downstream + 1))
    labels = ro.StrVector(['Main'] * n_positions + ['Control'] * n_positions)
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax is None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage',
                                         limits=ro.FloatVector(
                                             [0, options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
Beispiel #29
0
    [x[1] for x in combos_r])
# Grouping key "<code>:<sequence>" for every row, later used as the
# grouping factor of the per-group linear fits.
d['group'] = StrVector(
    [d['code'][x] + ':' + d['sequence'][x] for x in xrange(len(d['n_loop']))])
dataf = DataFrame(d)

# Running time against n_loop: one coloured line (plus points) per 'code',
# one facet per 'sequence'.
# NOTE(review): ggplot2.opts looks like the legacy theming/title call (newer
# ggplot2 releases use theme()/ggtitle()) -- confirm the targeted version.
from rpy2.robjects.lib import ggplot2
p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop",
                                         y="time",
                                         colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop",
                                          y="time",
                                          colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.opts(title = "Benchmark (running time)")

# Write the figure as a 712x512 PNG at a path relative to the working directory.
from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

# Fit a separate linear model time ~ n_loop for every 'group' value with
# nlme's lmList; stats.na_exclude is passed as the NA handler.
#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")
fit = nlme.lmList(Formula('time ~ n_loop | group'),
                  data=dataf,
                  na_action=stats.na_exclude)
##text_log+="average: "+str(rmean(test23)[0])+end
##text_log+="sum: "+str(rsum(test23)[0])+end
#
#roughbin= round(ma[0]/100)
#bins=round(roughbin/100)*100


#ma2=rmax(ed)

#dataf_subset = dataf.rx(dataf.rx2("contig").ro >= 18, true)

# The R 'scales' package supplies the break and label helpers used below.
scales = importr('scales')

# Base plot over dataf (defined earlier in the script, outside this excerpt).
gp = ggplot2.ggplot(dataf)
	#geom_histogram(aes(y = ..density..))
	#   ggplot2.geom_density()+\

	    # pp = gp + ggplot2.aes_string(x='%s(contrrr)') +  ggplot2.geom_histogram()+ggplot2.scale_y_sqrt()
# NOTE(review): 'bins' is only interpolated into the x-axis label below;
# geom_histogram() is called without any bin setting -- confirm that is intended.
bins=10
# Theme element built by evaluating R code directly: x tick labels rotated 90 degrees.
teest3=robjects.r('theme(axis.text.x=element_text(angle=90))')

# Histogram of the 'Length' column with about 20 pretty x breaks, about 10
# pretty y breaks and comma-formatted counts.
pp = gp + \
ggplot2.aes_string(x='Length') +  \
ggplot2.geom_histogram()+\
ggplot2.ggtitle("Found IS fragment lengths")+ \
ggplot2.scale_x_continuous(name="fragment lengths, bin="+str(bins),breaks=scales.pretty_breaks(20)) +\
ggplot2.scale_y_continuous(labels=scales.comma,name="Count",breaks=scales.pretty_breaks(10))+ \
teest3
pp.plot()
# No plot object is passed to ggsave here.
# NOTE(review): presumably relies on ggsave defaulting to the last plot drawn;
# the absolute output path is machine-specific -- confirm both.
robjects.r.ggsave("/Users/security/science/dna_subj_hist.pdf")
Beispiel #31
0
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run.

    Plots the cumulative number of reads or base pairs against the time
    elapsed since the first read, as points joined by a line.

    Parameters:
        args: options object; ``plot_type`` ('reads' or 'basepairs') and
            ``saveas`` are read from it.
        start_times: sequence of read start times (must be non-empty);
            differences are divided by 3600 for the "Time (hours)" axis.
        read_lengths: sequence of integer read lengths, one per read.

    Raises:
        ValueError: if ``args.plot_type`` is not a supported value.

    Exits the process with status 1 when ``args.saveas`` has an extension
    other than ``.pdf`` or ``.png``.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail here with an explicit message instead.
        raise ValueError("Unknown plot type: %s" % (args.plot_type,))

    # make a data frame of the lists
    df = robjects.DataFrame({'start': r_start_times,
                             'lengths': r_read_lengths,
                             'cumul': cumulative})

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_point() \
        + ggplot2.geom_line() \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5,
                          units="in", res=300)
        else:
            # write() works on both Python 2 and 3, unlike "print >>stream"
            sys.stderr.write("Unrecognized extension for %s!\n" % (plot_file))
            # non-zero status so callers and shells see the failure
            sys.exit(1)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()

# One slot per orientation visited below: 0, 10, ..., 360 gives 37 values.
heat_demand = np.zeros(37)
# Building dimensions passed unchanged to every ECR call.
Bdim = robjects.FloatVector([12,6])
for i,BO in enumerate(range(0,361,10)):
    # ECR is defined elsewhere; only element [2][0] of its result is kept.
    # NOTE(review): presumably that element is the heat demand -- confirm.
    res = ECR(Building_Orientation = BO,
                Building_Dim = Bdim)
    heat_demand[i] = res[2][0]

# Transform to R data types
hd = robjects.FloatVector([h for h in heat_demand])
bo = robjects.FloatVector([b for b in range(0,361,10)])

# Create a python dictionary
p_datadic = {'Heat_Demand': hd,
             'Building_Orientation': bo}

# Create R data.frame
r_dataf = robjects.DataFrame(p_datadic)

# plot with ggplot2
# Red line of heat demand over orientation, drawn in polar coordinates with
# direction -1 and a start offset of -pi/2, and x breaks every 15 from 0 to 345.
gp = ggplot2.ggplot(r_dataf)
pp = gp + ggplot2.aes_string(y= 'Heat_Demand', x= 'Building_Orientation') + \
     ggplot2.geom_line(colour = "red", size = 1) + \
     ggplot2.coord_polar(direction = -1, start = -pi/2) + \
     ggplot2.ggtitle("Heat demand for all possible buildimg orientations") + \
     ggplot2.scale_x_continuous(breaks=robjects.FloatVector(range(0, 360, 15)))

# Draw the plot, then close the graphics device (grdevices comes from
# outside this excerpt).
pp.plot()
grdevices.dev_off()
Beispiel #33
0
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run.

    Plots the cumulative number of reads or base pairs against the time
    elapsed since the first read, as a step curve.  Only every
    ``args.skip``-th data point is kept for the plot and the saved table.

    Parameters:
        args: options object; ``plot_type`` ('reads' or 'basepairs'),
            ``skip``, ``savedf``, ``extrapolate``, ``theme_bw`` and
            ``saveas`` are read from it.
        start_times: sequence of read start times (must be non-empty);
            differences are divided by 3600 for the "Time (hours)" axis.
        read_lengths: sequence of integer read lengths, one per read.

    Raises:
        ValueError: if ``args.plot_type`` is not a supported value.

    Exits the process with status 1 when ``args.saveas`` has an extension
    other than ``.pdf`` or ``.png``.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0.
    # NOTE(review): the tiny offset presumably keeps x strictly positive for
    # the power-law fit used by the extrapolation below -- confirm.
    r_start_times = robjects.FloatVector(
        [float(t - t_0) / float(3600) + 0.00000001 for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail here with an explicit message instead.
        raise ValueError("Unknown plot type: %s" % (args.plot_type,))

    step = args.skip

    def every_nth(vector):
        # keep elements 0, step, 2*step, ... of the vector
        return [vector[n] for n in range(0, len(vector), step)]

    # make a data frame of the (subsampled) lists
    df = robjects.DataFrame({
        'start': robjects.FloatVector(every_nth(r_start_times)),
        'lengths': robjects.IntVector(every_nth(r_read_lengths)),
        'cumul': robjects.IntVector(every_nth(cumulative)),
    })

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title (totals cover all reads, not just the subsampled ones)
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_step(size=2) \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    # extrapolation: nls fit of y ~ a * (seconds)^b, drawn over the x range
    # 0 .. args.extrapolate
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se='FALSE', start=start) \
                + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file,
                          width=8.5,
                          height=8.5,
                          units="in",
                          res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # non-zero status so callers and shells see the failure
            sys.exit(1)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()