Esempio n. 1
0
def plot_squiggle(args, filename, start_times, mean_signals):
	"""
	Use rpy2 to create a squiggle plot of the read
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]
	total_time = start_times[-1] - start_times[0]
	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
	r_mean_signals = robjects.FloatVector(mean_signals)
	
	# infer the appropriate number of events given the number of facets
	num_events = len(r_mean_signals)
	events_per_facet = (num_events / args.num_facets) + 1
	# dummy variable to control faceting
	facet_category = robjects.FloatVector([(i / events_per_facet) + 1 for i in range(len(start_times))])

	# make a data frame of the start times and mean signals
	d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
	df = robjects.DataFrame(d)

	gp = ggplot2.ggplot(df)
	if not args.theme_bw:
		pp = gp + ggplot2.aes_string(x='start', y='mean') \
			+ ggplot2.geom_step(size=0.25) \
			+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
			+ ggplot2.scale_x_continuous('Time (seconds)') \
			+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
			+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
			+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
	else:
		pp = gp + ggplot2.aes_string(x='start', y='mean') \
			+ ggplot2.geom_step(size=0.25) \
			+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
			+ ggplot2.scale_x_continuous('Time (seconds)') \
			+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
			+ ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
			+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)}) \
			+ ggplot2.theme_bw()

	if args.saveas is not None:
		plot_file = os.path.basename(filename) + "." + args.saveas
		if os.path.isfile(plot_file):
			raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
		if args.saveas == "pdf":
			grdevices.pdf(plot_file, width = 8.5, height = 11)
		elif args.saveas == "png":
			grdevices.png(plot_file, width = 8.5, height = 11, 
				units = "in", res = 300)
		pp.plot()
		grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		raw_input()
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    samp_vector = ["set1" for i in range(len(samp_set1_vals))]
    samp_vector.extend(["set2" for i in range(len(samp_set2_vals))])

    dframe = robjects.DataFrame({
        "sample":
        robjects.StrVector(samp_vector),
        "value":
        robjects.FloatVector(samp_set1_vals + samp_set2_vals)
    })

    gp = ggplot2.ggplot(dframe)

    pp = gp + \
     ggplot2.aes_string(x="sample", y='value') + \
     ggplot2.geom_boxplot() +\
     ggplot2.geom_jitter() +\
     ggplot2.theme_bw()

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    pp.plot()
    grdevices.dev_off()
Esempio n. 3
0
def main():
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int', help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss', help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int', help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float', help='Y-coordinate limit [Default: %default]')
    (options,args) = parser.parse_args()
    
    if len(args) != 1:
        parser.error('Must provide raw file')
    else:
        raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    for line in open(raw_file):
        a = line.split()
        coords.append(int(a[0]))
        main_cov.append(float(a[1]))
        control_cov.append(float(a[2]))

    # data structures
    tss_i = ro.IntVector(range(-options.upstream,options.downstream+1))
    labels = ro.StrVector(['Main']*(options.upstream+options.downstream+1)+['Control']*(options.upstream+options.downstream+1))
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

    # plot
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_colour_discrete('')
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax == None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0,options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
Esempio n. 4
0
def plot_coef(feat_mat_dir,
              model_dir,
              expt_names,
              pref,
              outfile=None,
              height=120,
              fsize=12):

    for expt_idx, ex in enumerate(expt_names):
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        (tmp_feat, tmp_y, tmp_feat_names,
         tmp_gene_names) = read_feat_mat(feat_mat_file)

        if expt_idx == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            assert (all(f[0] == f[1] for f in zip(feat_names, tmp_feat_names)))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)

    nexpt = expt_idx + 1

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size, )),
        'Regression': np.reshape(reg_coef, (reg_coef.size, ))
    })

    df2 = pd.melt(df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename=outfile,
                    plot=gp,
                    width=w,
                    height=height,
                    unit='mm')
    return df
Esempio n. 5
0
def plot_cv_r2(pandas_df, outfile, fsize = 10, height = 120, max_width = 50, xlab = 'Parameters'):
    """Makes boxplots of cross-validation results for different parameter settings"""

    ncv = len(set(list(pandas_df['title'])))
    r_df = com.convert_to_r_dataframe(pandas_df)
    
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(title)', y = 'r2') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('R-squared') + \
        ggplot2.scale_x_discrete(xlab) + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize)})
    w = max(5 * ncv, max_width) 
    ro.r.ggsave(filename = outfile, plot = gp, width = w, height = height, unit = 'mm')
Esempio n. 6
0
def plot_hist(sizes, args):
    """
    Use rpy2 to plot a histogram of the read sizes
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    sizes = robjects.IntVector([s for s in sizes \
                if s < args.max_length and s > args.min_length])

    sizes_min = min(sizes)
    sizes_max = max(sizes)

    binwidth = (sizes_max - sizes_min) / args.num_bins

    d = {'sizes': sizes}
    df = robjects.DataFrame(d)

    # plot
    gp = ggplot2.ggplot(df)

    if not args.theme_bw:
        pp = gp + ggplot2.aes_string(x='sizes') \
                + ggplot2.geom_histogram(binwidth=binwidth)
    else:
        pp = gp + ggplot2.aes_string(x='sizes') \
            + ggplot2.geom_histogram(binwidth=binwidth) \
            + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file,
                          width=8.5,
                          height=8.5,
                          units="in",
                          res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
Esempio n. 7
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir = None):
    """Makes correlation plots between CEL files for the same cell type"""

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []
    
    for ex_idx, ex in enumerate(expt_names):
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0
        
        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                plot_idx += 1
                cor = np.corrcoef(expr[:, tmp_idx[i]], expr[:, tmp_idx[j]])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))
                
                df = ro.DataFrame({'x':ro.FloatVector(expr[:, tmp_idx[i]]), 
                                   'y':ro.FloatVector(expr[:, tmp_idx[j]])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                ggplot2.geom_point(size = 1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                 'axis.title.x':ggplot2.element_text(size = 8),
                                 'axis.text.y':ggplot2.element_text(size = fsize),
                                 'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                 'plot.title':ggplot2.element_text(size = fsize)})
                
                if outdir is None:
                    gp.plot()
                else:
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    outfile = os.path.join(outdir, ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename = outfile, plot = gp, width = 85, height = 85, unit = 'mm')
    df = pd.DataFrame({'name1':names_1, 'name2':names_2, 'cor':cors}, index = titles)
    if not outdir is None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep = '\t')
    return df
Esempio n. 8
0
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    columns_to_data = {'subgroup': [], tname: [], 'count': []}
    max_count = 0
    for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items():
        for ss, n_count in sses_to_n_count.items():
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data[tname].append(ss)
            columns_to_data['count'].append(n_count)
            if n_count > max_count:
                max_count = n_count
    r_columns_to_data = {
        'subgroup':
        ro.FactorVector(columns_to_data['subgroup'],
                        levels=ro.StrVector(
                            _sort_subgroup(set(columns_to_data['subgroup'])))),
        tname:
        ro.StrVector(columns_to_data[tname]),
        'count':
        ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    max_count = int(max_count / 1000 * 1000 + 1000)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))

    grdevices.png(file=histogram_file_path, width=1200, height=800)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.aes_string(x='subgroup', y='count', fill=tname) + \
         ggplot2.geom_bar(position="dodge",width=0.8, stat="identity") + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)}) + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40,angle=45)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)}) + \
         ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                    limits=ro.IntVector([0, max_count])) + \
         ggplot2.geom_text(ggplot2.aes_string(label='count'), size=6, angle=35, hjust=-0.1,
                           position=ggplot2.position_dodge(width=0.8),
                           vjust=-0.2)

    pp.plot()
    logging.info(str.format("Output step3 file {}", histogram_file_path))
    grdevices.dev_off()
Esempio n. 9
0
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean,
                                          prefix=''):
    pandas2ri.activate()
    subgroups_to_lrr_count = {}
    columns_to_data = {'subgroup': [], 'pos': [], 'acc20': []}
    for subgroup, (acc20means,
                   acc20_count) in subgroups_to_lrrs_acc20mean.items():
        subgroups_to_lrr_count[subgroup] = acc20_count
        for index, acc20mean in enumerate(acc20means):
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data['pos'].append(index + 1)
            columns_to_data['acc20'].append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    with open(os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt"),
              'w') as f:
        for subgroup, lrr_count in subgroups_to_lrr_count.items():
            f.write(str.format("{}: {}\n", subgroup, lrr_count))

    # Generate the line chart file
    r_columns_to_data = {
        'subgroup': ro.StrVector(columns_to_data['subgroup']),
        'pos': ro.IntVector(columns_to_data['pos']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)

    line_chart_file_path = os.path.join(OUTPUT_PATH,
                                        prefix + "step3_5_lrr_acc20_line.png")
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", line_chart_file_path,
                   df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup') + \
         ggplot2.geom_point(size=4, shape=20) + \
         ggplot2.geom_line(size=3) + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
         ggplot2.scale_x_continuous(breaks=ro.IntVector(range(1, 25)), labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))
    pp.plot()
    logging.info(str.format("Output step3 file {}", line_chart_file_path))
    grdevices.dev_off()
Esempio n. 10
0
def plot_thresh_distr(motif_names, thresh, out_dir, width = 350):
    """Creates boxplots of the thresholds used with each feature."""

    df = pd.DataFrame({'motif':motif_names, 'thresh':thresh})
    df = df[df['thresh'] > 1]

    df.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep = '\t', index = False)
    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh') + \
            ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70])) + \
            ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip() + \
            ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                             'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                             'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    for ext in ['.pdf', '.png']:
        ro.r.ggsave(filename = os.path.join(out_dir, 'count_thresh_bar' + ext),
                    plot = gp, width = width, height = 300, unit = 'mm')
Esempio n. 11
0
def plot_cv_r2(pandas_df,
               outfile,
               fsize=10,
               height=120,
               max_width=50,
               xlab='Parameters'):
    """Makes boxplots of cross-validation results for different parameter settings"""

    ncv = len(set(list(pandas_df['title'])))
    r_df = com.convert_to_r_dataframe(pandas_df)

    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(title)', y = 'r2') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('R-squared') + \
        ggplot2.scale_x_discrete(xlab) + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize)})
    w = max(5 * ncv, max_width)
    ro.r.ggsave(filename=outfile, plot=gp, width=w, height=height, unit='mm')
Esempio n. 12
0
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count,
                                       file_name):
    columns_to_data = {'place': [], 'pos': [], 'count': []}
    max_count = 0
    for place_pos_type, n_count in place_type_pos_type_to_count.items():
        place_type, pos_type = place_pos_type.split('_')
        columns_to_data['place'].append(place_type)
        columns_to_data['pos'].append(pos_type)
        columns_to_data['count'].append(n_count)
        if n_count > max_count:
            max_count = n_count
    r_columns_to_data = {
        'place': ro.StrVector(columns_to_data['place']),
        'pos': ro.StrVector(columns_to_data['pos']),
        'count': ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    if max_count > 1000:
        max_count = int(max_count / 1000 * 1000 + 1000)
    else:
        max_count = int(max_count / 100 * 100 + 100)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))
    grdevices.png(file=histogram_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.aes_string(x='pos', y='count', fill='place') + \
         ggplot2.geom_bar(position="dodge", stat="identity") + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                    limits=ro.IntVector([0, max_count])) + \
         ggplot2.geom_text(ggplot2.aes_string(label='count'),
                           position=ggplot2.position_dodge(width=0.8), size=10, angle=35, hjust=-0.2,
                           vjust=-0.5)
    pp.plot()
    logging.info(str.format("Output step3 file {}", histogram_file_path))
    grdevices.dev_off()
Esempio n. 13
0
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname,
                                          line_chart_file_path):
    logging.debug(
        str.format("Begin to generate {}, data {}", line_chart_file_path,
                   ts_to_acc20s))
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)
    columns_to_data = {tname: [], 'site': [], 'acc20': []}
    for ss, acc20means in ts_to_acc20mean.items():
        for index, acc20mean in enumerate(acc20means):
            columns_to_data[tname].append(ss)
            columns_to_data['site'].append(index - 5)
            columns_to_data['acc20'].append(acc20mean)

    # Generate the line chart file
    r_columns_to_data = {
        tname: ro.StrVector(columns_to_data[tname]),
        'site': ro.IntVector(columns_to_data['site']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)

    logging.debug(
        str.format("The Data Frame for file {}: \n{}", line_chart_file_path,
                   df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname) + \
         ggplot2.geom_point(size=4, shape=20) + \
         ggplot2.geom_line(size=3) + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
         ggplot2.scale_x_continuous(breaks=ro.IntVector(list(range(-5, 6))),
                                    labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N', '1', '2', '3', '4', '5']))
    pp.plot()
    logging.info(str.format("Output step3 file {}", line_chart_file_path))
    grdevices.dev_off()
Esempio n. 14
0
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Creates boxplots of the thresholds used with each feature."""

    df = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    df = df[df['thresh'] > 1]

    df.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep='\t', index=False)
    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh') + \
            ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70])) + \
            ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip() + \
            ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                             'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                             'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    for ext in ['.pdf', '.png']:
        ro.r.ggsave(filename=os.path.join(out_dir, 'count_thresh_bar' + ext),
                    plot=gp,
                    width=width,
                    height=300,
                    unit='mm')
Esempio n. 15
0
def plot_coef(feat_mat_dir, model_dir, expt_names, pref, outfile = None, height = 120, fsize = 12):
    
    for expt_idx, ex in enumerate(expt_names):
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        (tmp_feat, tmp_y, tmp_feat_names, tmp_gene_names) = read_feat_mat(feat_mat_file)
        
        if expt_idx == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            assert(all(f[0] == f[1] for f in zip(feat_names, tmp_feat_names)))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis = 1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis = 1)
    
    nexpt = expt_idx + 1
    
    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({'feature':np.repeat(feat_names, nexpt),
                       'Classification':np.reshape(clf_coef, (clf_coef.size,)),
                       'Regression':np.reshape(reg_coef, (reg_coef.size,))})

    df2 = pd.melt(df, id_vars = 'feature', var_name = 'fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename = outfile, plot = gp, width = w, height = height, unit = 'mm')
    return df
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
image_file_type):
    samp_vector = ["set1" for i in range(len(samp_set1_vals))]
    samp_vector.extend(["set2" for i in range(len(samp_set2_vals))])

    dframe = robjects.DataFrame({"sample":robjects.StrVector(samp_vector),
                                 "value":robjects.FloatVector(samp_set1_vals + samp_set2_vals)})

    gp = ggplot2.ggplot(dframe)

    pp = gp + \
     ggplot2.aes_string(x="sample", y='value') + \
     ggplot2.geom_boxplot() +\
     ggplot2.geom_jitter() +\
     ggplot2.theme_bw()

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    pp.plot()
    grdevices.dev_off()
Esempio n. 17
0
grdevices.dev_off()



grdevices.png('../../_static/graphics_ggplot2withgrid.png',
              width = 612, height = 612, antialias="subpixel", type="cairo")
#-- gridwithggplot2-begin
grid.newpage()

# create a viewport as the main plot
vp = grid.viewport(width = 1, height = 1) 
vp.push()

p = ggplot2.ggplot(datasets.rock) + \
    ggplot2.geom_point(ggplot2.aes_string(x = 'area', y = 'peri')) + \
    ggplot2.theme_bw()
p.plot(vp = vp)

vp = grid.viewport(width = 0.6, height = 0.6, x = 0.37, y=0.69)
vp.push()
p = ggplot2.ggplot(datasets.rock) + \
    ggplot2.geom_point(ggplot2.aes_string(x = 'area', y = 'shape')) + \
    ggplot2.opts(**{'axis.text.x': ggplot2.theme_text(angle = 45)})

p.plot(vp = vp)

#-- gridwithggplot2-end
grdevices.dev_off()


Esempio n. 18
0
def plot_collectors_curve(args, start_times, read_lengths):
    """
	Use rpy2 to create a collectors curve of the run
	"""
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001 \
     for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = \
         r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)

    step = args.skip
    # make a data frame of the lists
    d = {
        'start':
        robjects.FloatVector(
            [r_start_times[n] for n in xrange(0, len(r_start_times), step)]),
        'lengths':
        robjects.IntVector(
            [r_read_lengths[n] for n in xrange(0, len(r_read_lengths), step)]),
        'cumul':
        robjects.IntVector(
            [cumulative[n] for n in xrange(0, len(cumulative), step)])
    }
    df = robjects.DataFrame(d)

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
     + str(total_reads) + " reads and " \
     + str(total_bp) + " base pairs."

    # plot
    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='start', y='cumul') \
     + ggplot2.geom_step(size=2) \
     + ggplot2.scale_x_continuous('Time (hours)') \
     + ggplot2.scale_y_continuous(y_label) \
     + ggplot2.ggtitle(plot_title)

    # extrapolation
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se='FALSE', start=start) \
                + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file,
                          width=8.5,
                          height=8.5,
                          units="in",
                          res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
Esempio n. 19
0
              height=612,
              antialias="subpixel",
              type="cairo")
#-- gridwithggplot2-begin
grid.newpage()

# create a viewport as the main plot
vp = grid.viewport(width=1, height=1)
vp.push()

tmpenv = data(datasets).fetch("rock")
rock = tmpenv["rock"]

p = ggplot2.ggplot(rock) + \
    ggplot2.geom_point(ggplot2.aes_string(x = 'area', y = 'peri')) + \
    ggplot2.theme_bw()
p.plot(vp=vp)

vp = grid.viewport(width=0.6, height=0.6, x=0.37, y=0.69)
vp.push()
p = ggplot2.ggplot(rock) + \
    ggplot2.geom_point(ggplot2.aes_string(x = 'area', y = 'shape')) + \
    ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle = 45)})

p.plot(vp=vp)

#-- gridwithggplot2-end
grdevices.dev_off()

#---
Esempio n. 20
0
    grdevices = importr('grDevices')

    ro.r('''change_name=function(pop_size, generations,freq){
		name=sprintf("../results/mcm_%sNe_%sfreq_%sgen.png", pop_size,freq,generations)
		return(name)}
	     ''')
    name = ro.r['change_name']
    name = name(args.ps, args.gen, args.freq)
    print("Output figure in:", name)

    grdevices.png(file=name, width=700, height=700)

    gp = ggplot2.ggplot(res2)
    pp = gp + ggplot2.aes_string(
        x='Counts', y='Proportion') + ggplot2.geom_bar(
            stat="identity", color="darkgoldenrod3") + ggplot2.theme_bw()
    pp.plot()
    grdevices.dev_off()
    print("Plot done!")

elif (
        args.diff
):  ###references:doi: 10.1093/molbev/msx254 &&  https://doi.org/10.1111/j.1365-294X.2010.04997.x
    p = args.freq
    N = args.ps
    t = args.gen
    newx = []
    x = np.arange(0, 1.001, 0.001001001)
    res = [0] * len(x)
    print("Estimating allele counts")
    for i in range(1, 101):
Esempio n. 21
0
#!/usr/bin/python

from datetime import datetime
import sys
import subprocess
import os
import math

import rpy2.robjects as robjects
robjects.r('library("scales")')
import rpy2.robjects.lib.ggplot2 as ggplot2
ggplot2.theme_set(ggplot2.theme_bw())
#print ggplot2.theme_get()
from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector, StrVector, IntVector, DataFrame


def ggplot2_options():
    return ggplot2.opts(
        **{
            'axis.title.x':
            ggplot2.theme_blank(),
            'axis.title.y':
            ggplot2.theme_text(
                family='serif', face='bold', size=15, angle=90, vjust=0.2),
            'axis.text.x':
            ggplot2.theme_text(family='serif', size=15),
            'axis.text.y':
            ggplot2.theme_text(family='serif', size=15),
            'legend.title':
            ggplot2.theme_text(family='serif', face='bold', size=15),
Esempio n. 22
0
    iris_py = pandas.read_csv("/home/yarden/iris.csv")
    iris_py = iris_py.rename(columns={"Name": "Species"})
    corrs = []
    from scipy.stats import spearmanr
    for species in set(iris_py.Species):
        entries = iris_py[iris_py["Species"] == species]
        c = spearmanr(entries["SepalLength"], entries["SepalWidth"])
        print "c: ", c

    # compute r.cor(x, y) and divide up by Species
    # Assume we get a vector of length Species saying what the
    # correlation is for each Species' Petal Length/Width
    p = ggplot2.ggplot(iris) + \
        ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length", y="Sepal.Width")) + \
        ggplot2.facet_wrap(Formula("~Species")) 
    p.plot()
    r["dev.off"]()    

    sys.exit(1)
    grdevices = importr('grDevices')
    ggplot2.theme_set(ggplot2.theme_bw(12))

    p = ggplot2.ggplot(iris) + \
        ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length", y="Sepal.Width")) + \
        ggplot2.facet_wrap(Formula('~ Species'), ncol=2, nrow = 2) + \
        ggplot2.geom_text(aes_string(x="Sepal.Length", y="Sepal.Width"), label="t") + \
        ggplot2.GBaseObject(r('ggplot2::coord_fixed')()) # aspect ratio
    p.plot()

Esempio n. 23
0
#!/usr/bin/env python
from problems import *
from utils import *
from config import *
import sys
import rpy2.robjects as robjects
robjects.r('library("scales")')
import rpy2.robjects.lib.ggplot2 as ggplot2
ggplot2.theme_set(ggplot2.theme_bw ())
from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector, StrVector, IntVector, DataFrame

def ggplot2_options ():
  def normal_text():
    return ggplot2.theme_text(family = 'serif', size = 15)
  def bold_text():
    return ggplot2.theme_text(family = 'serif', face = 'bold', size = 15)
  def rotated_text():
    return ggplot2.theme_text(family = 'serif', face = 'bold', 
                              size = 15, angle=90, vjust=0.2)

  return ggplot2.opts (**{'axis.title.x' : ggplot2.theme_blank(),
                          'axis.title.y' : rotated_text(),
                          'axis.text.x' : normal_text(),
                          'axis.text.y' : normal_text(),
                          'legend.title' : bold_text(),
                          'legend.text' : normal_text(),
                          'aspect.ratio' : 0.6180339888,
                          'strip.text.x' : normal_text(),
                          })
Esempio n. 24
0
number_of_peaks = len(dataf[0])


cvI = []
newRow = []
for i in range(1,number_of_peaks+1):
    row = dataf.rx(i,True)
    rowA = np.array(row)
    newRow.append(rowA[2:])
    cvI.append(cv(rowA[2:]))
#cv.append(rowA[2:].std()/rowA[2:].mean())
cv_r=robjects.conversion.py2ri(cvI)
df_cv = {'CV' : cv_r}
dataf_cv = robjects.DataFrame(df_cv)
dtf_cv = robjects.r.melt(dataf_cv)
d=dataf.cbind(dtf_cv.rx(2))
d.names[tuple(d.colnames).index('value')] = 'CV'
#d = base.merge_data_frame(dataf,dtf_cv.rx(2))
utilis.write_csv(d, options.csv_output)


dc = dtf_cv.cbind(n_peak = robjects.IntVector(range(1,number_of_peaks+1)))
#n_peak = robjects.IntVector(1,number_of_peaks)
gp = ggplot2.ggplot(dc)
pp=gp+ggplot2.aes_string(x='n_peak',y='value') + ggplot2.geom_point()+ggplot2.theme_bw()+ ggplot2.ggtitle('Coefficient of Variation')+ \
ggplot2.scale_x_continuous("Number of Peaks")+ ggplot2.scale_y_continuous("CV")

r.X11()
pp.plot()

def plot_collectors_curve(args, start_times, read_lengths):
	"""
	Use rpy2 to create a collectors curve of the run
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]

	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001 \
		for t in start_times])
	r_read_lengths = robjects.IntVector(read_lengths)

	# compute the cumulative based on reads or total base pairs
	if args.plot_type == 'reads':
		y_label = "Total reads"
		cumulative = \
			r.cumsum(robjects.IntVector([1] * len(start_times)))
	elif args.plot_type == 'basepairs':
		y_label = "Total base pairs"
		cumulative = r.cumsum(r_read_lengths)

	# make a data frame of the lists
	d = {'start': r_start_times, 
		'lengths': r_read_lengths,
		'cumul': cumulative}
	df = robjects.DataFrame(d)

	if args.savedf:
		robjects.r("write.table")(df, file=args.savedf, sep="\t")

	# title
	total_reads = len(read_lengths)
	total_bp = sum(read_lengths)
	plot_title = "Yield: " \
		+ str(total_reads) + " reads and " \
		+ str(total_bp) + " base pairs."

	# plot
	gp = ggplot2.ggplot(df)
	pp = gp + ggplot2.aes_string(x='start', y='cumul') \
		+ ggplot2.geom_step(size=2) \
		+ ggplot2.scale_x_continuous('Time (hours)') \
		+ ggplot2.scale_y_continuous(y_label) \
		+ ggplot2.ggtitle(plot_title)

        # extrapolation
	if args.extrapolate:
		start = robjects.ListVector({'a': 1, 'b': 1})
                pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                              formula='y~a*I((x*3600)^b)',
                                              se='FALSE', start=start) \
                        + ggplot2.xlim(0, float(args.extrapolate))

	if args.theme_bw:
		pp = pp + ggplot2.theme_bw()	

	if args.saveas is not None:
		plot_file = args.saveas
		if plot_file.endswith(".pdf"):
			grdevices.pdf(plot_file, width = 8.5, height = 8.5)
		elif plot_file.endswith(".png"):
			grdevices.png(plot_file, width = 8.5, height = 8.5, 
				units = "in", res = 300)
		else:
			logger.error("Unrecognized extension for %s!" % (plot_file))
			sys.exit()

		pp.plot()
		grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		raw_input()
Esempio n. 26
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type"""

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    for ex_idx, ex in enumerate(expt_names):
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                plot_idx += 1
                cor = np.corrcoef(expr[:, tmp_idx[i]], expr[:, tmp_idx[j]])[0,
                                                                            1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))

                df = ro.DataFrame({
                    'x': ro.FloatVector(expr[:, tmp_idx[i]]),
                    'y': ro.FloatVector(expr[:, tmp_idx[j]])
                })
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                ggplot2.geom_point(size = 1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                 'axis.title.x':ggplot2.element_text(size = 8),
                                 'axis.text.y':ggplot2.element_text(size = fsize),
                                 'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                 'plot.title':ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    outfile = os.path.join(outdir,
                                           ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename=outfile,
                                plot=gp,
                                width=85,
                                height=85,
                                unit='mm')
    df = pd.DataFrame({
        'name1': names_1,
        'name2': names_2,
        'cor': cors
    },
                      index=titles)
    if not outdir is None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
Esempio n. 27
0
def main():
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d',
                      dest='downstream',
                      default=2000,
                      type='int',
                      help='TSS downstream [Default: %default]')
    parser.add_option('-o',
                      dest='out_prefix',
                      default='tss',
                      help='Output prefix [Default: %default]')
    parser.add_option('-u',
                      dest='upstream',
                      default=5000,
                      type='int',
                      help='TSS upstream [Default: %default]')
    parser.add_option('--ymax',
                      dest='ymax',
                      default=None,
                      type='float',
                      help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    else:
        raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    for line in open(raw_file):
        a = line.split()
        coords.append(int(a[0]))
        main_cov.append(float(a[1]))
        control_cov.append(float(a[2]))

    # data structures
    tss_i = ro.IntVector(range(-options.upstream, options.downstream + 1))
    labels = ro.StrVector(['Main'] *
                          (options.upstream + options.downstream + 1) +
                          ['Control'] *
                          (options.upstream + options.downstream + 1))
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_colour_discrete('')
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax == None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage',
                                         limits=ro.FloatVector(
                                             [0, options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
Esempio n. 28
0
def plot_squiggle(args, filename, start_times, mean_signals):
    """
	Use rpy2 to create a squiggle plot of the read
	"""
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - start_times[0]
    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets
    num_events = len(r_mean_signals)
    events_per_facet = (num_events / args.num_facets) + 1
    # dummy variable to control faceting
    facet_category = robjects.FloatVector([(i / events_per_facet) + 1
                                           for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
    df = robjects.DataFrame(d)

    gp = ggplot2.ggplot(df)
    if not args.theme_bw:
        pp = gp + ggplot2.aes_string(x='start', y='mean') \
         + ggplot2.geom_step(size=0.25) \
         + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
         + ggplot2.scale_x_continuous('Time (seconds)') \
         + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
         + ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
         + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    else:
        pp = gp + ggplot2.aes_string(x='start', y='mean') \
         + ggplot2.geom_step(size=0.25) \
         + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
         + ggplot2.scale_x_continuous('Time (seconds)') \
         + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
         + ggplot2.ggtitle('Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)) \
         + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)}) \
         + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists' %
                (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11, units="in", res=300)
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
Esempio n. 29
0
def plot_hist(sizes, args):
    """
	Use rpy2 to plot a histogram of the read sizes
	"""
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    sizes = robjects.IntVector([s for s in sizes if s < args.max_length and s > args.min_length])

    sizes_min = min(sizes)
    sizes_max = max(sizes)

    binwidth = (sizes_max - sizes_min) / args.num_bins

    d = {"sizes": sizes}
    df = robjects.DataFrame(d)

    # plot
    gp = ggplot2.ggplot(df)

    if not args.theme_bw:
        pp = gp + ggplot2.aes_string(x="sizes") + ggplot2.geom_histogram(binwidth=binwidth)
    else:
        pp = gp + ggplot2.aes_string(x="sizes") + ggplot2.geom_histogram(binwidth=binwidth) + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print("Type enter to exit.")
        raw_input()
Esempio n. 30
0
def plot_qc_reads(qc_df):
    """
    Plot number of reads part of a pipeline QC file.
    """
    # Record NA values as 0
    qc_df = qc_df.fillna(0)#.set_index("sample")
    cols = ["sample",
            "num_reads",
            "num_mapped",
            "num_unique_mapped",
            "num_junctions"]
    qc_df = qc_df[cols]
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    labels = tuple(["num_reads",
                    "num_mapped",
                    "num_unique_mapped",
                    "num_junctions"])
    labels = robj.StrVector(labels)
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels = labels)
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    r_opts = r.options(scipen=4)
    p = ggplot2.ggplot(qc_r) + \
        ggplot2.geom_point(aes_string(x="sample", y="value")) + \
        ggplot2.scale_y_continuous(trans=scales.log10_trans(),
                                   breaks=scales.trans_breaks("log10",
                                                              robj.r('function(x) 10^x')),
                                   labels=scales.trans_format("log10",
                                                              robj.r('math_format(10^.x)'))) + \
        r.xlab("CLIP-Seq samples") + \
        r.ylab("No. reads") + \
        ggplot2.coord_flip() + \
        ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
        theme(**{"panel.grid.major.x": element_blank(),
                 "panel.grid.minor.x": element_blank(),
                 "panel.grid.major.y": theme_line(size=0.5,colour="grey66",linetype=3)})
    p.plot()

    return
    r.par(mfrow=np.array([1,2]))
    num_samples = len(qc_df.num_reads)
    r.par(bty="n", lwd=1.7, lty=2)
    r_opts = r.options(scipen=4)
    r.options(r_opts)
    r.dotchart(convert_to_r_matrix(qc_df[["num_reads",
                                          "num_mapped",
                                          "num_unique_mapped"]]),
               xlab="No. reads",
               lcolor="black",
               pch=19,
               gcolor="darkblue",
               cex=0.8)
    r.par(bty="n")
    r.dotchart(convert_to_r_matrix(qc_df[["num_ribosub_mapped",
                                          "num_ribo",
                                          "num_junctions"]]),
               xlab="No. reads",
               lcolor="black",
               pch=19,
               gcolor="darkblue",
               cex=0.8)