コード例 #1
0
ファイル: squiggle.py プロジェクト: JohnUrban/poretools
def plot_squiggle(args, filename, start_times, mean_signals):
	"""
	Use rpy2 to create a squiggle plot of the read.

	The events are drawn as a step plot of mean signal against time
	(relative to the first event) and split over stacked facets.

	args         -- options object; reads ``num_facets``, ``theme_bw``
	                and ``saveas`` (None, "pdf" or "png").
	filename     -- name of the read's file; used in the plot title and,
	                when saving, as the base name of the plot file.
	start_times  -- event start times; must not be empty.
	mean_signals -- mean signal of each event.

	Raises Exception if ``args.saveas`` is set and the target plot file
	already exists.
	"""
	r = robjects.r
	r.library("ggplot2")
	grdevices = importr('grDevices')

	# set t_0 as the first measured time for the read.
	t_0 = start_times[0]
	total_time = start_times[-1] - t_0
	# adjust times to be relative to t_0
	r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
	r_mean_signals = robjects.FloatVector(mean_signals)

	# infer the appropriate number of events given the number of facets.
	# Floor division is required: with true division (Python 3) the facet
	# index below would be fractional and every event would land in its
	# own facet.
	num_events = len(r_mean_signals)
	events_per_facet = (num_events // args.num_facets) + 1
	# dummy variable to control faceting
	facet_category = robjects.FloatVector(
		[(i // events_per_facet) + 1 for i in range(len(start_times))])

	# make a data frame of the start times and mean signals
	d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
	df = robjects.DataFrame(d)

	title = 'Squiggle plot for read: ' + filename + "\nTotal time (sec): " + str(total_time)
	pp = ggplot2.ggplot(df) \
		+ ggplot2.aes_string(x='start', y='mean') \
		+ ggplot2.geom_step(size=0.25) \
		+ ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
		+ ggplot2.scale_x_continuous('Time (seconds)') \
		+ ggplot2.scale_y_continuous('Mean signal (picoamps)') \
		+ ggplot2.ggtitle(title) \
		+ ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
	if args.theme_bw:
		# appended last, exactly as in the former duplicated branch
		pp = pp + ggplot2.theme_bw()

	if args.saveas is not None:
		plot_file = os.path.basename(filename) + "." + args.saveas
		if os.path.isfile(plot_file):
			raise Exception('Cannot create plot for %s: plot file %s already exists' % (filename, plot_file))
		if args.saveas == "pdf":
			grdevices.pdf(plot_file, width = 8.5, height = 11)
		elif args.saveas == "png":
			grdevices.png(plot_file, width = 8.5, height = 11,
				units = "in", res = 300)
		pp.plot()
		grdevices.dev_off()
	else:
		pp.plot()
		# keep the plot open until user hits enter
		print('Type enter to exit.')
		try:
			wait_for_enter = raw_input  # Python 2
		except NameError:
			wait_for_enter = input  # Python 3
		wait_for_enter()
コード例 #2
0
    def _plot_with_rpy2(self, regions, filename):
        """
        Plot the leading regions into a multi-page PDF using rpy2/ggplot2.

        For each of the first ``self.num_regs`` entries of ``regions`` three
        stacked line plots are drawn over the bins ``-self.num_bins`` to
        ``+self.num_bins`` around the middle bin: the weighted values
        (log2-scaled y axis), -log10 of the p-values, and -log10 of the
        corrected p-values with a dashed line at -log10(self.threshold).
        Regions for which ``np.any(region.weighted)`` is false are skipped
        with a warning.  The process exits with status 1 if a region has
        fewer bins than requested.

        :param regions: sequence of region objects providing ``weighted``,
            ``pvalues``, ``corrected_pvalues``, ``bin`` and ``positions``.
        :param filename: output path without extension; '.pdf' is appended.
        """
        from rpy2 import robjects
        import rpy2.robjects.lib.ggplot2 as ggplot2
        from rpy2.robjects.lib import grid
        from rpy2.robjects.packages import importr
        grdevices = importr('grDevices')
        base = importr('base')
        grdevices.pdf(file=filename + '.pdf')

        # try/finally guarantees the PDF device is closed even when plotting
        # raises or the sys.exit() below fires.
        try:
            t = list(range(-self.num_bins, self.num_bins + 1))
            for region in regions[:self.num_regs]:
                if not np.any(region.weighted):
                    logger.warning(
                        "Warning: No data for region located on bin " + str(region.bin) + ". Not plotting this one.")
                    continue
                # Floor division keeps ``middle`` an int; a float here would
                # be rejected as a slice index on Python 3.
                middle = (len(region.weighted[0]) - 1) // 2
                if middle < self.num_bins:
                    logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                    sys.exit(1)
                # the same window around the middle bin is cut out of every track
                window = slice(middle - self.num_bins, middle + self.num_bins + 1)
                n_tracks = len(region.weighted)
                d = {
                    # track index repeated once per plotted bin, as a string label
                    'map': robjects.StrVector(
                        [str(track) for track in range(n_tracks) for _ in t]),
                    't': robjects.FloatVector(t * n_tracks),
                    'e': robjects.FloatVector(
                        [v for sublist in region.weighted for v in sublist[window]]),
                    'p': robjects.FloatVector(
                        [-np.log10(x) for sublist in region.pvalues for x in sublist[window]]),
                    'c': robjects.FloatVector(
                        [-np.log10(x) for sublist in region.corrected_pvalues for x in sublist[window]]),
                }
                dataf = robjects.DataFrame(d)
                gp = ggplot2.ggplot(dataf)  # first yellow second red
                p1 = gp \
                    + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='e', group='map', colour='map'),
                                        alpha=0.8) \
                    + ggplot2.scale_y_continuous(trans='log2') \
                    + ggplot2.ggtitle(
                        "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions)))) \
                    + ggplot2.labs(y="log Intensity") \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'axis.title.x': ggplot2.element_blank(),
                                       'axis.text.y': ggplot2.element_text(angle=45),
                                       'axis.text.x': ggplot2.element_blank(),
                                       'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                p2 = gp \
                    + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='p', group='map', colour='map'),
                                        alpha=0.8) \
                    + ggplot2.labs(y="-log10(p-value)") \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'axis.title.x': ggplot2.element_blank(),
                                       'axis.text.x': ggplot2.element_blank(),
                                       'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                p3 = gp \
                    + ggplot2.geom_line(mapping=ggplot2.aes_string(x='t', y='c', group='map', colour='map'),
                                        alpha=0.8) \
                    + ggplot2.labs(y="-log10(q-value)",
                                   x='bins (' + str(self.bin_res) + ' bp each)') \
                    + ggplot2.geom_hline(mapping=ggplot2.aes_string(yintercept=str(-np.log10(self.threshold))),
                                         colour='black', alpha=0.8, linetype='dashed') \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                g1 = ggplot2.ggplot2.ggplotGrob(p1)
                g2 = ggplot2.ggplot2.ggplotGrob(p2)
                g3 = ggplot2.ggplot2.ggplotGrob(p3)
                # stack the three panels vertically and draw them on one page
                robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
                robjects.r("grid::grid.draw(g)")
                grid.newpage()
                logger.debug('Plotted region ' + str(region.bin))
        finally:
            grdevices.dev_off()
コード例 #3
0
ファイル: generate_figs.py プロジェクト: phytolrr/phytolrr
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    """Draw a dodged bar chart of counts per subgroup and write it as a PNG.

    Args:
        subgroups_to_sses_to_n_count: mapping ``subgroup -> {type -> count}``.
        tname: name of the data-frame column holding the inner keys; it is
            also used as the fill aesthetic.
        file_name: name of the PNG file created below ``OUTPUT_PATH``.
    """
    columns_to_data = {'subgroup': [], tname: [], 'count': []}
    max_count = 0
    for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items():
        for ss, n_count in sses_to_n_count.items():
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data[tname].append(ss)
            columns_to_data['count'].append(n_count)
            max_count = max(max_count, n_count)
    r_columns_to_data = {
        'subgroup':
        ro.FactorVector(columns_to_data['subgroup'],
                        levels=ro.StrVector(
                            _sort_subgroup(set(columns_to_data['subgroup'])))),
        tname:
        ro.StrVector(columns_to_data[tname]),
        'count':
        ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    # Upper y limit: the next multiple of 1000 above the largest count.
    # Floor division is essential; with true division (Python 3) the
    # "/ 1000 * 1000" round trip is a no-op and nothing gets rounded.
    max_count = int(max_count // 1000 * 1000 + 1000)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))

    grdevices.png(file=histogram_file_path, width=1200, height=800)
    # Always close the PNG device, even if building or drawing the plot fails.
    try:
        gp = ggplot2.ggplot(df)
        pp = gp + \
             ggplot2.aes_string(x='subgroup', y='count', fill=tname) + \
             ggplot2.geom_bar(position="dodge", width=0.8, stat="identity") + \
             ggplot2.theme_bw() + \
             ggplot2.theme_classic() + \
             ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
             ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)}) + \
             ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40, angle=45)}) + \
             ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)}) + \
             ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                        limits=ro.IntVector([0, max_count])) + \
             ggplot2.geom_text(ggplot2.aes_string(label='count'), size=6, angle=35, hjust=-0.1,
                               position=ggplot2.position_dodge(width=0.8),
                               vjust=-0.2)

        pp.plot()
        logging.info(str.format("Output step3 file {}", histogram_file_path))
    finally:
        grdevices.dev_off()
コード例 #4
0
ファイル: generate_figs.py プロジェクト: phytolrr/phytolrr
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean,
                                          prefix=''):
    """Write the per-subgroup LRR counts and draw the acc20 line chart.

    Args:
        subgroups_to_lrrs_acc20mean: mapping
            ``subgroup -> (acc20 means per position, LRR count)``.
        prefix: string prepended to both output file names, which are
            created below ``OUTPUT_PATH``.
    """
    pandas2ri.activate()

    lrr_counts = {}
    subgroup_col = []
    pos_col = []
    acc20_col = []
    for subgroup, pair in subgroups_to_lrrs_acc20mean.items():
        acc20means, acc20_count = pair
        lrr_counts[subgroup] = acc20_count
        # positions are 1-based on the x axis
        for position, acc20mean in enumerate(acc20means, start=1):
            subgroup_col.append(subgroup)
            pos_col.append(position)
            acc20_col.append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    count_file_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt")
    with open(count_file_path, 'w') as f:
        for subgroup, lrr_count in lrr_counts.items():
            f.write("{}: {}\n".format(subgroup, lrr_count))

    # Generate the line chart file
    df = ro.DataFrame({
        'subgroup': ro.StrVector(subgroup_col),
        'pos': ro.IntVector(pos_col),
        'acc20': ro.FloatVector(acc20_col),
    })

    line_chart_file_path = os.path.join(OUTPUT_PATH,
                                        prefix + "step3_5_lrr_acc20_line.png")
    logging.debug(
        "The Data Frame for file {}: \n{}".format(line_chart_file_path, df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)

    # one tick per position, labelled with the 24-character motif string
    x_scale = ggplot2.scale_x_continuous(
        breaks=ro.IntVector(range(1, 25)),
        labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))
    layers = [
        ggplot2.theme_bw(),
        ggplot2.theme_classic(),
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}),
        ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}),
        ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup'),
        ggplot2.geom_point(size=4, shape=20),
        ggplot2.geom_line(size=3),
        ggplot2.theme(**{'legend.title': ggplot2.element_blank()}),
        ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}),
        x_scale,
    ]
    pp = ggplot2.ggplot(df)
    for layer in layers:
        pp = pp + layer
    pp.plot()
    logging.info("Output step3 file {}".format(line_chart_file_path))
    grdevices.dev_off()
コード例 #5
0
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb status counts per crawl as a stacked horizontal bar chart.

     data       -- pandas DataFrame with at least the columns 'type',
                   'crawl' and 'size'.
     row_filter -- list of 'type' values to keep; the prefix
                   'crawldb:status:db_' is stripped from matching values.
                   The category order of 'type' is the reverse of this
                   list.  May be empty or None (no filtering, categories
                   inferred by pandas).
     img_file   -- file name of the image saved below PLOTDIR.
     ratio      -- ignored: the aspect ratio is recomputed from the number
                   of distinct crawls (parameter kept for compatibility).

     Returns the ggplot object.
     """
     if row_filter:
         # copy, so the assignments below do not write into a slice of the
         # caller's frame
         data = data[data['type'].isin(row_filter)].copy()
     prefix = re.compile('^crawldb:status:db_')
     categories = []
     for value in row_filter or ():
         replacement = prefix.sub('', value)
         if replacement != value:
             data.replace(to_replace=value, value=replacement, inplace=True)
         # values without the prefix are kept as categories as well so
         # that their rows do not turn into NaN
         if replacement not in categories:
             categories.append(replacement)
     # list.reverse() works in place and returns None, which silently
     # discarded the intended order; build a reversed copy instead.  With
     # no categories at all fall back to None (pandas infers them).
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories[::-1] or None)
     data['size'] = data['size'].astype(float)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts\n(before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
コード例 #6
0
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot the percentage of each fetch status per crawl as stacked bars.

     data       -- pandas DataFrame with at least the columns 'crawl',
                   'percentage' and 'type'.
     row_filter -- list of 'type' values to keep; the prefix 'fetcher:'
                   (optionally followed by 'aggr:') is stripped from
                   matching values.  The category order of 'type' is the
                   reverse of this list.  May be empty or None (no
                   filtering, categories inferred by pandas).
     img_file   -- file name of the image saved below PLOTDIR.
     ratio      -- ignored: the aspect ratio is recomputed from the number
                   of distinct crawls (parameter kept for compatibility).

     Returns the ggplot object.
     """
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     # copy, so the assignments below never write into a slice of the
     # caller's frame
     data = data[['crawl', 'percentage', 'type']].copy()
     prefix = re.compile('^fetcher:(?:aggr:)?')
     categories = []
     for value in row_filter or ():
         replacement = prefix.sub('', value)
         if replacement != value:
             data.replace(to_replace=value, value=replacement, inplace=True)
         # values without the prefix are kept as categories as well so
         # that their rows do not turn into NaN; duplicates (e.g. the same
         # status with and without 'aggr:') are listed once
         if replacement not in categories:
             categories.append(replacement)
     # list.reverse() works in place and returns None, which silently
     # discarded the intended order; build a reversed copy instead.  With
     # no categories at all fall back to None (pandas infers them).
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories[::-1] or None)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
コード例 #7
0
def plot(data, x, y, ylabel, color, filename):
    """Draw column ``y`` against column ``x`` as a line and save the figure.

    ``ylabel`` names the y axis, ``color`` is the line colour and
    ``filename`` is handed to ggplot2's ``ggsave``.
    """
    base_plot = ggplot2.ggplot(data=data)
    line_layer = ggplot2.geom_line(ggplot2.aes_string(x=x, y=y), color=color)
    # x tick labels are rotated by 90 degrees; facet strip text is small
    text_theme = ggplot2.theme(**{
        'axis.text.x': ggplot2.element_text(angle=90, hjust=1),
        'strip.text.y': ggplot2.element_text(size=6, angle=90),
    })
    y_scale = ggplot2.scale_y_continuous(ylabel)
    figure = base_plot + line_layer + text_theme + y_scale
    ggplot2.ggplot2.ggsave(filename, figure)
コード例 #8
0
ファイル: r_plot.py プロジェクト: yarden/biorpy
def get_nogrid_theme():
    """
    Return a ggplot2 theme that blanks every panel grid line
    (major and minor, along both x and y).
    """
    blanked = {}
    for axis in ('x', 'y'):
        for level in ('major', 'minor'):
            blanked['panel.grid.%s.%s' % (level, axis)] = element_blank()
    return theme(**blanked)
コード例 #9
0
ファイル: generate_figs.py プロジェクト: phytolrr/phytolrr
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count,
                                       file_name):
    """Draw a dodged bar chart of counts per position type and write a PNG.

    Args:
        place_type_pos_type_to_count: mapping from ``"<place>_<pos>"`` keys
            (exactly one underscore) to counts.  ``pos`` goes on the x axis
            and ``place`` selects the fill colour.
        file_name: name of the PNG file created below ``OUTPUT_PATH``.

    Raises:
        ValueError: if a key does not split into exactly two parts on '_'.
    """
    columns_to_data = {'place': [], 'pos': [], 'count': []}
    max_count = 0
    for place_pos_type, n_count in place_type_pos_type_to_count.items():
        place_type, pos_type = place_pos_type.split('_')
        columns_to_data['place'].append(place_type)
        columns_to_data['pos'].append(pos_type)
        columns_to_data['count'].append(n_count)
        max_count = max(max_count, n_count)
    r_columns_to_data = {
        'place': ro.StrVector(columns_to_data['place']),
        'pos': ro.StrVector(columns_to_data['pos']),
        'count': ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    # Upper y limit: next multiple of 1000 (or of 100 for small counts).
    # Floor division is essential; with true division (Python 3) the
    # "/ step * step" round trip is a no-op and nothing gets rounded.
    if max_count > 1000:
        max_count = int(max_count // 1000 * 1000 + 1000)
    else:
        max_count = int(max_count // 100 * 100 + 100)
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug(
        str.format("The Data Frame for file {}: \n{}", histogram_file_path,
                   df))
    grdevices.png(file=histogram_file_path, width=1024, height=512)
    # Always close the PNG device, even if building or drawing the plot fails.
    try:
        gp = ggplot2.ggplot(df)
        pp = gp + \
             ggplot2.aes_string(x='pos', y='count', fill='place') + \
             ggplot2.geom_bar(position="dodge", stat="identity") + \
             ggplot2.theme_bw() + \
             ggplot2.theme_classic() + \
             ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
             ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
             ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                        limits=ro.IntVector([0, max_count])) + \
             ggplot2.geom_text(ggplot2.aes_string(label='count'),
                               position=ggplot2.position_dodge(width=0.8), size=10, angle=35, hjust=-0.2,
                               vjust=-0.5)
        pp.plot()
        logging.info(str.format("Output step3 file {}", histogram_file_path))
    finally:
        grdevices.dev_off()
コード例 #10
0
ファイル: generate_figs.py プロジェクト: phytolrr/phytolrr
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname,
                                          line_chart_file_path):
    """Draw one acc20 line per type over the sites -5..5 and write a PNG.

    Args:
        ts_to_acc20s: input handed to ``calc_acc20mean_by_types``, whose
            result is iterated as ``type -> sequence of mean values``.
        tname: name of the data-frame column holding the type; also used
            as group and colour aesthetic.
        line_chart_file_path: path of the PNG file to create.
    """
    logging.debug(
        "Begin to generate {}, data {}".format(line_chart_file_path,
                                                ts_to_acc20s))
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)

    columns = {tname: [], 'site': [], 'acc20': []}
    for type_name, means in ts_to_acc20mean.items():
        # the first value belongs to site -5
        for site, mean_value in enumerate(means, start=-5):
            columns[tname].append(type_name)
            columns['site'].append(site)
            columns['acc20'].append(mean_value)

    # Generate the line chart file
    df = ro.DataFrame({
        tname: ro.StrVector(columns[tname]),
        'site': ro.IntVector(columns['site']),
        'acc20': ro.FloatVector(columns['acc20']),
    })

    logging.debug(
        "The Data Frame for file {}: \n{}".format(line_chart_file_path, df))
    grdevices.png(file=line_chart_file_path, width=1024, height=512)

    # ticks at every site from -5 to 5; site 0 is labelled 'N'
    sites = list(range(-5, 6))
    tick_labels = ['N' if site == 0 else str(site) for site in sites]
    layers = [
        ggplot2.theme_bw(),
        ggplot2.theme_classic(),
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}),
        ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}),
        ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname),
        ggplot2.geom_point(size=4, shape=20),
        ggplot2.geom_line(size=3),
        ggplot2.theme(**{'legend.title': ggplot2.element_blank()}),
        ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}),
        ggplot2.scale_x_continuous(breaks=ro.IntVector(sites),
                                   labels=ro.StrVector(tick_labels)),
    ]
    pp = ggplot2.ggplot(df)
    for layer in layers:
        pp = pp + layer
    pp.plot()
    logging.info("Output step3 file {}".format(line_chart_file_path))
    grdevices.dev_off()
コード例 #11
0
def plot_coef(feat_mat_dir,
              model_dir,
              expt_names,
              pref,
              outfile=None,
              height=120,
              fsize=12):
    """Make boxplots of the model coefficients of each feature across experiments.

    For every experiment ``ex`` the feature matrix
    ``<feat_mat_dir>/<ex>_feat_mat.npz`` and the model
    ``<model_dir>/<pref><ex>_model.pkl`` are read; the classification and
    regression coefficients are collected and plotted in one facet each.

    Args:
        feat_mat_dir: directory holding the feature matrix files.
        model_dir: directory holding the model files.
        expt_names: experiment names; must contain at least one.
        pref: prefix of the model file names.
        outfile: output image path; when None the plot is drawn on the
            current R device instead of being saved.
        height: image height in mm (used only when saving).
        fsize: font size of the axis text.

    Returns:
        The pandas DataFrame with one row per (feature, experiment) and the
        columns 'feature', 'Classification' and 'Regression'.

    Raises:
        ValueError: if ``expt_names`` is empty.
    """
    nexpt = 0
    for nexpt, ex in enumerate(expt_names, start=1):
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        # only the feature names of the matrix file are needed here
        _, _, tmp_feat_names, _ = read_feat_mat(feat_mat_file)

        if nexpt == 1:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            # every experiment must use the same features in the same order
            assert (all(f[0] == f[1] for f in zip(feat_names, tmp_feat_names)))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)

    if nexpt == 0:
        # previously this surfaced as an UnboundLocalError further down
        raise ValueError('expt_names must contain at least one experiment')

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size, )),
        'Regression': np.reshape(reg_coef, (reg_coef.size, ))
    })

    df2 = pd.melt(df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    # image width in mm grows with the number of experiments, minimum 80
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename=outfile,
                    plot=gp,
                    width=w,
                    height=height,
                    unit='mm')
    return df
コード例 #12
0
 def plot_similarity_matrix(self, item_type, image_file, title):
     '''Draw the pairwise crawl similarities (overlap of unique items)
     of one item type as a heat map, save it below PLOTDIR and return
     the plot object'''
     similarities = self.similarity[item_type]
     data = defaultdict(dict)
     row = 0
     for crawl1 in similarities:
         for crawl2 in similarities[crawl1]:
             row += 1  # rows are numbered from 1
             value = similarities[crawl1][crawl2]
             data['crawl1'][row] = MonthlyCrawl.short_name(crawl1)
             data['crawl2'][row] = MonthlyCrawl.short_name(crawl2)
             data['similarity'][row] = value
             data['sim_rounded'][row] = value  # to be rounded
     data = pandas.DataFrame(data)
     print(data)
     # select median of similarity values as midpoint of similarity scale
     midpoint = data['similarity'].median()
     # a wide value range gets fewer decimals and a larger label font
     value_range = data['similarity'].max() - data['similarity'].min()
     if value_range > .2:
         decimals, textsize, minshown = 2, 2.8, .005
     else:
         decimals, textsize, minshown = 3, 2, .0005
     label_format = '{0:.' + str(decimals) + 'f}'

     def tile_label(x):
         # values below the display threshold are shown as a plain '0',
         # all others without their leading zero
         if x >= minshown:
             return label_format.format(x).lstrip('0')
         return '0'

     data['sim_rounded'] = data['sim_rounded'].apply(tile_label)
     print('Median of similarities for', item_type, '=', midpoint)
     if len(similarities) > self.MAX_MATRIX_SIZE:
         # walk the crawls in descending sort order and drop every crawl
         # whose rank exceeds MAX_MATRIX_SIZE from both axes
         for rank, crawl1 in enumerate(sorted(similarities, reverse=True)):
             short_name = MonthlyCrawl.short_name(crawl1)
             if rank > self.MAX_MATRIX_SIZE:
                 data = data[data['crawl1'] != short_name]
                 data = data[data['crawl2'] != short_name]
     tiles = ggplot2.geom_tile(color="white")
     fill_scale = ggplot2.scale_fill_gradient2(low="red", high="blue",
                                               mid="white",
                                               midpoint=midpoint,
                                               space="Lab")
     x_text = ggplot2.theme(**{'axis.text.x':
                               ggplot2.element_text(angle=45,
                                                    vjust=1, hjust=1)})
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl2', y='crawl1',
                              fill='similarity', label='sim_rounded') \
         + tiles \
         + fill_scale \
         + GGPLOT2_THEME \
         + ggplot2.coord_fixed() \
         + x_text \
         + ggplot2.labs(title=title, x='', y='') \
         + ggplot2.geom_text(color='black', size=textsize)
     img_path = os.path.join(PLOTDIR, image_file)
     p.save(img_path)
     return p
コード例 #13
0
ファイル: rnapredPlots.py プロジェクト: sofiakp/rnapred
def plot_cv_r2(pandas_df, outfile, fsize = 10, height = 120, max_width = 50, xlab = 'Parameters'):
    """Save boxplots of the cross-validation 'r2' values, one box per 'title'.

    The image is written to ``outfile``; its width (mm) is 5 per distinct
    title but at least ``max_width``, its height is ``height`` mm.
    """
    n_titles = len(set(pandas_df['title']))
    r_df = com.convert_to_r_dataframe(pandas_df)

    tick_text = ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1)
    text_theme = ggplot2.theme(**{'axis.text.x': tick_text,
                                  'axis.text.y': ggplot2.element_text(size = fsize)})
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(title)', y = 'r2')
    gp = gp + ggplot2.geom_boxplot()
    gp = gp + ggplot2.scale_y_continuous('R-squared')
    gp = gp + ggplot2.scale_x_discrete(xlab)
    gp = gp + ggplot2.theme_bw()
    gp = gp + text_theme

    width_mm = max(5 * n_titles, max_width)
    ro.r.ggsave(filename = outfile, plot = gp, width = width_mm, height = height, unit = 'mm')
コード例 #14
0
ファイル: boxes.py プロジェクト: escott55/me_variome
def makeDistanceBox(alldata, figurename, feature="distance"):
    """Write a PNG boxplot of ``feature`` per continent to ``figurename``.

    A 'distance' column (het + hom) is added to ``alldata`` in place
    before the frame is converted for R.
    """
    alldata["distance"] = alldata.het + alldata.hom
    r_dataframe = com.convert_to_r_dataframe(alldata)

    # NOTE: rotated x labels and faceting by RVIS_type were disabled in
    # the original code and are still not applied.
    layers = (
        ggplot2.aes_string(x="factor(continent)", y=feature),
        ggplot2.geom_boxplot(),
        ggplot2.ggtitle("Distance from Reference by Continent"),
        ggplot2.theme(**mytheme),
    )
    p = ggplot2.ggplot(r_dataframe)
    for layer in layers:
        p = p + layer

    grdevices.png(figurename)
    p.plot()
    grdevices.dev_off()
コード例 #15
0
ファイル: convertAPTresults.py プロジェクト: sofiakp/rnapred
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type.

    For every cell type in ``expt_names`` each pair of its CEL files is
    drawn as a scatter plot whose title carries the Pearson correlation.

    Args:
        expr: 2-D array indexed as ``expr[:, column]``; one column per CEL
            file.
        expt_names: cell type names.
        expt_name_idx: mapping from cell type name to the column indices
            of its CEL files.
        cel_names: CEL file name per column; '_' is shown as '.' on the
            axes.
        outdir: output directory.  When given, it is created if missing,
            each plot is saved there as '<cell type>-<n>.png' and the
            summary is written to 'cor_summary.txt'.  When None the plots
            are drawn on the current R device and nothing is written.

    Returns:
        A pandas DataFrame with the columns 'name1', 'name2' and 'cor',
        indexed by '<cell type>-<n>'.
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory up front: it used to be created only
    # while saving a plot, so writing the summary failed whenever no pair
    # of CEL files existed.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for pos, col_1 in enumerate(tmp_idx):
            name1 = cel_names[col_1].replace('_', '.')
            for col_2 in tmp_idx[pos + 1:]:
                name2 = cel_names[col_2].replace('_', '.')
                plot_idx += 1
                cor = np.corrcoef(expr[:, col_1], expr[:, col_2])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))

                df = ro.DataFrame({'x': ro.FloatVector(expr[:, col_1]),
                                   'y': ro.FloatVector(expr[:, col_2])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                    ggplot2.geom_point(size = 1) + \
                    ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                    ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                    ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                     'axis.title.x':ggplot2.element_text(size = 8),
                                     'axis.text.y':ggplot2.element_text(size = fsize),
                                     'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                     'plot.title':ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename = outfile, plot = gp, width = 85, height = 85, unit = 'mm')

    df = pd.DataFrame({'name1': names_1, 'name2': names_2, 'cor': cors}, index = titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep = '\t')
    return df
コード例 #16
0
def plot_cv_r2(pandas_df,
               outfile,
               fsize=10,
               height=120,
               max_width=50,
               xlab='Parameters'):
    """Write boxplots of cross-validation R-squared per parameter setting.

    One box is drawn for every distinct value of the 'title' column from
    the values in the 'r2' column.  The image saved to ``outfile`` is
    ``height`` mm tall and 5 mm wide per setting, but never narrower than
    ``max_width`` mm.
    """
    distinct_titles = set(pandas_df['title'])
    r_df = com.convert_to_r_dataframe(pandas_df)

    layers = [
        ggplot2.aes_string(x='factor(title)', y='r2'),
        ggplot2.geom_boxplot(),
        ggplot2.scale_y_continuous('R-squared'),
        ggplot2.scale_x_discrete(xlab),
        ggplot2.theme_bw(),
        ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65, vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize),
        }),
    ]
    gp = ggplot2.ggplot(r_df)
    for layer in layers:
        gp = gp + layer

    w = max(5 * len(distinct_titles), max_width)
    ro.r.ggsave(filename=outfile, plot=gp, width=w, height=height, unit='mm')
コード例 #17
0
ファイル: motifPlotUtils.py プロジェクト: sofiakp/roadmap
def plot_thresh_distr(motif_names, thresh, out_dir, width = 350):
    """Write the thresholds above 1 to a table and plot them per motif.

    Creates 'count_thresh.txt' (tab separated) as well as
    'count_thresh_bar.pdf' and 'count_thresh_bar.png' in ``out_dir``.
    ``width`` is the image width in mm.
    """
    fsize = 10
    table = pd.DataFrame({'motif':motif_names, 'thresh':thresh})
    # only thresholds greater than 1 are reported and plotted
    table = table[table['thresh'] > 1]
    table.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep = '\t', index = False)

    r_df = com.convert_to_r_dataframe(table)
    y_scale = ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70]))
    text_theme = ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                  'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                                  'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh')
    gp = gp + ggplot2.geom_boxplot() + y_scale
    gp = gp + ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip()
    gp = gp + text_theme

    for ext in ('.pdf', '.png'):
        target = os.path.join(out_dir, 'count_thresh_bar' + ext)
        ro.r.ggsave(filename = target, plot = gp, width = width, height = 300, unit = 'mm')
コード例 #18
0
ファイル: motifPlotUtils.py プロジェクト: sofiakp/roadmap
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Save a table and horizontal boxplots of the per-motif thresholds.

    Only thresholds greater than 1 are kept.  The table goes to
    'count_thresh.txt' and the figure to 'count_thresh_bar.pdf' and
    'count_thresh_bar.png', all inside ``out_dir``; ``width`` is the
    figure width in mm.
    """
    all_thresholds = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    df = all_thresholds[all_thresholds['thresh'] > 1]
    summary_path = os.path.join(out_dir, 'count_thresh.txt')
    df.to_csv(summary_path, sep='\t', index=False)

    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    layers = [
        ggplot2.aes_string(x='factor(motif)', y='thresh'),
        ggplot2.geom_boxplot(),
        # the count axis is fixed to the range 0..70
        ggplot2.scale_y_continuous('Threshold counts', limits=ro.IntVector([0, 70])),
        ggplot2.scale_x_discrete(''),
        ggplot2.theme_bw(),
        ggplot2.coord_flip(),
        ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize),
            'axis.text.y': ggplot2.element_text(size=fsize, hjust=1),
            'strip.text.x': ggplot2.element_text(size=fsize + 1),
        }),
    ]
    gp = ggplot2.ggplot(r_df)
    for layer in layers:
        gp = gp + layer

    for ext in ['.pdf', '.png']:
        figure_path = os.path.join(out_dir, 'count_thresh_bar' + ext)
        ro.r.ggsave(filename=figure_path,
                    plot=gp,
                    width=width,
                    height=300,
                    unit='mm')
コード例 #19
0
ファイル: rnapredPlots.py プロジェクト: sofiakp/rnapred
def plot_coef(feat_mat_dir, model_dir, expt_names, pref, outfile = None, height = 120, fsize = 12):
    """Boxplot classification and regression coefficients across experiments.

    For every experiment the model ``<pref><expt>_model.pkl`` is read from
    ``model_dir`` and the feature names from ``<expt>_feat_mat.npz`` in
    ``feat_mat_dir``. The per-experiment coefficient arrays are joined
    column-wise and plotted as one box per feature, faceted into
    'Classification' and 'Regression'.

    Args:
        feat_mat_dir: Directory holding the ``*_feat_mat.npz`` files.
        model_dir: Directory holding the ``*_model.pkl`` files.
        expt_names: Iterable of experiment names; must not be empty.
        pref: Prefix of the model file names.
        outfile: Path handed to ``ggsave``; when None the plot is drawn on
            the current device instead.
        height: Figure height in millimetres (used only with ``outfile``).
        fsize: Base font size of the axis text.

    Returns:
        The wide pandas DataFrame with columns 'feature', 'Classification'
        and 'Regression' (one row per feature/experiment pair).

    Raises:
        ValueError: If ``expt_names`` is empty or the experiments do not
            share the same feature names.
    """
    feat_names = None
    clf_parts = []
    reg_parts = []

    for ex in expt_names:
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        # Only the feature names are needed from the feature matrix file.
        (_, _, tmp_feat_names, _) = read_feat_mat(feat_mat_file)

        if not clf_parts:
            feat_names = tmp_feat_names
        elif (len(feat_names) != len(tmp_feat_names) or
              any(a != b for a, b in zip(feat_names, tmp_feat_names))):
            # Explicit check instead of an assert, which -O would strip and
            # which silently ignored a length mismatch.
            raise ValueError('feature names of experiment %s differ from those of '
                             'the first experiment' % ex)
        clf_parts.append(model.clf_coef())
        reg_parts.append(model.reg_coef())

    nexpt = len(clf_parts)
    if nexpt == 0:
        raise ValueError('plot_coef needs at least one experiment name')

    if nexpt == 1:
        clf_coef, reg_coef = clf_parts[0], reg_parts[0]
    else:
        # Join once at the end instead of re-copying on every iteration.
        clf_coef = np.concatenate(clf_parts, axis = 1)
        reg_coef = np.concatenate(reg_parts, axis = 1)

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below will read the data row-first.
    df = pd.DataFrame({'feature':np.repeat(feat_names, nexpt),
                       'Classification':np.reshape(clf_coef, (clf_coef.size,)),
                       'Regression':np.reshape(reg_coef, (reg_coef.size,))})

    df2 = pd.melt(df, id_vars = 'feature', var_name = 'fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(feature)', y = 'value') + \
        ggplot2.facet_wrap('fun', scales = 'free_y') + \
        ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Importance') + \
        ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + \
        ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                         'axis.text.y':ggplot2.element_text(size = fsize),
                         'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    # Width grows with the number of experiments, with an 80 mm floor.
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename = outfile, plot = gp, width = w, height = height, unit = 'mm')
    return df
コード例 #20
0
 def plot_stacked_bar(self, data, row_filter, img_file, ratio=1.0):
     """Draw a horizontal stacked bar chart of percentages per crawl.

     Rows are restricted to the ``type`` values listed in ``row_filter``
     (no restriction when it is empty). Filter values that start with
     ``fetcher:`` or ``fetcher:aggr:`` are displayed without that prefix.
     The chart is saved as ``img_file`` below ``PLOTDIR`` and returned.
     """
     prefix = '^fetcher:(?:aggr:)?'
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         # subn reports whether the anchored prefix was actually present.
         short_name, hits = re.subn(prefix, '', value)
         if hits:
             data.replace(to_replace=value, value=short_name, inplace=True)
     # print(data)
     legend = ggplot2.guide_legend(reverse=True)
     layout = {'legend.position': 'bottom', 'aspect.ratio': ratio}
     layers = (
         ggplot2.aes_string(x='crawl', y='percentage', fill='type'),
         ggplot2.geom_bar(stat='identity', position='stack', width=.9),
         ggplot2.coord_flip(),
         ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                   guide=legend),
         GGPLOT2_THEME,
         ggplot2.theme(**layout),
         ggplot2.labs(title='Percentage of Fetch Status',
                      x='', y='', fill=''),
     )
     p = ggplot2.ggplot(data)
     for layer in layers:
         p = p + layer
     p.save(os.path.join(PLOTDIR, img_file))
     return p
コード例 #21
0
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot the fetch status percentages of each crawl as stacked bars.

     ``data`` is narrowed to rows whose ``type`` is in ``row_filter`` when
     the filter is non-empty. Any filter value carrying the ``fetcher:`` /
     ``fetcher:aggr:`` prefix is replaced in the frame by its unprefixed
     form. The figure is written to ``PLOTDIR``/``img_file`` and returned.
     """
     fetcher_prefix = '^fetcher:(?:aggr:)?'
     if len(row_filter) > 0:
         keep = data['type'].isin(row_filter)
         data = data[keep]
     for value in row_filter:
         stripped, matched = re.subn(fetcher_prefix, '', value)
         if not matched:
             continue
         data.replace(to_replace=value, value=stripped, inplace=True)
     # print(data)
     fill_scale = ggplot2.scale_fill_brewer(
         palette='RdYlGn', type='sequential',
         guide=ggplot2.guide_legend(reverse=True))
     theme_tweaks = ggplot2.theme(**{'legend.position': 'bottom',
                                     'aspect.ratio': ratio})
     labels = ggplot2.labs(title='Percentage of Fetch Status',
                           x='', y='', fill='')
     p = (ggplot2.ggplot(data)
          + ggplot2.aes_string(x='crawl', y='percentage', fill='type')
          + ggplot2.geom_bar(stat='identity', position='stack', width=.9)
          + ggplot2.coord_flip()
          + fill_scale
          + GGPLOT2_THEME
          + theme_tweaks
          + labels)
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
コード例 #22
0
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb size per crawl as stacked bars, split by status type.

     ``data`` is narrowed to rows whose ``type`` is in ``row_filter`` when
     the filter is non-empty. Filter values starting with
     ``crawldb:status:db_`` are shown without that prefix, and the ``size``
     column is converted to float. The caller's frame is left untouched.
     The prepared frame is printed, the figure is saved as ``img_file``
     below ``PLOTDIR``, and the plot object is returned.
     """
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     # Work on a private copy: with an empty filter ``data`` is still the
     # caller's object, and with a filter it is a slice on which in-place
     # writes trigger pandas' chained-assignment warning.
     data = data.copy()
     for value in row_filter:
         if re.search('^crawldb:status:db_', value):
             replacement = re.sub('^crawldb:status:db_', '', value)
             data = data.replace(to_replace=value, value=replacement)
     data['size'] = data['size'].astype(float)
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts (before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
コード例 #23
0
ファイル: gui.py プロジェクト: kenziD/A-Wonderful-Life
def show1():
	"""Show a bar chart of the time spent on each project.

	Calls open1(), sources the head1.r script, loads day1.csv and plots
	`time` per `project` (one fill colour per project) on the current device.
	"""
	open1()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')
	layers = [
		ggplot2.aes_string(x='project', y='time', fill='project'),
		ggplot2.geom_bar(stat='identity'),
		ggplot2.ggtitle("今日项目时间分布图"),
		ggplot2.labs(x='项目', y='时间 (min)'),
		# Slanted tick labels on the x axis.
		ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)}),
	]
	pp = ggplot2.ggplot(data)
	for layer in layers:
		pp = pp + layer
	pp.plot()
コード例 #24
0
def rest():
    """Exploratory statistics and plots for per-replicate quartile data.

    Notebook-style scratch code. It reads the module-level frames
    ``q1_median_q3_rep_wide``, ``q1_median_q3_rep_long`` and ``beta_values``,
    runs several significance tests whose results are mostly left unused, and
    passes two ggplot figures to ``ut.save_and_display`` with the same PNG
    path. Nothing is returned.
    """
    df = q1_median_q3_rep_wide
    pops = ["pdc", "dc-cd11b", "dc-cd8a"]

    # Mann-Whitney U for every (statistic, population pair) combination.
    stats_l = []
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)

        # NOTE(review): the loop variables are overwritten with fixed values,
        # so every iteration records the same hsc/pdc/median row -- looks like
        # a debugging leftover; confirm before relying on stats_df.
        popa = "hsc"
        popb = "pdc"
        stat = "median"

        # NOTE(review): the test runs on literal sample values; the queries
        # against the real data are commented out below.
        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )
        # Bare expression: no effect outside an interactive session.
        pvalue

        stats_l.append([stat, popa, popb, mw_u, pvalue])
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)

    # One row per population, one column per replicate, holding the "mean"
    # column of the wide table.
    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )

    import scikit_posthocs

    # Kruskal-Wallis across the populations (result is overwritten below).
    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # Dunn post-hoc test with Benjamini-Hochberg adjustment.
    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )

    # One-way ANOVA on the same groups.
    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # NOTE(review): only the top-level package is imported here; the
    # statsmodels.stats.multicomp attribute access below presumably relies on
    # that submodule having been imported elsewhere -- confirm.
    import statsmodels

    # Long format again: the stacked values end up in column 0.
    df = kruskal_format_means.stack().reset_index()

    # Bare expression: no effect outside an interactive session.
    kruskal_format_means

    # Tukey HSD on the stacked means, grouped by population.
    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)

    # Results are computed but discarded.
    res.pvalues
    res.summary()

    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)

    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]

    # NOTE(review): hard-coded absolute output directory.
    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"

    # Bare expression: no effect outside an interactive session.
    q1_median_q3_rep_wide

    # Within each (Population, stat) group, number the rows 1..n in order of
    # decreasing value; the number is used as the dodge group in the plot.
    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))

    # Dodged point plot of the values per population, coloured by statistic.
    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))
    # Unused assignment (notebook leftover).
    a = 3

    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))

    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )

    # Bare expression: no effect outside an interactive session.
    q1_median_q3_rep_wide

    # Boxplot drawn from precomputed statistics (stat="identity"): one box
    # per sample, where a sample is the Population and Replicate labels
    # joined together.
    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                        gg.aes_string(
                            x="Population",
                            fill="Population",
                            group="sample",
                            lower="Q1",
                            upper="Q3",
                            middle="median",
                            ymin="min1",
                            ymax="max99",
                            # position=gg.position_dodge(width=0.5),
                        ),
                        stat="identity",
                    )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    # Unused assignment (notebook leftover).
    a = 3
    # NOTE(review): same png_path as the point plot above, so the first
    # figure is presumably overwritten -- confirm.
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )
    # image_png2(g, (ut.cm(12), ut.cm(12)))

    # Bare expression: no effect outside an interactive session.
    beta_values.loc[:, ("hsc", "1")]
コード例 #25
0
ファイル: graphics.py プロジェクト: realmichaelzyy/cs249-2
#   lines

# plot with both log and linear y scales
# aes_string: set the axis labels and what we're plotting
# theme: sets the size of the legend keys (in units of text lines)
#   note the use of **{} to allow setting "legend.key.size" as a keyword
# scale_colour_manual: associate color datasets with actual colors and names
# geom_point and geom_line: thicker points and lines
# scale_linetype_manual: associate perf types with linetypes
for col_i, yscale in enumerate(['log', 'linear']):
    vp = grid.viewport(**{'layout.pos.col': col_i + 1, 'layout.pos.row': 1})
    pp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='variable', y='Performance', color='color',
                           shape='PerfType', linetype='PerfType') + \
        ggplot2.ggtitle('Performance vs. Color') + \
        ggplot2.theme(**{'legend.key.size' : ro.r.unit(1.4, "lines") } ) + \
        ggplot2.scale_colour_manual("Color",
                                    values=colormap,
                                    breaks=colormap.names,
                                    labels=[elt[1] for elt in
                                            colormap_labels]) + \
        ggplot2.geom_point(size=3) + \
        ggplot2.scale_linetype_manual(values=linemap) + \
        ggplot2.geom_line(size=1.5)

    # custom y-axis lines: major lines ("breaks") are every 10^n; 9
    #   minor lines ("minor_breaks") between major lines
    if (yscale == 'log'):
        pp = pp + \
            ggplot2.scale_y_log10(breaks = ro.r("10^(%d:%d)" % (gflops_range[0],
                                                                gflops_range[1])),
コード例 #26
0
# Create animated .gif of pickups by hour
import imageio

# Frames are the files in ./plots whose names start with 'taxi_pickups',
# played back in file-name order.
file_names = ['plots/' + fn
              for fn in sorted(os.listdir('./plots'))
              if fn.startswith('taxi_pickups')]
images = [imageio.imread(frame) for frame in file_names]
imageio.mimsave('./plots/pickups_movie.gif', images, duration=0.4)

# total pickups by date, color
p1 = (
    ggplot2.ggplot(pandas2ri.py2ri(date_avgs))
    + ggplot2.aes_string(x='date', y='total_pickups', color='type')
    + ggplot2.scale_colour_manual(values=robjects.StrVector(['green', 'yellow']))
    + ggplot2.geom_line()
    + ggplot2.theme(legend_position='bottom')
    + ggplot2.labs(y='Total Pickups', x='Date', title='Total Pickups by Date')
)
p1.save('./plots/pickups_by_date.png', width=6, height=5)

# average fare and tip by date, color
p2 = (
    ggplot2.ggplot(pandas2ri.py2ri(date_avgs))
    + ggplot2.aes_string(x='date', y='fare_amount', color='type')
    + ggplot2.scale_colour_manual(values=robjects.StrVector(['green', 'yellow']))
    + ggplot2.geom_line()
    + ggplot2.theme(legend_position='bottom')
    + ggplot2.labs(y='Average Fare ($)', x='Date', title='Average Fare by Date')
)
p2.save('./plots/fares_by_date.png', width=6, height=5)

p3 = ggplot2.ggplot(pandas2ri.py2ri(date_avgs)) + \
ggplot2.aes_string(x='date', y='tip_amount', color='type') + \
ggplot2.scale_colour_manual(values = robjects.StrVector(['green', 'yellow'])) + \
コード例 #27
0
# ggplot2 theme shared by the plots. The colours are looked up as R
# variables (color_*) through robjects.r. Keys contain dots, so they are
# passed as an unpacked dict. 'axis.ticks' used to be listed twice with the
# same value; a dict literal keeps only the last duplicate, so it is listed
# once here.
fte_theme = theme(
    **{
        'axis.ticks':
        element_blank(),
        'panel.background':
        element_rect(fill=robjects.r.color_background,
                     color=robjects.r.color_background),
        'plot.background':
        element_rect(fill=robjects.r.color_background,
                     color=robjects.r.color_background),
        'panel.border':
        element_rect(color=robjects.r.color_background),
        # Disabled entry, kept for reference:
        # 'panel.grid.major': element_line(color=robjects.r.color_grid_major, size=0.25),
        'panel.grid.minor':
        element_blank(),
        'legend.position':
        "right",
        'legend.background':
        element_rect(fill=robjects.r.color_background),
        'legend.text':
        element_text(size=10, color=robjects.r.color_axis_title),
        'legend.title':
        element_blank(),
        'plot.title':
        element_text(
            size=12, color=robjects.r.color_title, vjust=1.25, hjust=0),
        'axis.text.x':
        element_text(size=10, color=robjects.r.color_axis_text),
        'axis.text.y':
        element_text(size=10, color=robjects.r.color_axis_text),
        'axis.title.x':
        element_text(size=10, color=robjects.r.color_axis_title, vjust=0),
        'axis.title.y':
        element_text(size=10, color=robjects.r.color_axis_title, angle=90)
    })
コード例 #28
0
ファイル: squiggle.py プロジェクト: tmfarrell/ont_dap
def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot of the read.

    The mean signal is drawn as a step function of the time elapsed since
    the first event, split into facets stacked in a single column.

    Args:
        args: Options object; ``num_facets``, ``theme_bw`` and ``saveas``
            are read from it.
        filename: Name of the read file; used in the plot title and, when
            saving, as the base name of the output file.
        start_times: Event start times; must not be empty.
        mean_signals: Mean signal of each event, parallel to ``start_times``.

    Raises:
        Exception: If ``args.saveas`` is set and the target plot file
            already exists in the current directory.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - t_0
    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets.
    # Floor division keeps the facet ids integral on Python 3 as well; with
    # true division every event would land in its own fractional facet.
    num_events = len(r_mean_signals)
    events_per_facet = (num_events // args.num_facets) + 1
    # dummy variable to control faceting
    facet_category = robjects.FloatVector(
        [(i // events_per_facet) + 1 for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
    df = robjects.DataFrame(d)

    title = ('Squiggle plot for read: ' + filename +
             "\nTotal time (sec): " + str(total_time))
    pp = ggplot2.ggplot(df) \
        + ggplot2.aes_string(x='start', y='mean') \
        + ggplot2.geom_step(size=0.25) \
        + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
        + ggplot2.scale_x_continuous('Time (seconds)') \
        + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
        + ggplot2.ggtitle(title) \
        + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    if args.theme_bw:
        # Added last, exactly as before, so the layer order is unchanged.
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception(
                'Cannot create plot for %s: plot file %s already exists' %
                (filename, plot_file))
        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11, units="in", res=300)
        try:
            pp.plot()
        finally:
            # Close the device even if plotting fails, so the output file is
            # not left open.
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
コード例 #29
0
                                  group = 'group', \
                                  color = 'ObamaShare', \ 
                                  fill = 'ObamaShare')) + \
 ggplot2.scale_fill_gradient(high = 'blue', \
                             low = 'red') + \
 ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
 ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
 ggplot2.theme(**{ 'legend.position': 'left', \ 
                   'legend.key.size': R.r.unit(2, 'lines'), \
                   'legend.title' : ggplot2.element_text(size = 14, hjust=0), \
                   'legend.text': ggplot2.element_text(size = 12), \ 
                   'title' : ggplot2.element_text('Obama Vote Share and Distance to Railroads in IL'), \
                   'plot.title': ggplot2.element_text(size = 24),
                   'plot.margin': R.r.unit(R.r.rep(0,4),'lines'), \
                   'panel.background': ggplot2.element_blank(), \
                   'panel.grid.minor': ggplot2.element_blank(), \
                   'panel.grid.major': ggplot2.element_blank(), \
                   'axis.ticks': ggplot2.element_blank(), \ 
                   'axis.title.x': ggplot2.element_blank(), \
                   'axis.title.y': ggplot2.element_blank(), \
                   'axis.title.x': ggplot2.element_blank(), \
                   'axis.title.x': ggplot2.element_blank(), \
                   'axis.text.x': ggplot2.element_blank(), \
                   'axis.text.y': ggplot2.element_blank()} ) + \
 ggplot2.geom_line(ggplot2.aes(x='long',
                               y='lat',
                               group='group'),
                   data=IL_railroads,
                   color='grey',
                   size=0.2) + \
 ggplot2.coord_equal()
コード例 #30
0
ファイル: gui.py プロジェクト: kenziD/A-Wonderful-Life
def show4():
	"""Show a dodged bar chart comparing the time of the projects per day.

	Calls open4(), sources the end.R script, loads project2.csv and plots
	`time` per `day` with one bar per project on the current device.
	"""
	open4()
	r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',encoding="utf-8")
	data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')
	layers = [
		ggplot2.aes_string(x='day', y='time', fill='factor(project)'),
		ggplot2.geom_bar(stat='identity', position='dodge'),
		ggplot2.ggtitle("两项目时间对比图"),
		ggplot2.labs(x='日期', y='时间 (min)'),
		# Slanted tick labels on the x axis.
		ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)}),
	]
	pp = ggplot2.ggplot(data)
	for layer in layers:
		pp = pp + layer
	pp.plot()
コード例 #31
0
ファイル: convertAPTresults.py プロジェクト: sofiakp/rnapred
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type.

    For every experiment, each pair of its CEL columns in ``expr`` is drawn
    as a scatter plot titled with the Pearson correlation of the pair.

    Args:
        expr: 2-D array with one column per CEL file.
        expt_names: Experiment (cell type) names to process.
        expt_name_idx: Mapping from experiment name to the column indices
            of ``expr`` that belong to it.
        cel_names: CEL file name of each column; underscores are shown as
            dots in the axis labels.
        outdir: When given, each plot is saved there as
            ``<experiment>-<n>.png`` and the summary table is written to
            ``cor_summary.txt``; the directory is created if missing. When
            None, plots go to the current device and nothing is written.

    Returns:
        pandas DataFrame indexed by ``<experiment>-<n>`` with columns
        'name1', 'name2' and 'cor', one row per plotted pair.
    """
    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output directory once, up front. Doing it inside the pair
    # loop repeated the check for every plot and left the directory missing
    # (so the summary write failed) when there was no pair to plot.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        n_cels = len(tmp_idx)
        plot_idx = 0

        for i in range(n_cels):
            col_1 = tmp_idx[i]
            name1 = re.sub('_', '.', cel_names[col_1])
            for j in range(i + 1, n_cels):
                col_2 = tmp_idx[j]
                name2 = re.sub('_', '.', cel_names[col_2])
                plot_idx += 1
                x_vals = expr[:, col_1]
                y_vals = expr[:, col_2]
                cor = np.corrcoef(x_vals, y_vals)[0, 1]
                title = ex + '-' + str(plot_idx)
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(title)

                df = ro.DataFrame({
                    'x': ro.FloatVector(x_vals),
                    'y': ro.FloatVector(y_vals)
                })
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x = 'x', y = 'y') + \
                ggplot2.geom_point(size = 1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                                 'axis.title.x':ggplot2.element_text(size = 8),
                                 'axis.text.y':ggplot2.element_text(size = fsize),
                                 'axis.title.y':ggplot2.element_text(size = 8, angle = 90),
                                 'plot.title':ggplot2.element_text(size = fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    outfile = os.path.join(outdir, title + '.png')
                    ro.r.ggsave(filename=outfile,
                                plot=gp,
                                width=85,
                                height=85,
                                unit='mm')

    df = pd.DataFrame({
        'name1': names_1,
        'name2': names_2,
        'cor': cors
    },
                      index=titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
コード例 #32
0
ファイル: make_plots.py プロジェクト: ganong123/HARK
  color_grid_major = palette[3]
  color_axis_text = palette[6]
  color_axis_title = palette[7]
  color_title = palette[9]
  palette_lines <- brewer.pal("Set2", n=8)
''')

# Font size shared by the legend and axis text below.
size = 9
# ggplot2 theme for the plots; colours are looked up as R variables
# (color_*) through robjects.r. 'axis.ticks' used to be listed twice with
# the same value; a dict literal keeps only the last duplicate, so it is
# listed once here.
fte_theme = theme(**{'axis.ticks':element_blank(),
      'panel.background':element_rect(fill=robjects.r.color_background, color=robjects.r.color_background),
      'plot.background':element_rect(fill=robjects.r.color_background, color=robjects.r.color_background),
      'panel.border':element_rect(color=robjects.r.color_background),
      'panel.grid.minor':element_blank(),
      'legend.position':"right",
      'legend.background': element_rect(fill="transparent"),
      'legend.text': element_text(size=size,color=robjects.r.color_axis_title),
      'legend.title': element_text(size=size,color=robjects.r.color_axis_title),
      'plot.title':element_text(color=robjects.r.color_title, size=10, vjust=1.25),
      'axis.text.x':element_text(size=size,color=robjects.r.color_axis_text),
      'axis.text.y':element_text(size=size,color=robjects.r.color_axis_text),
      'axis.title.x':element_text(size=size,color=robjects.r.color_axis_title, vjust=0),
      # Disabled entry, kept for reference:
      #'panel.grid.major':element_line(color=robjects.r.color_grid_major,size=.25),
      'axis.title.y':element_text(size=size,color=robjects.r.color_axis_title,angle=90)})

# TODO: find an efficient way to change legend titles.
# Right now it takes two legend calls to make this work.
# Alternatives that were tried and failed:
#base_plot = lambda gr_name = 'variable': ggplot2.aes_string(x='x', y='value',group=gr_name,colour=gr_name, shape = gr_name)
#colors = ggplot2.scale_colour_manual(values=robjects.r.palette_lines, name = ltitle)

pandas2ri.activate() 
コード例 #33
0
ファイル: histNOG.py プロジェクト: zjhzxjm/NGS
def groupBar(fi_data):
    """Draw a grouped bar chart of ``Percent`` per ``Class`` into a PDF.

    The tab-delimited file ``fi_data`` is read with R's ``read.delim`` and
    plotted with bars dodged by ``Group``; the figure is written to
    ``<fi_data>.Bar.pdf``.

    Side effects: the table is stored in the R global environment as
    ``class.data`` and ``class_data``, and ``ggplot2.theme`` is rebound on
    the module to a wrapper that accepts underscore-style argument names.

    Args:
        fi_data: Path of the input table.
    """
    dev_off = robjects.r('dev.off')
    read_delim = robjects.r('read.delim')
    #print(fi_data)
    class_data = read_delim(fi_data, header=True, stringsAsFactors=False)
    robjects.r.assign('class.data', class_data)
    robjects.r.pdf(fi_data + ".Bar.pdf")
    try:
        robjects.r('class_data <- class.data')
        class_data = robjects.r('class_data')
        # Map Python-friendly keyword names onto the dotted R argument names.
        ggplot2.theme = SignatureTranslatedFunction(ggplot2.theme, init_prm_translate={'axis_text_x': 'axis.text.x', 'axis_text_y': 'axis.text.y', 'axis_text_fill': 'axis.text.fill'})
        bar = ggplot2.ggplot(class_data) + ggplot2.geom_bar(stat='identity', position='dodge') + ggplot2.aes_string(x='Class',y='Percent',fill='Group') + ggplot2.theme(axis_text_x=ggplot2.element_text(angle=90, hjust=1))
        bar.plot()
    finally:
        # Always close the PDF device, even when plotting fails; otherwise
        # the device stays open.
        dev_off()
コード例 #34
0
ファイル: graphics.py プロジェクト: hansenrl/cs249-2
#   lines

# plot with both log and linear y scales
# aes_string: set the axis labels and what we're plotting
# theme: sets the size of the legend keys (in units of text lines)
#   note the use of **{} to allow setting "legend.key.size" as a keyword
# scale_colour_manual: associate color datasets with actual colors and names
# geom_point and geom_line: thicker points and lines
# scale_linetype_manual: associate perf types with linetypes
for col_i, yscale in enumerate(['log', 'linear']): 
  vp = grid.viewport(**{'layout.pos.col':col_i+1, 'layout.pos.row': 1})
  pp = ggplot2.ggplot(df) + \
      ggplot2.aes_string(x='variable', y='Performance', color='color', 
                         shape='PerfType', linetype='PerfType') + \
      ggplot2.ggtitle('Performance vs. Color') + \
      ggplot2.theme(**{'legend.key.size' : ro.r.unit(1.4, "lines") } ) + \
      ggplot2.scale_colour_manual("Color", 
                                  values=colormap,
                                  breaks=colormap.names,
                                  labels=[elt[1] for elt in 
                                          colormap_labels]) + \
      ggplot2.geom_point(size=3) + \
      ggplot2.scale_linetype_manual(values=linemap) + \
      ggplot2.geom_line(size=1.5)

  # custom y-axis lines: major lines ("breaks") are every 10^n; 9
  #   minor lines ("minor_breaks") between major lines
  if (yscale == 'log'):
    pp = pp + \
        ggplot2.scale_y_log10(breaks = ro.r("10^(%d:%d)" % (gflops_range[0], 
                                                            gflops_range[1])),
コード例 #35
0
def plot_volcano_with_r(
    data,
    xlabel='Estimated effect (change in H/L ratio)',
    title='',
    max_labels=20,
    color_background='#737373',
    color_significant='#252525',
    color_significant_muted='#252525',
    label_only_large_fc=False,
    special_labels=None,
    special_palette=None,
    base_size=12,
    label_size=3,
    x='logFC',
    y='neg_log10_p_adjust',
    special_labels_mode='all',
    xlim=None,
    skip_labels=None,
    nudges=None,
):
    """Draw a volcano plot of ``data`` with ggplot2 and ggrepel through rpy2.

    ``data`` is prepared by ``transform_data_for_ggplot``. Points are
    coloured by their ``group`` column using the palette returned by
    ``r_label_palette``, and labelled with repelled text. Guide lines mark
    ``-log10(FDR_THRESHOLD_RESPONSE)`` on the y axis and
    +/- ``FC_THRESHOLD_RESPONSE`` on the x axis. The plot is drawn on the
    current R device; nothing is returned.

    Args:
        data: Input table, forwarded to ``transform_data_for_ggplot``.
        xlabel: Label of the x axis.
        title: Plot title.
        max_labels, label_only_large_fc, special_labels,
        special_labels_mode, skip_labels, nudges: Labelling options,
            forwarded to ``transform_data_for_ggplot``.
        color_background, color_significant, color_significant_muted,
        special_palette: Colour options, forwarded to ``r_label_palette``.
        base_size: Base font size of the minimal theme.
        label_size: Text size of the point labels.
        x: Column mapped to the x axis.
        y: Column mapped to the y axis.
        xlim: Optional x-axis limits; when None the axis is not limited.
    """
    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)

    guide_colour = '#BDBDBD'
    fc_cutoff = float(FC_THRESHOLD_RESPONSE)
    fdr_cutoff = float(-np.log10(FDR_THRESHOLD_RESPONSE))

    # Dotted theme element names cannot be spelled as Python keywords.
    panel_theme = {
        'panel.grid.major': r_ggplot2.element_blank(),
        'panel.grid.minor': r_ggplot2.element_blank(),
        'panel.border': r_ggplot2.element_rect(fill=robjects.rinterface.NA,
                                               color="black"),
    }
    title_theme = {
        'plot.title': r_ggplot2.element_text(hjust=0.5),
        #                               'axis.title.y': r_ggplot2.element_text((t = 0, r = 20, b = 0, l = 0)),
    }

    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    palette = r_label_palette(
        r_like_data,
        special_palette,
        color_background=color_background,
        color_significant=color_significant,
        color_significant_muted=color_significant_muted)

    # The x scale only receives limits when the caller asked for them.
    x_scale_args = {'labels': r_custom.formatterFunTwoDigits}
    if xlim is not None:
        x_scale_args['limits'] = robjects.r.c(*xlim)

    aes_text = r_ggplot2.aes_string(label='label')
    repel_options = {
        'show.legend': False,
        'point.padding': 0.25,
        'min.segment.length': 0,
        #'max.iter':0,
        'segment.color': guide_colour
    }

    # Components are added in this order.
    components = [
        r_ggplot2.theme_minimal(base_size=base_size),
        r_ggplot2.theme(**panel_theme),
        r_ggplot2.theme(
            text=r_ggplot2.element_text(family='Helvetica', face='plain')),
        r_ggplot2.theme(**title_theme),
        aes_points,
        r_ggplot2.scale_colour_manual(aes_points, values=palette),
        r_ggplot2.scale_x_continuous(**x_scale_args),
        r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit),
        r_ggplot2.geom_hline(yintercept=fdr_cutoff,
                             color=guide_colour,
                             alpha=.3),
        r_ggplot2.geom_vline(xintercept=fc_cutoff,
                             color=guide_colour,
                             alpha=.3),
        r_ggplot2.geom_vline(xintercept=-fc_cutoff,
                             color=guide_colour,
                             alpha=.3),
        r_ggplot2.geom_point(**{'show.legend': False}),
        aes_text,
        r_ggrepel.geom_text_repel(
            aes_text,
            nudge_x=r_dollar(r_data, 'nudgex'),
            nudge_y=r_dollar(r_data, 'nudgey'),
            size=label_size,
            family='Helvetica',
            **repel_options),
        r_ggplot2.labs(x=xlabel,
                       y='Adjusted p value (-log10)',
                       title=title),
    ]

    plot = r_ggplot2.ggplot(r_data)
    for component in components:
        plot = plot + component

    plot.plot()
コード例 #36
0
ファイル: r_plot.py プロジェクト: yarden/biorpy
def plot_qc_reads(qc_df):
    """
    Plot the read counts of a pipeline QC table as a faceted dot plot.

    One facet is drawn per count column (stacked in a single column of
    panels), with the counts on a log10-transformed axis and the
    coordinates flipped.

    Parameters
    ----------
    qc_df : pandas.DataFrame
        QC table holding a "sample" column plus the count columns
        "num_reads", "num_mapped", "num_unique_mapped" and
        "num_junctions". Any other columns are ignored and missing
        values are replaced by 0 before plotting.

    Returns
    -------
    None
        The figure is rendered through ``p.plot()``.

    Notes
    -----
    The ggplot2 default theme (``theme_set``) and the R ``scipen`` option
    are changed by this function and are not restored afterwards.
    """
    # Single source of truth for the plotted columns; the same order is
    # reused below for the factor levels.
    count_cols = ["num_reads",
                  "num_mapped",
                  "num_unique_mapped",
                  "num_junctions"]
    # Record NA values as 0 and keep only the columns that get plotted.
    # NOTE(review): zeros have no finite position on a log10 axis -- confirm
    # how such samples should be displayed.
    qc_df = qc_df.fillna(0)[["sample"] + count_cols]
    # Long format: one row per (sample, variable, value).
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    # Give the 'variable' column explicit factor levels in count_cols
    # order, which fixes the order of the facets.
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels=robj.StrVector(count_cols))
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    # Bias R towards fixed rather than scientific notation when printing
    # numbers. The previous option values returned by R are not kept.
    r.options(scipen=4)
    # NOTE(review): theme_line was superseded by element_line in newer
    # ggplot2 releases -- confirm the installed version still provides it.
    p = ggplot2.ggplot(qc_r) + \
        ggplot2.geom_point(aes_string(x="sample", y="value")) + \
        ggplot2.scale_y_continuous(trans=scales.log10_trans(),
                                   breaks=scales.trans_breaks("log10",
                                                              robj.r('function(x) 10^x')),
                                   labels=scales.trans_format("log10",
                                                              robj.r('math_format(10^.x)'))) + \
        r.xlab("CLIP-Seq samples") + \
        r.ylab("No. reads") + \
        ggplot2.coord_flip() + \
        ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
        theme(**{"panel.grid.major.x": element_blank(),
                 "panel.grid.minor.x": element_blank(),
                 "panel.grid.major.y": theme_line(size=0.5,colour="grey66",linetype=3)})
    p.plot()