def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals, image_file_type):
    """Write a two-group box plot with jittered points to an image file.

    The two value sequences are stacked into one data frame with a "sample"
    column ("set1" / "set2") and a "value" column, then drawn as a box plot
    overlaid with jittered points using the black-and-white theme.

    Args:
        grdevices: Object providing ``pdf``, ``png`` and ``dev_off``
            (presumably rpy2's imported R ``grDevices`` package).
        plotName: Path of the output file handed to the graphics device.
        samp_set1_vals: Iterable of numeric values labelled "set1".
        samp_set2_vals: Iterable of numeric values labelled "set2".
        image_file_type: ``"pdf"`` opens a PDF device; any other value opens
            a 512x512 PNG device.
    """
    # Materialise both inputs so that tuples, generators or arrays are
    # concatenated (not added element-wise) and can be measured with len().
    set1_vals = list(samp_set1_vals)
    set2_vals = list(samp_set2_vals)
    samp_vector = ["set1"] * len(set1_vals) + ["set2"] * len(set2_vals)

    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(samp_vector),
        "value": robjects.FloatVector(set1_vals + set2_vals),
    })

    gp = ggplot2.ggplot(dframe)
    pp = (
        gp
        + ggplot2.aes_string(x="sample", y='value')
        + ggplot2.geom_boxplot()
        + ggplot2.geom_jitter()
        + ggplot2.theme_bw()
    )

    # Open the device outside the try block: if opening fails there is
    # nothing of ours to close.
    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Always close the device, even when rendering raises, so it is not
        # left open for subsequent plots.
        grdevices.dev_off()
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Build a ggplot of the 'pi' column per interval.

    Runs the query produced by ``get_interval_query`` inside the R session
    (via ``dbGetQuery`` on the R-side object ``con``), stores the result in
    the R variable ``data``, re-levels ``data$interval`` and returns an
    unrendered plot object.

    Args:
        locus_table: Forwarded to ``get_interval_query``.
        interval_table: Forwarded to ``get_interval_query``.
        intervals: Forwarded to ``get_interval_query``.
        loci: Forwarded to ``get_interval_query``.
        boxplot: When truthy, return a box plot with jittered points coloured
            by locus; otherwise return bars filled by locus, faceted per
            locus.

    Returns:
        The composed ggplot object; nothing is drawn here.
    """
    sql = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(sql))

    # 'interval' is a factor, so its display order has to be set explicitly
    # (by the first integer of each interval, per the original author's
    # note); the ordering itself is delegated to order_intervals().
    level_spec = order_intervals(fetched[1])
    robjects.r('''data$interval <- factor(data$interval, {})'''.format(level_spec))

    canvas = ggplot2.ggplot(robjects.r('''data'''))

    if not boxplot:
        # Bar variant: one facet per locus, rotated x labels, no legend.
        bars = canvas + ggplot2.aes_string(x='interval', y='pi', fill='locus')
        bars = bars + ggplot2.geom_bar()
        bars = bars + ggplot2.facet_wrap(robjects.Formula('~ locus'))
        theme_options = {
            'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
            'legend.position': 'none',
        }
        bars = bars + ggplot2.opts(**theme_options)
        bars = bars + ggplot2.scale_y_continuous('phylogenetic informativeness')
        bars = bars + ggplot2.scale_x_discrete('interval (years ago)')
        return bars

    # Box-plot variant: translucent boxes with per-locus coloured jitter.
    # The dotted R argument name cannot be a Python keyword, hence the dict.
    box_options = {'outlier.size': 0, 'alpha': 0.3}
    boxes = canvas + ggplot2.aes_string(x='interval', y='pi')
    boxes = boxes + ggplot2.geom_boxplot(**box_options)
    boxes = boxes + ggplot2.geom_jitter(
        ggplot2.aes_string(color='locus'),
        size=3,
        alpha=0.6,
        position=ggplot2.position_jitter(width=0.25),
    )
    boxes = boxes + ggplot2.scale_y_continuous('phylogenetic informativeness')
    boxes = boxes + ggplot2.scale_x_discrete('interval (years ago)')
    return boxes
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Write a PNG box plot of per-barcode counts grouped by match status.

    The vectors returned by ``get_vectors`` are combined into a data frame
    with 'barcode', 'count' and 'matched' columns. Counts are plotted in
    millions (``count / 1000000``) against ``factor(matched)`` as a box plot
    with jittered points, titled with ``expt_id``.

    Args:
        barcodes_obs: Forwarded to ``get_vectors``.
        barcode_table: Forwarded to ``get_vectors``.
        directory: Directory in which the image is written.
        expt_id: Used both as the plot title and as the file name stem
            (``<directory>/<expt_id>.png``).
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes, 'count': counts, 'matched': matches})

    p = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x='factor(matched)', y='count / 1000000')
        + ggplot2.geom_boxplot(outlier_size=0)
        + ggplot2.geom_jitter()
        + ggplot2.ggtitle(label=expt_id)
        # xlab is reached through the nested ggplot2 attribute, presumably
        # the underlying R package handle exposed by the wrapper module.
        + ggplot2.ggplot2.xlab(label="")
        + ggplot2.scale_y_continuous(name="Count\n(million reads)")
    )

    filename = "{0}/{1}.png".format(directory, expt_id)
    # NOTE(review): R's png() names this argument `units`; `unit` presumably
    # works through R's partial argument matching -- confirm.
    grdevices.png(filename=filename, width=4, height=5, unit='in', res=300)
    try:
        p.plot()
    finally:
        # Close the device even if rendering fails so it is not leaked.
        grdevices.dev_off()
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Write a PDF box plot of 'abundance' by 'genotype'.

    Args:
        data: Data frame handed to ``ggplot2.ggplot``; the aesthetics refer
            to columns named 'genotype' and 'abundance'.
        plot_file_path: Path of the PDF file to create.
        title: Plot title.
        xlabel: X-axis label.
        ylabel: Y-axis label.
    """
    grdevices.pdf(file=plot_file_path)
    try:
        gp = ggplot2.ggplot(data)
        pp = (
            gp
            + ggplot2.aes_string(x='genotype', y='abundance')
            + ggplot2.geom_boxplot()
            + ggplot2.ggtitle(title)
            + ggplot2.labs(x=xlabel, y=ylabel)
            # Both a jittered and an un-jittered point layer are drawn, as in
            # the original implementation.
            + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1))
            + ggplot2.geom_point()
        )
        pp.plot()
    finally:
        # The device is opened before the plot is built, so close it whether
        # construction or rendering succeeds or raises.
        grdevices.dev_off()
def plot_dupl_url(self):
    """Plot and save the pages-per-URL histogram (URL-level duplicates).

    Keeps the rows of ``self.histogr`` whose 'type' is 'url', plots
    'frequency' against 'count' as jittered points on a log10 y axis with one
    facet per 'crawl' (five columns), and saves the figure to
    ``crawler/histogr_url_dupl.png`` under ``PLOTDIR``.

    Returns:
        The plot object that was saved.
    """
    # -- pages per URL (URL-level duplicates)
    wanted_types = ['url']
    histogram = self.histogr
    url_rows = histogram[histogram['type'].isin(wanted_types)]

    heading = 'Pages per URL (URL-level duplicates)'
    layers = (
        ggplot2.aes_string(x='count', y='frequency'),
        ggplot2.geom_jitter(),
        ggplot2.facet_wrap('crawl', ncol=5),
        ggplot2.labs(title=heading,
                     x='(duplicate) pages per URL',
                     y='log(frequency)'),
        ggplot2.scale_y_log10(),
        # ggplot2.scale_x_log10() could be appended for a log-log scale.
    )
    figure = ggplot2.ggplot(url_rows)
    for layer in layers:
        figure = figure + layer

    target = os.path.join(PLOTDIR, 'crawler/histogr_url_dupl.png')
    figure.save(target)
    # The filtered rows could also be dumped next to the image:
    # url_rows.to_csv(target + '.csv')
    return figure
# NOTE(review): another makePlot with the same signature is defined earlier in
# this file; being later, this definition is the one bound to the name.
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals, image_file_type):
    """Render a box plot comparing two sample sets and save it to disk.

    Builds a two-column data frame ("sample" holding the group label,
    "value" holding the measurements), draws a box plot with jittered points
    in the black-and-white theme, and writes it through a graphics device.

    Args:
        grdevices: Object providing ``pdf``, ``png`` and ``dev_off``
            (presumably rpy2's imported R ``grDevices`` package).
        plotName: Output file path passed to the graphics device.
        samp_set1_vals: Iterable of numeric values for group "set1".
        samp_set2_vals: Iterable of numeric values for group "set2".
        image_file_type: ``"pdf"`` selects the PDF device; every other value
            selects a 512x512 PNG device.
    """
    # Convert to lists first: `+` then always concatenates (arrays would add
    # element-wise) and mixed list/tuple inputs no longer raise.
    first = list(samp_set1_vals)
    second = list(samp_set2_vals)
    labels = ["set1"] * len(first) + ["set2"] * len(second)

    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(labels),
        "value": robjects.FloatVector(first + second),
    })

    pp = (
        ggplot2.ggplot(dframe)
        + ggplot2.aes_string(x="sample", y='value')
        + ggplot2.geom_boxplot()
        + ggplot2.geom_jitter()
        + ggplot2.theme_bw()
    )

    # The device is opened before the try block so that a failure to open it
    # does not trigger dev_off() on some unrelated device.
    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Guarantee the device is closed even when rendering raises.
        grdevices.dev_off()