def plot_total_bp(parser, args, tot_bp_per_pore):
    """Plot per-pore total bases sequenced (log10) on the MinION flowcell layout.

    parser: argparse parser (unused here; kept for the shared CLI signature)
    args: parsed CLI args; args.saveas names an output file (.pdf or .png),
          or None for an interactive plot window
    tot_bp_per_pore: mapping pore-id -> total bases sequenced by that pore
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    flowcell_layout = minion_flowcell_layout()

    # log10 of each pore's total; pores with no reads are plotted as 0
    pore_values = []
    for pore in flowcell_layout:
        if pore in tot_bp_per_pore:
            pore_values.append(math.log10(tot_bp_per_pore[pore]))
        else:
            pore_values.append(0)

    # make a data frame of the lists
    # BUG FIX: `range(...) * n` is a TypeError on Python 3 -- materialize the
    # list first.  BUG FIX: log10 values are floats, so they belong in a
    # FloatVector; the old IntVector silently truncated the fractional part.
    d = {
        "rownum": robjects.IntVector(list(range(1, 17)) * 32),
        "colnum": robjects.IntVector(sorted(list(range(1, 33)) * 16)),
        "log10_tot_bp": robjects.FloatVector(pore_values),
        "labels": robjects.IntVector(flowcell_layout),
    }
    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (
        gp
        + gg.aes_string(y="factor(rownum, rev(rownum))", x="factor(colnum)")
        + gg.geom_point(gg.aes_string(color="log10_tot_bp"), size=7)
        + gg.geom_text(gg.aes_string(label="labels"), colour="white", size=2)
        + gg.scale_colour_gradient2(low="black", mid="black", high="red")
        + gg.coord_fixed(ratio=1.4)
        + gg.labs(x=gg.NULL, y=gg.NULL)
    )

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print("Type enter to exit.")
        # BUG FIX: raw_input() does not exist on Python 3; fall back to input()
        try:
            raw_input()
        except NameError:
            input()
def plot_total_bp(parser, args, tot_bp_per_pore):
    """Plot the pore performance: log10 total bases per pore on the flowcell grid.

    NOTE(review): this is a duplicate definition of plot_total_bp -- an
    earlier one exists in this file; the later definition wins at import time.

    parser: argparse parser (unused; kept for the shared CLI signature)
    args: parsed CLI args; args.saveas selects .pdf/.png output or None
          for an interactive window
    tot_bp_per_pore: mapping pore-id -> total bases sequenced by that pore
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')
    flowcell_layout = minion_flowcell_layout()

    # Pores absent from tot_bp_per_pore get 0 so every grid cell is filled.
    pore_values = []
    for pore in flowcell_layout:
        if pore in tot_bp_per_pore:
            pore_values.append(math.log10(tot_bp_per_pore[pore]))
        else:
            pore_values.append(0)

    # make a data frame of the lists
    # BUG FIX: list-repetition on a range object fails under Python 3, and
    # log10 produces floats, which an IntVector would truncate -- use
    # list(range(...)) and a FloatVector.
    d = {'rownum': robjects.IntVector(list(range(1, 17)) * 32),
         'colnum': robjects.IntVector(sorted(list(range(1, 33)) * 16)),
         'log10_tot_bp': robjects.FloatVector(pore_values),
         'labels': robjects.IntVector(flowcell_layout)
         }
    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = gp + gg.aes_string(y='factor(rownum, rev(rownum))',
                            x='factor(colnum)') \
        + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size=7) \
        + gg.geom_text(gg.aes_string(label='labels'), colour="white", size=2) \
        + gg.scale_colour_gradient2(low="black", mid="black", high="red") \
        + gg.coord_fixed(ratio=1.4) \
        + gg.labs(x=gg.NULL, y=gg.NULL)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()
        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        # BUG FIX: raw_input() is Python-2-only; degrade gracefully on 3.x
        try:
            raw_input()
        except NameError:
            input()
def plot_similarity_matrix(self, item_type, image_file, title):
    '''Plot similarities of crawls (overlap of unique items) as heat map matrix'''
    # Flatten the nested similarity dict into columns keyed by row number
    # (1-based), ready for pandas.DataFrame.
    data = defaultdict(dict)
    n = 1
    for crawl1 in self.similarity[item_type]:
        for crawl2 in self.similarity[item_type][crawl1]:
            similarity = self.similarity[item_type][crawl1][crawl2]
            data['crawl1'][n] = MonthlyCrawl.short_name(crawl1)
            data['crawl2'][n] = MonthlyCrawl.short_name(crawl2)
            data['similarity'][n] = similarity
            data['sim_rounded'][n] = similarity  # to be rounded
            n += 1
    data = pandas.DataFrame(data)
    print(data)
    # select median of similarity values as midpoint of similarity scale
    midpoint = data['similarity'].median()
    # Label formatting: more decimals / smaller text when the value spread
    # is narrow; values below `minshown` are printed as plain '0'.
    decimals = 3
    textsize = 2
    minshown = .0005
    if (data['similarity'].max() - data['similarity'].min()) > .2:
        decimals = 2
        textsize = 2.8
        minshown = .005
    data['sim_rounded'] = data['sim_rounded'].apply(
        lambda x: ('{0:.' + str(decimals) + 'f}').format(x).lstrip('0')
        if x >= minshown else '0')
    print('Median of similarities for', item_type, '=', midpoint)
    # If there are too many crawls, drop the oldest ones (names sorted in
    # reverse, so the newest come first and are kept).
    # NOTE(review): the `n > self.MAX_MATRIX_SIZE` test keeps
    # MAX_MATRIX_SIZE + 1 crawls (n starts at 0) -- confirm intended.
    matrix_size = len(self.similarity[item_type])
    if matrix_size > self.MAX_MATRIX_SIZE:
        n = 0
        for crawl1 in sorted(self.similarity[item_type], reverse=True):
            short_name = MonthlyCrawl.short_name(crawl1)
            if n > self.MAX_MATRIX_SIZE:
                data = data[data['crawl1'] != short_name]
                data = data[data['crawl2'] != short_name]
            n += 1
    # Heat map: tiles colored by similarity (diverging around the median),
    # annotated with the rounded value.
    p = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='crawl2', y='crawl1',
                             fill='similarity', label='sim_rounded') \
        + ggplot2.geom_tile(color="white") \
        + ggplot2.scale_fill_gradient2(low="red", high="blue", mid="white",
                                       midpoint=midpoint, space="Lab") \
        + GGPLOT2_THEME \
        + ggplot2.coord_fixed() \
        + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45,
                                                               vjust=1,
                                                               hjust=1)}) \
        + ggplot2.labs(title=title, x='', y='') \
        + ggplot2.geom_text(color='black', size=textsize)
    img_path = os.path.join(PLOTDIR, image_file)
    p.save(img_path)
    # data kept as reference to data.frame (prevents R garbage collection)
    return p
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name): columns_to_data = {'subgroup': [], tname: [], 'count': []} max_count = 0 for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items(): for ss, n_count in sses_to_n_count.items(): columns_to_data['subgroup'].append(subgroup) columns_to_data[tname].append(ss) columns_to_data['count'].append(n_count) if n_count > max_count: max_count = n_count r_columns_to_data = { 'subgroup': ro.FactorVector(columns_to_data['subgroup'], levels=ro.StrVector( _sort_subgroup(set(columns_to_data['subgroup'])))), tname: ro.StrVector(columns_to_data[tname]), 'count': ro.IntVector(columns_to_data['count']) } df = ro.DataFrame(r_columns_to_data) max_count = int(max_count / 1000 * 1000 + 1000) histogram_file_path = os.path.join(OUTPUT_PATH, file_name) logging.debug( str.format("The Data Frame for file {}: \n{}", histogram_file_path, df)) grdevices.png(file=histogram_file_path, width=1200, height=800) gp = ggplot2.ggplot(df) pp = gp + \ ggplot2.aes_string(x='subgroup', y='count', fill=tname) + \ ggplot2.geom_bar(position="dodge",width=0.8, stat="identity") + \ ggplot2.theme_bw() + \ ggplot2.theme_classic() + \ ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \ ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)}) + \ ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40,angle=45)}) + \ ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)}) + \ ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]), limits=ro.IntVector([0, max_count])) + \ ggplot2.geom_text(ggplot2.aes_string(label='count'), size=6, angle=35, hjust=-0.1, position=ggplot2.position_dodge(width=0.8), vjust=-0.2) pp.plot() logging.info(str.format("Output step3 file {}", histogram_file_path)) grdevices.dev_off()
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count, file_name): columns_to_data = {'place': [], 'pos': [], 'count': []} max_count = 0 for place_pos_type, n_count in place_type_pos_type_to_count.items(): place_type, pos_type = place_pos_type.split('_') columns_to_data['place'].append(place_type) columns_to_data['pos'].append(pos_type) columns_to_data['count'].append(n_count) if n_count > max_count: max_count = n_count r_columns_to_data = { 'place': ro.StrVector(columns_to_data['place']), 'pos': ro.StrVector(columns_to_data['pos']), 'count': ro.IntVector(columns_to_data['count']) } df = ro.DataFrame(r_columns_to_data) if max_count > 1000: max_count = int(max_count / 1000 * 1000 + 1000) else: max_count = int(max_count / 100 * 100 + 100) histogram_file_path = os.path.join(OUTPUT_PATH, file_name) logging.debug( str.format("The Data Frame for file {}: \n{}", histogram_file_path, df)) grdevices.png(file=histogram_file_path, width=1024, height=512) gp = ggplot2.ggplot(df) pp = gp + \ ggplot2.aes_string(x='pos', y='count', fill='place') + \ ggplot2.geom_bar(position="dodge", stat="identity") + \ ggplot2.theme_bw() + \ ggplot2.theme_classic() + \ ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \ ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \ ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]), limits=ro.IntVector([0, max_count])) + \ ggplot2.geom_text(ggplot2.aes_string(label='count'), position=ggplot2.position_dodge(width=0.8), size=10, angle=35, hjust=-0.2, vjust=-0.5) pp.plot() logging.info(str.format("Output step3 file {}", histogram_file_path)) grdevices.dev_off()
# Subset to surface samples only.
# BUG FIX: the original R expression was
#     which("Sampledepth" == "0.30487806")
# which compares two string *literals* -- always FALSE, so the subset was
# always empty.  Compare the Sampledepth column of onlyfilt instead.
onlysurf = robjects.r(
    'onlysurface<-onlyfilt[ which(onlyfilt$Sampledepth=="0.30487806"),]')
# print "onlysurf"
# print onlysurf

# Alternative palettes kept for reference:
# colours2 = grdevices.topo_colors(10)
# colours2 = grdevices.rainbow(20)
colours2 = grdevices.cm_colors(10)
# print colours2
# colours = ggplot2.rainbow(54)
# bins=10

# Scatter of stations (Lon/Lat) colored by temperature, labelled by station.
gp = ggplot2.ggplot(onlysurf)
# gp = ggplot2.ggplot(onlyfilts)
gp = gp + ggplot2.aes_string(x="Lon", y="Lat", col="Temp", label="Station")
gp = gp + ggplot2.scale_colour_gradientn(colours=colours2)
# NOTE(review): 'offset' is not a ggplot2 geom_text parameter and is likely
# ignored with a warning -- 'nudge_x'/'nudge_y' was probably intended; confirm.
gp = gp + ggplot2.geom_text(col="black", offset=10)
gp = gp + ggplot2.geom_point(position="jitter")
gp = gp + ggplot2.ggtitle(graphtitle)

# Draw a background map of Europe via ggmap (the ggplot object above is
# built but never rendered here -- see the commented gp.plot() below).
robjects.r('library(ggmap)')
robjects.r('library(mapproj)')
robjects.r('map <- get_map(location = "Europe", zoom = 4)')
robjects.r('ggmap(map)')
# robjects.r('library(maps)')
# robjects.r('map("world", interior = FALSE)')
# robjects.r('map("state", boundary = FALSE, col="gray", add = TRUE)')
# gp.plot()
# Per-species Spearman correlation of sepal length vs. width, then two
# faceted scatter plots of the iris data via rpy2/ggplot2.
iris_py = pandas.read_csv("/home/yarden/iris.csv")
iris_py = iris_py.rename(columns={"Name": "Species"})

corrs = []
from scipy.stats import spearmanr
for species in set(iris_py.Species):
    entries = iris_py[iris_py["Species"] == species]
    c = spearmanr(entries["SepalLength"], entries["SepalWidth"])
    # BUG FIX: Python-2 print statement (`print "c: ", c`) is a SyntaxError
    # on Python 3; the rest of this file already uses print() calls.
    print("c:", c)

# compute r.cor(x, y) and divide up by Species
# Assume we get a vector of length Species saying what the
# correlation is for each Species' Petal Length/Width
# NOTE(review): 'iris' is presumably an R data frame bound earlier outside
# this chunk (it is not the pandas frame 'iris_py') -- confirm.
p = ggplot2.ggplot(iris) + \
    ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length",
                                          y="Sepal.Width")) + \
    ggplot2.facet_wrap(Formula("~Species"))
p.plot()
r["dev.off"]()
sys.exit(1)  # NOTE(review): everything below is unreachable while this stays

grdevices = importr('grDevices')

ggplot2.theme_set(ggplot2.theme_bw(12))
# BUG FIX: 'aes_string' was referenced unqualified inside geom_text, which
# raises NameError unless it was star-imported elsewhere; qualify it.
p = ggplot2.ggplot(iris) + \
    ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length",
                                          y="Sepal.Width")) + \
    ggplot2.facet_wrap(Formula('~ Species'), ncol=2, nrow=2) + \
    ggplot2.geom_text(ggplot2.aes_string(x="Sepal.Length",
                                         y="Sepal.Width"), label="t") + \
    ggplot2.GBaseObject(r('ggplot2::coord_fixed')())  # aspect ratio
p.plot()