def ops():
    """Plot physical-operator frequency per workload as horizontal bars.

    For each workload, loads "physops" counts, normalizes them to
    percentages of all queries, and writes the chart to
    ``<root_path>/plot_ops_<workload>.eps``.
    """
    for workload in workloads:
        data = load_data("physops", [workload])[workload]
        sns.set_context("paper", font_scale=font_scale,
                        rc={"lines.linewidth": 2.5})
        fig, ax = plt.subplots(1, figsize=(8, 4))
        # Order bars by frequency so the most common operator is on top.
        data.sort(order='count')
        counts = data['count'].astype(float)
        shares = counts / sum(counts) * 100
        positions = np.arange(len(data['phys_operator']))
        ppl.barh(ax, positions, shares, yticklabels=data['phys_operator'],
                 grid='x', annotate=True, color=colors[workload])
        plt.title("Operator Frequency in {}".format(labels[workload]))
        ax.set_xlabel('\% of queries')
        ax.xaxis.grid(False)
        ax.yaxis.grid(False)
        # Nudge the title upward and reserve room on the left for the
        # figure-level y label added below.
        ax.title.set_position((ax.title._x, 1.04))
        fig.tight_layout(rect=[0.03, 0, 1, 1])
        fig.text(0.02, 0.55, 'Physical operator', rotation=90, va='center')
        plt.savefig(root_path + 'plot_ops_%s.eps' % workload, format='eps')
def logical_ops_sqlshare():
    """Plot logical-operator frequency for SQLShare as horizontal bars.

    Normalizes operator counts to percentages, shows the figure, and
    writes transparent PDF and PNG copies next to the script.
    """
    fig, ax = plt.subplots(1, figsize=(8, 5))
    data = read_csv(['logical_ops', 'count'], False)
    # Sort ascending by count so the largest bar lands on top.
    data.sort(order='count')
    raw_counts = data['count'].astype(float)
    percentages = raw_counts / sum(raw_counts) * 100
    bar_positions = np.arange(len(data['logical_op']))
    ppl.barh(ax, bar_positions, percentages,
             yticklabels=data['logical_op'], grid='x', annotate=True)
    ax.set_xlabel('% of queries')
    # Leave a left margin for the figure-level axis label.
    fig.tight_layout(rect=[0.03, 0, 1, 1])
    fig.text(0.02, 0.55, 'Logical operator', rotation=90, va='center')
    plt.show()
    fig.savefig('plot_logops_sqlshare.pdf', format='pdf', transparent=True)
    fig.savefig('plot_logops_sqlshare.png', format='png', transparent=True)
def queries_per_table():
    """Plot how many queries touch each logical table, bucketed 1-4 and >=5.

    Reads per-table query counts from a CSV, collapses table names that
    only differ by a trailing 5-character hex suffix into one "logical"
    table, then renders the bucketed counts as annotated horizontal bars
    saved to ``<root_path>/plot_queries_per_table.eps``.
    """
    sns.set_context("paper", font_scale=font_scale, rc={"lines.linewidth": 2.5})
    with open('../results/sqlshare/queries_per_table.csv') as f:
        data = np.recfromcsv(f)
    num_queries = data['num_queries'].astype(float)
    tables = data['table']
    # Names ending in 5 uppercase-hex chars are treated as versioned
    # copies of a shared base ("logical") table.  NOTE: ur'' prefix is
    # Python 2 syntax.
    p = re.compile(ur'^.*[A-F0-9]{5}$')
    logical_tables = []
    for t in tables:
        short_name = re.findall(p, t)
        if len(short_name) == 0:
            if t not in logical_tables:
                logical_tables.append(t)
        else:
            # Strip the 5-char hex suffix to recover the base name.
            if short_name[0][0:-5] not in logical_tables:
                logical_tables.append(short_name[0][0:-5])
    # For each logical table keep the maximum query count over all
    # physical tables whose name contains it (substring match, so a
    # short base name may also match unrelated tables — intended?).
    num_queries_lt = []
    for lt in logical_tables:
        max_num_queries = 0
        for i,t in enumerate(tables):
            if lt in t:
                if num_queries[i] > max_num_queries:
                    max_num_queries = num_queries[i]
        num_queries_lt.append(max_num_queries)
    # Bucket into exactly 1, 2, 3, 4 queries, or 5-and-more.
    c = [0,0,0,0,0]
    for num in num_queries_lt:
        if num == 1.0:
            c[0] += 1
        elif num == 2.0:
            c[1] += 1
        elif num == 3.0:
            c[2] += 1
        elif num == 4.0:
            c[3] += 1
        else:
            c[4] += 1
    fig, ax = plt.subplots(1, figsize=(8, 4))
    ypos = np.arange(len(c))
    # color=g is a module-level color — TODO confirm it is defined.
    ppl.barh(ax, ypos, c, yticklabels=['1', '2', '3', '4', '$>=5$'],
             grid='x', annotate=True, color=g)
    plt.title("Number of queries per table")
    ax.set_ylabel('Number of queries')
    ax.set_xlabel('Number of tables')
    ax.xaxis.grid(False)
    ax.yaxis.grid(False)
    plt.tight_layout()
    plt.savefig(root_path + 'plot_queries_per_table.eps', format='eps')
def barh_chart(title, yvalues, xvalues, file_name):
    """Render a horizontal bar chart and save it as ``<file_name>.eps``.

    :param title: chart title.
    :param yvalues: labels for the bars (one per bar).
    :param xvalues: bar lengths, same length as ``yvalues``.
    :param file_name: output path without the ``.eps`` extension.
    """
    fig, ax = plt.subplots(1)
    plt.title(title)
    y_pos = np.arange(len(yvalues))
    # Draw the bars once.  The original called ppl.barh twice with the
    # same arguments, painting every bar twice (darkening translucent
    # colors); the duplicate call and an unused x_pos local are removed.
    ppl.barh(y_pos, xvalues, grid='x')
    plt.yticks(y_pos, yvalues)
    plt.savefig(file_name + ".eps")
    plt.close()
def enrich_plot(res, pre="test", x="-log2( p )", t="Enriched GO Terms"):
    """Draw a horizontal bar chart of enriched GO terms.

    :param res: records where r[0] is the term id, r[1] the term name,
        and r[-1] the value to plot (e.g. a transformed p-value).
    :param pre: output file prefix; the figure is saved as "<pre>.svg".
    :param x: x-axis label.
    :param t: chart title.
    """
    term_labels = ["%s(%s)" % (record[1], record[0]) for record in res]
    bar_values = [record[-1] for record in res]
    # Grow the figure height with the number of terms shown.
    fig = pylab.figure(figsize=(16, int(8 / 30.0 * len(term_labels)) + 2))
    ax = fig.add_subplot(1, 1, 1)
    ytick_positions = np.array(range(len(term_labels)))
    ppl.barh(ytick_positions, bar_values, yticklabels=term_labels,
             ax=ax, grid="x", annotate=False)
    pylab.subplots_adjust(left=0.6, bottom=0.2, top=0.8)
    ax.set_ylim([0, len(term_labels)])
    pylab.xlabel(x, fontsize=16)
    pylab.title(t, fontsize=30)
    fig.tight_layout()
    pylab.savefig(pre + ".svg", dpi=1000, bbox_inches='tight')
def _plot_evaluation(df_csv):
    """Provide plot of evaluation metrics, stratified by event size.

    Builds a grid of horizontal bar charts (rows = event-size bins,
    columns = sensitivity/precision) from the CSV of per-caller results
    and writes it to a PDF next to the input file.  Returns the PDF
    path, or None when prettyplotlib is unavailable.
    """
    if ppl is None:
        return None
    out_file = "%s.pdf" % os.path.splitext(df_csv)[0]
    # Only regenerate the plot when the CSV is newer than the PDF.
    if not utils.file_uptodate(out_file, df_csv):
        metrics = ["sensitivity", "precision"]
        df = pd.read_csv(df_csv).fillna("0%")
        fig, axs = ppl.subplots(len(_EVENT_SIZES), len(metrics))
        callers = sorted(df["caller"].unique())
        # Move "ensemble" to the end so it is always the last bar.
        if "ensemble" in callers:
            callers.remove("ensemble")
            callers.append("ensemble")
        for i, size in enumerate(_EVENT_SIZES):
            # size is a (start, end) pair — label for display, then
            # reformat to the "start-end" key used in the dataframe.
            size_label = "%s to %sbp" % size
            size = "%s-%s" % size
            for j, metric in enumerate(metrics):
                ax = axs[i][j]
                ax.get_xaxis().set_ticks([])
                ax.spines['bottom'].set_visible(False)
                ax.spines['left'].set_visible(False)
                # Metrics are percentages, so fix the axis to 0-100.
                ax.set_xlim(0, 100.0)
                if i == 0:
                    ax.set_title(metric, size=12, y=1.2)
                vals, labels = _get_plot_val_labels(df, size, metric, callers)
                ppl.barh(ax, np.arange(len(vals)), vals, yticklabels=callers)
                if j == 0:
                    # Caller names only on the leftmost column; the size
                    # label is placed in data coordinates — assumes the
                    # bar count keeps (80, 4.2) inside the axes (TODO confirm).
                    ax.tick_params(axis='y', which='major', labelsize=8)
                    ax.text(80, 4.2, size_label, fontsize=10)
                else:
                    ax.get_yaxis().set_ticks([])
                # Annotate each bar with its label just past the bar end.
                for ai, (val, label) in enumerate(zip(vals, labels)):
                    ax.annotate(label, (val + 1, ai + 0.35), va='center',
                                size=7)
        fig.set_size_inches(7, 6)
        fig.savefig(out_file)
    return out_file
def main(argv):
    """Plot species-recovery curves and max-species bars per tool.

    Command-line flags (via getopt):
      -p params, -t title, -o output PDF, -f comma-separated data files,
      -l comma-separated tool labels (keys of colour_dict),
      -x comma-separated x values (dataset sizes in million reads).

    NOTE(review): Python 2 code (print statements).  Locals such as
    file_names/labels/x_vals/outfile_name are only bound when the
    corresponding flag is passed — missing flags raise NameError below.
    """
    # get options passed at command line
    try:
        opts, args = getopt.getopt(argv, "p:o:t:f:l:x:")
    except getopt.GetoptError:
        #print helpString
        sys.exit(2)
    #print opts
    for opt, arg in opts:
        if opt == '-p':
            params = arg
        elif opt == '-t':
            title = arg
        elif opt == '-o':
            outfile_name = arg
        elif opt == '-f':
            file_names = arg
        elif opt == '-l':
            labels = arg
        elif opt == '-x':
            x_vals = arg
    file_names = file_names.split(",")
    #print file_names
    labels = labels.split(",")
    #print x_vals
    x_vals = x_vals.split(",")
    #print x_vals
    x_vals = [int(x) for x in x_vals]
    #colors = sns.color_palette("husl", 20)
    # Fixed RGB (0-255) colour per tool so colours are stable across plots.
    colour_dict = {'DiamondMegan_filtered':(190,190,190),
                   'CLARK':(178,223,138),
                   'CLARK-S':(51,160,44),
                   'DiamondEnsemble':(227,26,28),
                   'BlastEnsemble':(251,154,153),
                   'Kraken':(255,127,0),
                   'Kraken_filtered':(253,191,111),
                   'PhyloSift':(102,205,0),
                   'PhyloSift_filtered':(127,255,0),
                   'MetaPhlAn':(148,0,211),
                   'LMAT':(176,48,96),
                   'GOTTCHA':(106,61,154),
                   'MetaFlow':(177,89,40),
                   'Community':(0,0,0),
                   'DiamondMegan_filtered+Kraken_filtered':(202,178,214),
                   'CLARK+GOTTCHA':(255,233,0),
                   'BlastMegan_filtered+LMAT':(33,160,160),
                   'BlastMegan_filtered':(166,206,227),
                   'BlastMegan_filtered_liberal':(31,120,180),
                   'NBC':(255,0,255)}
    # Convert 0-255 RGB tuples to the 0-1 floats matplotlib expects.
    for tool in colour_dict:
        colour_dict[tool] = tuple(map(lambda x: x/255., colour_dict[tool]))
    #colors_255=[(102,205,0),(176,48,96),(51,160,44),(255,127,0),(178,223,138),(253,191,111),(255,0,255),(127,255,0),(227,26,28),(31,120,180),(251,154,153),(106,61,154),(148,0,211),(202,178,214),(177,89,40),(166,206,227),(190,190,190)]
    #colors_1 = []
    #for (r,g,b) in colors_255:
    #    colors_1.append((float(r)/255.0, float(g)/255.0, float(b)/255.0))
    #print colors_1
    #colors = colors_1
    figure = plt.figure(figsize=(9, 5), dpi=300)
    font = {'family' : 'Arial', 'weight' : 'normal', 'size' : 8}
    rc('font', **font)  # pass in the font dict as kwargs
    ##### line plot ######
    lineplt = figure.add_subplot(121)
    data = []
    colors = []
    # One series per input file: first column holds species counts per
    # dataset size — TODO confirm column semantics against the callers.
    for file_name,label in zip(file_names,labels):
        colors.append(colour_dict[label])
        data_points = list(np.genfromtxt(file_name, usecols=0))
        print label,data_points
        data.append(data_points)
    # Per-tool maximum, used to normalize each curve to percent-of-max.
    data_max = []
    for data_points in data:
        data_max.append(int(max(data_points)))
    data_percent = []
    for i, data_points in enumerate(data):
        percents = []
        for val in data_points:
            per = float(val) / data_max[i] * 100
            percents.append(per)
        data_percent.append(percents)
    print data_max
    print labels
    for i, data_percents in enumerate(data_percent):
        ppl.plot(x_vals[0:len(data_percents)], data_percents, '-',
                 color=colors[i])
    xticks(x_vals, rotation=60)
    lineplt.set_xlabel("Input dataset size (Million Reads)")
    yticks(np.arange(0,105,10))
    ylim(top=110)
    lineplt.set_ylabel("Percent max species recovered")
    #plt.show()
    ##### bar plot: per-tool maxima, sorted ascending by count #####
    barplt = figure.add_subplot(122)
    labels, data_max, colors = [list(x) for x in
                                zip(*sorted(zip(labels, data_max, colors),
                                            key=lambda pair: pair[1]))]
    ppl.barh(np.arange(0,len(labels)), data_max, log=1, color=colors)
    yticks(np.arange(0,len(labels)), labels)
    barplt.yaxis.label.set_verticalalignment('top')
    barplt.set_xlabel("Maximum number of species predicted")
    #plt.legend(labels, bbox_to_anchor=(0.98,0.68))
    plt.tight_layout()
    plt.savefig(outfile_name, format='pdf')
def test_barh_xticklabels():
    """Smoke-test ppl.barh with explicit y-tick labels."""
    np.random.seed(14)
    count = 10
    positions = np.arange(count)
    heights = np.abs(np.random.randn(count))
    ppl.barh(positions, heights, yticklabels=UPPERCASE_CHARS[:count])
def test_barh_annotate_user():
    """Smoke-test ppl.barh with caller-supplied annotation values."""
    np.random.seed(14)
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights, annotate=range(10,20))
def test_barh_annotate():
    """Smoke-test ppl.barh with automatic annotations enabled."""
    np.random.seed(14)
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights, annotate=True)
def test_barh_grid():
    """Smoke-test ppl.barh with a vertical grid."""
    np.random.seed(14)
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights, grid='x')
def test_barh():
    """Smoke-test ppl.barh with default styling."""
    np.random.seed(14)
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights)
def queries_per_table(): sns.set_context("paper", font_scale=font_scale, rc={"lines.linewidth": 2.5}) with open('../results/sqlshare/queries_per_table.csv') as f: data = np.recfromcsv(f) num_queries = data['num_queries'].astype(float) tables = data['table'] p = re.compile(ur'^.*[A-F0-9]{5}$') logical_tables = [] for t in tables: short_name = re.findall(p, t) if len(short_name) == 0: if t not in logical_tables: logical_tables.append(t) else: if short_name[0][0:-5] not in logical_tables: logical_tables.append(short_name[0][0:-5]) num_queries_lt = [] for lt in logical_tables: max_num_queries = 0 for i, t in enumerate(tables): if lt in t: if num_queries[i] > max_num_queries: max_num_queries = num_queries[i] num_queries_lt.append(max_num_queries) c = [0, 0, 0, 0, 0] for num in num_queries_lt: if num == 1.0: c[0] += 1 elif num == 2.0: c[1] += 1 elif num == 3.0: c[2] += 1 elif num == 4.0: c[3] += 1 else: c[4] += 1 fig, ax = plt.subplots(1, figsize=(8, 4)) ypos = np.arange(len(c)) ppl.barh(ax, ypos, c, yticklabels=['1', '2', '3', '4', '$>=5$'], grid='x', annotate=True, color=g) plt.title("Number of queries per table") ax.set_ylabel('Number of queries') ax.set_xlabel('Number of tables') ax.xaxis.grid(False) ax.yaxis.grid(False) plt.tight_layout() plt.savefig(root_path + 'plot_queries_per_table.eps', format='eps')