def distopcounts():
    """Scatter query counts against the number of distinct operators used.

    Two series are loaded through ``read_csv`` and drawn on one axes with a
    log-scaled y axis: one labelled "SDSS" and one labelled "SQLShare".
    The figure is shown and then written to ``plot_dist_physops_query.pdf``
    and ``plot_dist_physops_query.png``.
    """
    figure, axes = plt.subplots(1, figsize=(8, 4))
    axes.set_yscale('log')

    sdss = read_csv(['distinct_ops', 'counts'], True)
    ppl.scatter(axes, sdss['distinct_ops'], sdss['counts'],
                color=pcs[0], marker="o", label="SDSS", s=100)

    sqlshare = read_csv(['distinct_physical_ops'], False)
    ppl.scatter(axes, sqlshare['distinct_physical_ops'], sqlshare['count'],
                color=pcs[1], marker="v", label="SQLShare", s=100)

    axes.set_xlabel('Distinct physical operators used')
    axes.set_ylabel('# of queries')
    ppl.legend(axes, loc='lower right')
    axes.set_xlim(0)
    axes.set_ylim(0)
    figure.tight_layout()
    plt.show()

    # Same file stem for both output formats.
    for extension in ('pdf', 'png'):
        figure.savefig('plot_dist_physops_query.' + extension,
                       format=extension, transparent=True)
def generate_chart(self, how_many=-1):
    """Plot the values stored in the log file and save the chart as a PNG.

    The log file is ``self.log_filename`` inside ``self.log_dir``; the chart
    is written next to it, with the last four characters of the file name
    replaced by ``.png``.

    Args:
        how_many: number of trailing samples to plot. ``-1`` (the default)
            plots every sample in the file.

    Returns:
        None. When the log file does not exist a message is logged and
        nothing is plotted.
    """
    log_path = os.path.join(self.log_dir, self.log_filename)

    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently disable this guard.
    if not os.path.exists(log_path):
        self.logger.log(self.logger.red(log_path + ' not found!'))
        return

    data = np.loadtxt(log_path)
    if how_many != -1:
        data = data[-1 * how_many:]
    x_labels = range(data.shape[0])

    fig, ax = plt.subplots(1)
    plt.title('GPU memory usage')
    plt.xlabel('Time')
    plt.ylabel('Memory')
    ppl.plot(ax, x_labels, data, '-r', label='memory')
    ppl.legend(ax, loc='lower right')

    chart_name = os.path.join(self.log_dir, self.log_filename[:-4] + '.png')
    fig.savefig(chart_name, bbox_inches='tight')
    plt.close(fig)  # release the figure once it is on disk
    self.logger.log('chart saved at', chart_name)
def table_touch(dataset = True):
    """Scatter query counts against table touch on a log-scaled y axis.

    The "SDSS" series is always drawn. The second series depends on
    ``dataset``.

    Args:
        dataset: when true, read the ``dataset_touch`` column, label the
            series "SQLShare (Dataset)" and save to ``plot_touch_dataset.*``;
            otherwise read ``touch``, label it "SQLShare" and save to
            ``plot_touch.*``.
    """
    fig, ax = plt.subplots(1)
    ax.set_yscale('log')

    sdss = read_csv(['touch', 'counts'], True)
    ppl.scatter(ax, sdss['touch'], sdss['counts'], label="SDSS", marker="o", s=100)

    # Everything that differs between the two modes is selected up front.
    if dataset:
        column = 'dataset_touch'
        series_label = "SQLShare (Dataset)"
        series_colour = pcs[0]
        file_stem = 'plot_touch_dataset'
    else:
        column = 'touch'
        series_label = "SQLShare"
        series_colour = pcs[1]
        file_stem = 'plot_touch'

    sqlshare = read_csv([column], False)
    ppl.scatter(ax, sqlshare[column], sqlshare['count'], label=series_label,
                marker="v", s=100, color=series_colour)

    ax.set_xlabel('Table touch')
    ax.set_ylabel('# of queries')
    ppl.legend(ax)
    ax.set_ylim(0)
    plt.show()

    for extension in ('pdf', 'png'):
        fig.savefig(file_stem + '.' + extension, format=extension, transparent=True)
def new_tables_cdf():
    """Plot cumulative table coverage against normalised query position.

    Two step curves (each overlaid with scatter markers) are drawn, labelled
    "SDSS" and "SQLShare". The x values of each series are divided by their
    last element. The figure is shown and saved as ``num_new_tables.pdf``
    and ``num_new_tables.png``.
    """
    fig, ax = plt.subplots(1)

    def draw_series(position, coverage, series_label, colour):
        # Step line plus markers at the same points, sharing one colour.
        ppl.plot(ax, position, coverage, label=series_label, color=colour,
                 linewidth=2, ls='-.', drawstyle='steps-post')
        ppl.scatter(ax, position, coverage, color=colour, marker="o", s=100)

    # First series: per-query new-table counts, accumulated into a fraction.
    sdss = read_csv(['query_number', 'num_new_tables'], True)
    new_tables = sdss['num_new_tables'].astype(float)
    new_tables /= sum(new_tables)
    query_pos = sdss['query_number'].astype(float)
    query_pos /= query_pos[-1]
    draw_series(query_pos, np.cumsum(new_tables), "SDSS", cs[0])

    # Second series: the 'tables' column is scaled by its last value.
    sqlshare = read_csv(['table_coverage'], False)
    covered = sqlshare['tables'].astype(float)
    covered /= covered[-1]
    query_pos = sqlshare['query_id'].astype(float)
    query_pos /= query_pos[-1]
    draw_series(query_pos, covered, "SQLShare", cs[1])

    ppl.legend(ax, loc='lower right')
    plt.gca().yaxis.set_major_formatter(formatter)
    ax.set_xlabel('Query number')
    ax.set_ylabel('% of newly used table')
    ax.set_ylim(0, 1.01)
    ax.set_xlim(0, 1)
    ax.yaxis.grid()
    plt.show()

    for extension in ('pdf', 'png'):
        fig.savefig('num_new_tables.' + extension, format=extension, transparent=True)
def plot_originaldata_guess_fit(self, filename, index):
    '''
    Draw the experimental points together with the guess curve and the
    fitted curve on a log-scaled x axis, and save the figure as a PNG.

    The curves are evaluated on 100 log-spaced points spanning one decade
    beyond the data on each side; the first and last of those points are
    stored in ``self.xmin`` and ``self.xmax``. The output path is
    ``filename`` without its last four characters, followed by
    ``_<index + 1>.png``.
    '''
    fig, ax = plt.subplots()

    # Experimental points.
    ppl.plot(ax, self.concentration, self.response, 'o', label='experimental data')

    # Evaluation grid, rounded outwards to whole decades.
    log_conc = np.log10(self.concentration)
    lowest_decade = np.floor(np.amin(log_conc) - 1)
    highest_decade = np.ceil(np.amax(log_conc) + 1)
    grid = 10 ** np.linspace(lowest_decade, highest_decade, 100)
    self.xmin = grid[0].copy()
    self.xmax = grid[-1].copy()

    # Guess first, then fit, so the colour cycle order stays the same.
    for coefficients, curve_label in ((self.guess, 'guess'), (self.coeffs, 'fit')):
        curve = hill_equation(coefficients, grid, self.Component, self.YminFixed, False)
        ppl.plot(ax, grid, curve, label=curve_label)

    ppl.legend(ax, loc='lower right')
    ax.set_xscale('log')

    run_number = str(index + 1)
    ax.set_title(os.path.basename(filename)[:-4] + ' ' + run_number)
    fig.savefig(filename[:-4] + '_' + run_number + '.png')
    plt.close(fig)
def plot_lambda_effect():
    """Plot training and test RMSE of a degree-9 fit against log10(lambda).

    Forty noisy training samples and one hundred noisy test samples are
    drawn on [0, 1]. ``PolCurFit`` is run for lambda = 10**5 down to
    10**-14, and the RMSE on both sets is plotted over the exponent range
    -14..5. The figure is saved as ``exercise_lambda_rmse_plot40.pdf``.
    """
    x_training = np.linspace(0, 1, 40)
    y_training = exercise_11.sample_gaussian(exercise_11.f, 0, 0.3, x_training)

    # One fit per regularisation strength, lambda = 10**-neg_exponent.
    weights_vector = []
    for neg_exponent in range(-5, 15):
        # Materialise the (x, y) pairs: a bare zip() is a one-shot iterator
        # on Python 3 and could only be traversed once by PolCurFit.
        pairs = list(zip(x_training, y_training))
        weights_vector.append(PolCurFit(pairs, 9, labda=10 ** -neg_exponent))

    x_test = np.linspace(0, 1, 100)
    y_test = exercise_11.sample_gaussian(exercise_11.f, 0, 0.3, x_test)

    # Reverse so that index 0 corresponds to the smallest exponent (-14),
    # matching the x axis used below.
    weights_vector = weights_vector[::-1]
    rmse_training = np.zeros(len(weights_vector))
    rmse_test = np.zeros(len(weights_vector))
    for i, weights in enumerate(weights_vector):
        fitted_training = np.array([eval_polynomial(weights, x) for x in x_training])
        fitted_test = np.array([eval_polynomial(weights, x) for x in x_test])
        rmse_training[i] = np.sqrt(np.mean((fitted_training - y_training) ** 2))
        rmse_test[i] = np.sqrt(np.mean((fitted_test - y_test) ** 2))

    exponents = np.arange(-14, 6)
    fig, ax = plt.subplots(1)
    ppl.plot(ax, exponents, rmse_training, linewidth=0.75, label='RMSE on training set')
    ppl.plot(ax, exponents, rmse_test, linewidth=0.75, label='RMSE on test set')
    ppl.legend(ax, loc='upper right', ncol=2)
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'$log_{10} \lambda$')
    ax.set_ylabel('RMSE')
    ax.set_title('RMSE for the polynomial approximation of sine function')
    fig.savefig('exercise_lambda_rmse_plot40.pdf')
def plot_clusters(clusters, candidates, bounds, vloc, hulls, shrink=0.9):
    """Plot all `clusters` among `candidates` with the `bounds` of the city
    (or at least `shrink` of them). Also plot convex `hulls` of gold areas
    if provided.

    Candidates that belong to no cluster are drawn as a smaller, gray
    "noise" series. `vloc` is indexed as ``vloc[indices, 0]`` and
    ``vloc[indices, 1]`` to obtain the coordinates of each series. `hulls`
    may be falsy, in which case no hull is drawn.
    """
    xbounds, ybounds = bounds
    unique_labels = len(clusters)
    clustered = set().union(*map(list, clusters))
    noise = list(candidates.difference(clustered))

    # One colour is needed per cluster plus one for the noise series. The
    # fixed palette has five entries, so it only suffices for up to four
    # clusters; with five, zip() below would silently drop the last cluster.
    if unique_labels > 4:
        colors = mpl.cm.Spectral(np.linspace(0, 1, unique_labels + 1))
    else:
        colors = [gray, red, green, blue, orange]

    plt.figure(figsize=(20, 15))
    # k == -1 is the noise series, k >= 0 are the real clusters.
    for k, (indices, col) in enumerate(zip([noise] + clusters, colors), start=-1):
        is_noise = k == -1
        if is_noise:
            col = 'gray'
        ppl.scatter(vloc[indices, 0], vloc[indices, 1],
                    s=16 if is_noise else 35,
                    color=col,
                    alpha=0.6 if is_noise else 0.8,
                    label='noise' if is_noise else 'cluster {}'.format(k + 1))

    hulls = hulls or []
    for idx, hull in enumerate(hulls):
        # Repeat the first vertex so that the outline is closed. list() is
        # needed because range objects cannot be concatenated on Python 3.
        first_again = list(range(len(hull))) + [0]
        ppl.plot(hull[first_again, 0], hull[first_again, 1], '--',
                 c=ppl.colors.almost_black, lw=1.0, alpha=0.9,
                 label='gold region' if idx == 0 else None)

    plt.xlim(shrink * xbounds)
    plt.ylim(shrink * ybounds)
    ppl.legend()
def plot():
    """Plot 10 noisy observations against the underlying function.

    The function ``f`` is evaluated on 1000 points of [0, 1]; the
    observations come from ``sample_gaussian``. The figure is saved as
    ``exercise_11.pdf``.
    """
    # Few points for the noisy signal ...
    noisy_x = np.linspace(0, 1, 10)
    noisy_y = sample_gaussian(f, 0, 0.3, noisy_x)

    # ... and many for the real one; raise this for higher-resolution images.
    dense_x = np.linspace(0, 1, 1000)
    dense_y = []
    for point in dense_x:
        dense_y.append(f(point))

    fig, ax = plt.subplots(1)
    line_style = dict(linewidth=0.75)
    ppl.plot(ax, noisy_x, noisy_y, '-o', label='Observations', **line_style)
    ppl.plot(ax, dense_x, dense_y, label='Function', **line_style)
    ppl.legend(ax, loc='upper right', ncol=2)

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Noisy observations versus real function')
    fig.savefig('exercise_11.pdf')
def plotorifit(filename, std, peak, popendiff, interval, precentage):
    """Plot the original std trace, the fitted peaks and the fit difference.

    Args:
        filename: name used in the plot title and the final message.
        std: sequence plotted as the "original" curve.
        peak: peak positions; a spike of the fit difference is drawn at each.
        popendiff: per-peak values forwarded to ``drawpeak``/``orifitdiff``.
        interval: window size, used in the title and the output file name.
        precentage: forwarded unchanged to ``orifitdiff``.

    The difference curve is rescaled so that its maximum equals half the
    maximum of ``std``. If every difference is zero the values are printed
    and the process exits with status 0.

    NOTE(review): ``result`` and ``pathfilename`` are not parameters; they
    are read from an enclosing/module scope that is not visible here.
    """
    xfit, yfit = drawpeak(peak, popendiff, interval)

    # Triangle-shaped spikes: zero just before and after each peak.
    xdiff = [0]
    ydiff = [0]
    for index, position in enumerate(peak):
        spike = orifitdiff(std, position, popendiff[index], interval, precentage)
        xdiff.extend([position - 1, position, position + 1])
        ydiff.extend([0, spike, 0])
    xdiff.append(len(result) - 1)
    ydiff.append(0)

    ydiff = np.array(ydiff)
    largest_diff = np.max(ydiff)
    # The guard must run before the rescaling: dividing by a zero maximum
    # yields inf/nan, after which a comparison with 0 can never be true.
    if largest_diff == 0:
        print(ydiff)
        print(largest_diff)
        sys.exit(0)
    ydiff = ydiff * (np.max(std) / largest_diff / 2)

    fig, ax = plt.subplots(1)
    ppl.plot(ax, xfit, yfit, label='curve_fit', linewidth=1.)
    ppl.plot(ax, np.arange(len(std)), std, label='original', linewidth=0.75)
    ppl.plot(ax, xdiff, ydiff, label='fit_diff', linewidth=1)
    ppl.legend(ax)
    ax.set_title(filename + ' window ' + str(interval) + ' original std and fit')
    fig.savefig(pathfilename + '_window_' + str(interval) + '_original_std_fit' + '.png', dpi=500)
    plt.close(fig)
    # Single pre-formatted argument prints the same on Python 2 and 3.
    print('%s Standard deviation and curve fit plot saved.' % (filename,))
def _weekly_counts(queryset, start_date, num_weeks):
    """Return one row count per week for `queryset`, bucketed on ``created``.

    The first bucket has no lower bound (everything created before
    ``start_date + 7 days``); each following bucket covers the half-open
    seven-day window after the previous one.
    """
    week = datetime.timedelta(days=7)
    counts = [queryset.filter(created__lt=start_date + week).count()]
    for week_index in range(1, num_weeks):
        window_start = start_date + week_index * week
        counts.append(queryset.filter(created__gte=window_start,
                                      created__lt=window_start + week).count())
    return counts


def participation():
    """Plot weekly logins, QAT ratings and comments and save the chart.

    Comments and ratings are restricted to active users; login events are
    those with ``log_type=0``. The number of weeks and the start date come
    from ``calculate_week``. The figure is shown and then saved to
    ``../../client/media/images/participation.png``.
    """
    user_set = User.objects.filter(is_active=True)
    num_weeks, start_date = calculate_week(user_set)

    comment_data = _weekly_counts(
        DiscussionComment.objects.filter(user__in=user_set), start_date, num_weeks)
    rating_data = _weekly_counts(
        UserRating.objects.filter(user__in=user_set), start_date, num_weeks)
    # The login series previously re-appended the last rating count for
    # every week after the first; it now uses its own weekly counts.
    login_data = _weekly_counts(
        LogUserEvents.objects.filter(log_type=0), start_date, num_weeks)

    fig, ax = plt.subplots()
    ax.set_xlabel('Week')
    ax.set_ylabel('Weekly Participation')
    ax.set_title('Weekly User Participation')
    x = range(1, num_weeks + 1)
    ppl.plot(ax, x, login_data, label='# Participants', linewidth=1.5)
    ppl.plot(ax, x, rating_data, label='# QAT ratings', linewidth=1.5)
    ppl.plot(ax, x, comment_data, label='# suggestions', linewidth=1.5)
    ppl.legend(ax, loc='lower right')
    plt.show()
    fig.savefig('../../client/media/images/participation.png')
def test_legend():
    """Scatter eight labelled point clouds and add a legend.

    The random seed is fixed so that the drawn points are the same on
    every run; cloud ``i`` is normally distributed around ``(i, i)``.
    """
    np.random.seed(12)

    # One cloud per label so that the whole colour range is shown.
    n_points = 1000
    for centre in range(8):
        xs = np.random.normal(loc=centre, size=n_points)
        ys = np.random.normal(loc=centre, size=n_points)
        ppl.scatter(xs, ys, label=str(centre))

    ppl.legend()
def plot_depth_ratios(depths, ratios, quals, in_file, title):
    """Scatter variant/total ratio against depth, one series per quality range.

    Args:
        depths, ratios, quals: passed to ``_group_ratios_by_qual``, which
            yields ``(depths, ratios, quality_range)`` groups.
        in_file: input path; the plot is written next to it as
            ``<in_file without extension>-depthratios.png``.
        title: axes title.
    """
    out_file = "%s-depthratios.png" % os.path.splitext(in_file)[0]
    fig, ax = plt.subplots(1)
    for ds, rs, qualrange in _group_ratios_by_qual(depths, ratios, quals):
        # Pre-formatted so the output is the same on Python 2 and 3.
        print("%s %s" % (qualrange, len(ds)))
        # Plot this group's own points; plotting the full `depths`/`ratios`
        # here would draw every point once per quality range.
        ppl.scatter(ax, x=ds, y=rs, label=qualrange)
    ppl.legend(ax, title="Quality score range")
    ax.set_title(title)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Variant/Total ratio")
    fig.savefig(out_file)
def scatterFreeByMissingEdges(type_header, type_table, config, instanceType,
                              solsPath, solsExt, figName=None):
    """Scatter free edges against missing edges, one series per instance size.

    For every instance, "missing" is the edge count of its solution file
    minus the fixed edges, and "free" is ``size * (size - 1) / 2`` minus the
    fixed and blocked edges.

    Args:
        type_header: unused.
        type_table: nested mapping ``config -> instanceType -> size -> column``.
        config, instanceType: keys selecting the data to plot.
        solsPath: directory containing the solution files. From size 80
            onwards ``'_80-90'`` is appended to it (sizes are visited in
            ascending order, so the suffix also applies to larger sizes).
        solsExt: extension of the solution files.
        figName: when given, the figure is saved under this name.

    Raises:
        Exception: if ``config`` or ``instanceType`` is not in ``type_table``.
    """
    if config not in type_table:
        raise Exception("Config \"" + config + "\" not found!")
    if instanceType not in type_table[config]:
        raise Exception("Instance type \"" + instanceType + "\" not found!")

    by_size = type_table[config][instanceType]
    fig, ax = plt.subplots(1)
    # sorted(mapping) iterates the keys on both Python 2 and 3.
    for size in sorted(by_size):
        columns = by_size[size]
        # List comprehensions: np.array(map(...)) would wrap a lazy map
        # object instead of the values on Python 3.
        fixedEdges = np.array([int(v) for v in columns["preproc.fixedEdges"]])
        blockedEdges = np.array([int(v) for v in columns["preproc.blockedEdges"]])
        instanceNumbers = np.array(columns["instanceNumber"])

        if size == 80:
            solsPath += '_80-90'

        missingEdges = []
        freeEdges = []
        for fEdges, bEdges, instanceNumber in zip(fixedEdges, blockedEdges, instanceNumbers):
            instanceName = "%s_%03d_%02d%s" % (instanceType, size, instanceNumber, solsExt)
            solEdges = getNEdges(os.path.join(solsPath, instanceName))
            missingEdges.append(solEdges - fEdges)
            # Floor division keeps the edge count an integer on Python 3.
            freeEdges.append(((size - 1) * size) // 2 - (fEdges + bEdges))

        ppl.scatter(ax, missingEdges, freeEdges, label=str(size))

    ppl.legend(ax, loc="lower right")
    ax.set_xlabel(u'Arestas faltantes')
    ax.set_ylabel(u'Arestas livres')
    # Anchor both axes at the origin, keeping the automatic upper limits.
    ax.set_xlim((0, ax.get_xlim()[1]))
    ax.set_ylim((0, ax.get_ylim()[1]))

    if figName is not None:
        fig.savefig(figName, bbox_inches='tight')
def ppm_efficiency_by_depth_helper2(fnames, priors, depths, test_name, opts):
    """Plot mean compression effectiveness against context depth.

    ``opts`` holds one ``(x, fun, status)`` triple per ``(prior, depth)``
    pair, in ``itertools.product(priors, depths)`` order, where ``x``
    unpacks to the PPM parameters ``(a, b)``. Every test file is compressed
    once per pair; the results are averaged per file set (in bits/byte) and
    one line is drawn per file set and prior, with the minimum of each line
    marked by a diamond.

    Returns whatever ``plot.save_figure`` returns for the finished figure.
    """
    filesets = config.PPM_EFFICIENCY_BY_DEPTH_FILESETS
    test_files = list(itertools.chain.from_iterable(filesets.values()))
    original_sizes = dict((f, benchmark.tasks.corpus_size(f)) for f in fnames)

    # Queue one compression task per (prior, depth, test file).
    work = []
    for ((a, b), _fun, _status), (prior, depth) in zip(opts, itertools.product(priors, depths)):
        algorithm = ['ppm:d={0}:a={1}:b={2}'.format(depth, a, b)]
        work.extend(
            benchmark.tasks.my_compressor.s(test_file, paranoia, prior, algorithm)
            for test_file in test_files)
    raw_res = celery.group(work)().get()

    # Results arrive in task order: rebuild res[prior][depth][test_file].
    res = {}
    result_keys = itertools.product(priors, depths, test_files)
    for effectiveness, (prior, depth, test_file) in zip(raw_res, result_keys):
        res.setdefault(prior, {}).setdefault(depth, {})[test_file] = effectiveness

    fig = plot.new_figure()
    colors = ppl.brewer2mpl.get_map('Set2', 'qualitative', len(filesets)).mpl_colors
    for (name, fileset), color in zip(filesets.items(), colors):
        for prior in priors:
            y = [
                np.mean([res[prior][d][f] / original_sizes[f] * 8 for f in fileset])
                for d in depths
            ]
            min_i = np.argmin(y)
            # Regular markers everywhere except at the minimum, which gets
            # its own diamond marker below.
            markevery = [i for i in range(len(depths)) if i != min_i]
            ppl.plot(depths, y,
                     label='{1} on {0}'.format(name, short_name(config.SHORT_PRIOR_NAME, prior)),
                     color=color,
                     linestyle=config.PPM_EFFICIENCY_BY_DEPTH_PRIOR_LINESTYLES[prior],
                     marker=config.PPM_EFFICIENCY_BY_DEPTH_PRIOR_MARKERS[prior],
                     markevery=markevery)
            ppl.plot([depths[min_i]], [y[min_i]], color=color, linestyle='None', marker='D')

    plt.xlabel(r'Maximal context depth $d$')
    plt.ylabel(r'Compression effectiveness (bits/byte)')
    # Stretch the x axis slightly so that the outermost markers are visible.
    plt.xlim(min(depths) - 0.1, max(depths) + 0.1)
    # Longer legend handles make the line styles recognisable; each marker
    # is shown only once.
    ppl.legend(handlelength=4, numpoints=1)
    return plot.save_figure(fig, test_name, ["dummy"])
def chem160_plotting(x, y, title='LABEL ME', legend_label=None,
                     xlabel='LABEL ME', ylabel='LABEL ME'):
    '''
    General-purpose line plot with large fonts, thick lines and a grid.

    It's not really important to understand the inner workings of this
    function. Just know that this will be the general function that we'll
    use to plot during this semester.

    INPUT:
    x: A sequence of arrays; x[i] holds the x values of the i-th line
    y: A sequence of arrays; y[i] holds the y values of the i-th line
    title: String that defines the plot title. The default title is
        LABEL ME to remind you to always label your plots
    legend_label: A sequence with one legend entry per line, or None for
        no legend
    xlabel: A string that defines the xlabel. This can accept latex
    ylabel: A string that defines the ylabel. This can accept latex

    OUTPUT:
    None. The lines are drawn on a new 10 x 8 inch figure.
    '''
    import prettyplotlib as ppl

    fig, ax = plt.subplots(1)
    fig.set_size_inches(10, 8)

    has_labels = legend_label is not None
    for ind, y_values in enumerate(y):
        if has_labels:
            ppl.plot(ax, x[ind], y_values, label=legend_label[ind], linewidth=3)
        else:
            ppl.plot(ax, x[ind], y_values, linewidth=3)
    # Only request a legend when there are labelled lines to list.
    if has_labels:
        ppl.legend(ax, fontsize=18)

    ax.set_title(title, fontsize=24)
    ax.set_xlabel(xlabel, fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)

    # tick_params replaces the per-tick ``tick.label`` loop, which relies
    # on an attribute that newer Matplotlib releases no longer provide.
    ax.tick_params(axis='both', which='major', labelsize=20)
    ax.xaxis.set_ticks_position('bottom')
    ax.xaxis.set_tick_params(width=3)
    ax.yaxis.set_ticks_position('left')
    ax.yaxis.set_tick_params(width=3)

    # Visibility is passed positionally: the keyword was renamed from ``b``
    # to ``visible`` across Matplotlib versions.
    ax.grid(True, which='major', color='0.65', linestyle='-')
def plot_users_similarity(log_scale=True, *edges):
    """Overlay histograms of the similarity stored in two edge lists.

    ``edges[0]`` is drawn as "cosine" and ``edges[1]`` as "random"; the
    similarity is the element at index 3 of every edge. With ``log_scale``
    the base-10 logarithm of the values is histogrammed instead.
    """
    cosine_sim = [edge[3] for edge in edges[0]]
    random_sim = [edge[3] for edge in edges[1]]

    axis_label = 'users similarity along edge x_ij'
    if log_scale:
        axis_label += ' (log10 scale)'

    for values, series_label in ((cosine_sim, 'cosine'), (random_sim, 'random')):
        if log_scale:
            values = np.log10(values)
        plt.hist(values, 50, label=series_label, alpha=.5)

    plt.xlabel(axis_label)
    plt.ylabel('count')
    ppl.legend()
def plot_coef(coef_list, err_list, coef_name_list, coef_site_list, title="", cmap=ppl.set2):
    """Draw a grouped bar chart of coefficients with error bars.

    Args:
        coef_list: one sequence of coefficient values per site; all
            sequences have the length of ``coef_list[0]``.
        err_list: error-bar sizes, same layout as ``coef_list``.
        coef_name_list: tick label for each coefficient (one per group).
        coef_site_list: legend label for each site (one per bar in a group).
        title: optional axes title; nothing is set when empty.
        cmap: indexable sequence of colours, reused cyclically when there
            are more sites than colours.
    """
    fig, ax = subplots(1, 1)
    n_coefs = len(coef_list[0])  # groups along the x axis
    n_sites = len(coef_list)     # bars inside each group

    # Each group occupies one unit on the x axis and holds one bar per
    # site, so the bar width has to be derived from the number of sites
    # and the axis has to span the number of coefficients.
    width = 1. / (n_sites + 1)
    ax.set_xlim(-width, n_coefs)

    for i in range(n_sites):
        positions = np.arange(n_coefs) + i * width
        ppl.bar(ax, positions, coef_list[i], yerr=err_list[i],
                label=coef_site_list[i], width=width,
                color=cmap[i % len(cmap)])

    # Put the tick of each group in the middle of its bars.
    ax.set_xticks(np.arange(n_coefs) + width * n_sites / 2.)
    ax.set_xticklabels(coef_name_list)
    if title:
        ax.set_title(title)
    ppl.legend(ax)
def plotpopendiff(filename, result, peak, popendiffstdamp):
    """Plot the piecewise-constant Popen trace and its adjusted version.

    The "original" trace has ``len(result)`` samples and takes the values
    returned by ``popenpeak(result, peak)`` on the segments delimited by
    ``peak``. The "calculated" trace is a copy in which the two samples
    around each of the first ``len(popendiffstdamp) - 1`` peaks are set to
    the mean of both sides minus/plus half of ``popendiffstdamp``.

    Args:
        filename: unused.
        result: sequence whose length defines the trace length.
        peak: array of peak positions (converted to int64).
        popendiffstdamp: per-peak step amplitudes.

    NOTE(review): ``savefilename``, ``interval`` and ``pathfilename`` are not
    parameters; they are read from a scope that is not visible here.
    """
    fig, ax = plt.subplots(1)

    # Fill from the last segment backwards: each assignment overwrites the
    # beginning of the trace up to the next (smaller) boundary.
    plotpeak = np.hstack((peak.astype(np.int64), len(result)))[::-1]
    popen = popenpeak(result, peak)[::-1]
    popenoriginal = np.empty(len(result))
    for index, boundary in enumerate(plotpeak):
        popenoriginal[:boundary] = popen[index]
    plotpeak = plotpeak[::-1]

    popenfit = copy.copy(popenoriginal)
    for index in range(len(popendiffstdamp) - 1):
        position = plotpeak[index]
        midpoint = (popenoriginal[position - 1] + popenoriginal[position]) / 2
        half_step = popendiffstdamp[index] / 2
        popenfit[position - 1] = midpoint - half_step
        popenfit[position] = midpoint + half_step

    ppl.plot(ax, np.arange(len(popenfit)), popenfit, label='calculated', linewidth=1)
    ppl.plot(ax, np.arange(len(popenoriginal)), popenoriginal, label='original', linewidth=2)

    # Put the legend in a corner the trace stays away from. Floor division
    # is required: a float is not a valid slice index on Python 3.
    half = len(popenoriginal) // 2
    threshold = max(popenoriginal) / 2
    if np.all(popenoriginal[half:] > threshold):
        ppl.legend(ax, loc='lower right')
    elif np.all(popenoriginal[half:] < threshold):
        ppl.legend(ax, loc='upper right')
    elif np.all(popenoriginal[:half] < threshold):
        ppl.legend(ax, loc='upper left')
    elif np.all(popenoriginal[:half] > threshold):
        ppl.legend(ax, loc='lower left')

    ax.set_title(savefilename + ' window ' + str(interval) + ' original Popen and fit')
    fig.savefig(pathfilename + '_window_' + str(interval) + '_original_Popen_fit' + '.png', dpi=500)
    plt.close(fig)
    # Single pre-formatted argument prints the same on Python 2 and 3.
    print('%s Popen and calculated Popen plot saved.' % (savefilename,))
def _print_and_plot(mean_alpha, mean_beta, alphas, betas, n_iter, data):
    """Print the estimated parameters and plot the sampled alpha/beta values.

    Left panel: the ``alphas`` and ``betas`` sequences with horizontal
    lines and annotations at ``mean_alpha`` and ``mean_beta``.
    Right panel: a histogram of ``data`` overlaid with the Beta pdf of
    every ``(alpha, beta)`` pair.

    Args:
        mean_alpha, mean_beta: values to print and to annotate.
        alphas, betas: per-iteration parameter values.
        n_iter: number of iterations; used as the x extent of the left
            panel and as the number of pdf curves in the right panel.
        data: values for the histogram (binned on [0, 1) in steps of 0.05).
    """
    # Pre-formatted strings print the same on Python 2 and 3.
    print('')
    print('%s %s %s %s' % (mean_alpha, mean_beta, ' estimated modality:',
                           _assign_modality_from_estimate(mean_alpha, mean_beta)))

    import numpy as np
    from scipy.stats import beta
    import matplotlib.pyplot as plt
    import prettyplotlib as ppl

    fig, axes = plt.subplots(ncols=2, figsize=(12, 4))

    ax = axes[0]
    ppl.plot(alphas, label='alpha', ax=ax)
    ppl.plot(betas, label='beta', ax=ax)
    ppl.legend(ax=ax)
    ax.hlines(mean_alpha, 0, n_iter)
    ax.hlines(mean_beta, 0, n_iter)
    ax.annotate('mean_alpha = {:.5f}'.format(mean_alpha), (0, mean_alpha),
                fontsize=12, xytext=(0, 1), textcoords='offset points')
    # The text now shows mean_beta (it previously repeated mean_alpha).
    ax.annotate('mean_beta = {:.5f}'.format(mean_beta), (0, mean_beta),
                fontsize=12, xytext=(0, 1), textcoords='offset points')
    ax.set_xlim(0, n_iter)

    ax = axes[1]
    ppl.hist(data, facecolor='grey', alpha=0.5, bins=np.arange(0, 1, 0.05),
             zorder=10, ax=ax)
    # Remember the histogram's upper limit so the pdf curves cannot rescale it.
    _, ymax = ax.get_ylim()

    one_x = np.arange(0, 1.01, 0.01)
    x = np.repeat(one_x, n_iter).reshape(len(one_x), n_iter)
    # vstack needs a real sequence; generators are not accepted by current
    # NumPy releases.
    beta_distributions = np.vstack(
        [beta(a, b).pdf(one_x) for a, b in zip(alphas, betas)]).T
    ppl.plot(x, beta_distributions, color=ppl.colors.set2[0], alpha=0.1,
             linewidth=2, ax=ax)
    ax.set_ylim(0, ymax)
def analyze_solution(recovered_users, hidden_user_idx, edges, verbose=False,
                     drawing=False):
    """Compare recovered user vectors with the true ones.

    Args:
        recovered_users: 2-D array with one recovered vector per hidden
            user. It is L2-normalised IN PLACE, row by row.
        hidden_user_idx: row indices into ``USERS`` of the hidden users, in
            the same order as ``recovered_users``.
        edges: iterable of 4-tuples whose first two items are the endpoints
            of an undirected edge.
        verbose: print the mean and some percentiles of the distances.
        drawing: draw regression plots of the distance against the other
            three statistics.

    Returns:
        Array of shape ``(4, k)`` where ``k`` is the number of rows whose
        first recovered component is greater than -100. The rows are:
        distance to the true vector, number of neighbors, fraction of
        neighbors that are hidden, and mean distance to all neighbors.
    """
    adj = defaultdict(set)
    for i, j, _, _ in edges:
        adj[i].add(j)
        adj[j].add(i)

    recovered_users /= np.sqrt((recovered_users ** 2).sum(-1))[..., np.newaxis]
    gold_users = USERS[hidden_user_idx, FEATURE_START:]
    gold_users = gold_users / np.sqrt((gold_users ** 2).sum(-1))[..., np.newaxis]
    diff = np.sqrt(((gold_users - recovered_users) ** 2).sum(-1))
    non_zeros = np.where(recovered_users[:, 0] > -100)[0]

    if verbose:
        print('average distance {:.3f}'.format(np.mean(diff[non_zeros])))
        prct = [5, 25, 50, 75, 95]
        vals = np.percentile(diff[non_zeros], prct)
        print('Percentile: ' + '\t'.join(['{}'.format(str(_).ljust(5)) for _ in prct]))
        print(' ' + '\t'.join(['{:.3f}'.format(_) for _ in vals]))

    # Sets give O(1) membership tests inside the loop below.
    kept_rows = set(non_zeros.tolist())
    hidden_set = set(hidden_user_idx)

    embeddings = np.zeros((4, non_zeros.size))
    column = 0
    for uidx in range(len(recovered_users)):
        neighbors = adj[hidden_user_idx[uidx]]
        hidden_neighbors = {n for n in neighbors if n in hidden_set}

        # NOTE(review): the neighbors belong to user hidden_user_idx[uidx]
        # but the vector is read from row `uidx` of USERS; confirm which
        # row is intended.
        me = USERS[uidx, FEATURE_START:]
        # Out-of-place division: `me` is a view, and `/=` would overwrite
        # the global USERS array.
        me = me / np.linalg.norm(me)

        tot_dst = 0
        for n in neighbors:
            nei = USERS[n, FEATURE_START:]
            tot_dst += np.linalg.norm(nei / np.linalg.norm(nei) - me)

        if uidx in kept_rows:
            # float() keeps the fraction a true division on Python 2 too.
            n_neighbors = float(len(neighbors))
            embeddings[:, column] = [diff[uidx], len(neighbors),
                                     len(hidden_neighbors) / n_neighbors,
                                     tot_dst / n_neighbors]
            column += 1

    if drawing:
        labels = ['number of neighbors', 'fraction of unknown neighbors',
                  'mean distance from all neighbors']
        for row in range(1, 4):
            with sns.plotting_context("notebook", font_scale=1.7,
                                      rc={"figure.figsize": (20, 10)}):
                # Keyword x/y work on old and new seaborn releases alike.
                sns.regplot(x=embeddings[row, :], y=embeddings[0, :],
                            label=labels[row - 1])
        ppl.legend()
    return embeddings
def plot_scatter_k_means_2d(n_clusters, clusters, is_plot=False):
    """Scatter the points of each K-Means cluster in its own colour.

    Args:
        n_clusters: number of clusters to draw.
        clusters: indexable collection; ``clusters[i]`` is an iterable of
            points whose first two items are the x and y coordinates.
        is_plot: nothing happens unless this is true.

    When plotting, the figure is shown and then saved as
    ``k_means_classification_2d_plot.jpg``. Cluster ``i`` is labelled
    ``Class_<i + 1>``; colours repeat after the palette is exhausted.
    """
    if not is_plot:
        return

    colors = ["lime", "aqua", "deeppink", "orangered", "dodgerblue", "magenta",
              "darkolivegreen", "crimson", "yellow", "darkorchid", "dodgerblue",
              "mediumpurple", "hotpink", "cyan", "orangered"]

    fig, ax = plt.subplots()
    ax.set_xlabel("x axis")
    ax.set_ylabel("y axis")
    ax.set_title("Plot for K-Means class")

    for i in range(n_clusters):
        x_axis = tuple(point[0] for point in clusters[i])
        y_axis = tuple(point[1] for point in clusters[i])
        # Labels are generated instead of read from a hand-written list,
        # which contained "Class_12" twice and thereby shifted every later
        # label by one. The modulo keeps any cluster count in range.
        ppl.scatter(ax, x_axis, y_axis, color=colors[i % len(colors)],
                    label="Class_%d" % (i + 1))

    ppl.legend(ax)
    plt.show()
    fig.savefig('k_means_classification_2d_plot.jpg')
def plot_eff_by_rate_of_motion(ax, blocks):
    """Draw efficiency data and smooth fits for every block.

    Block ``i`` is drawn on ``ax[i]`` through ``plot_smooth_fit`` and
    ``plot_eff_data``, then given a grid, a y label, border removal and a
    legend titled with the block name.

    Returns the name of the output file, 'OSSOS_e-o_efficiency.pdf'.
    """
    pwd = path + 'efficiency_motion_rates'
    outfile = 'OSSOS_e-o_efficiency.pdf'

    # Reversed Greens so that the efficiency for the most-targeted <8"/hr
    # objects is the most prominent; entries 0, 1 and 3 are used.
    reversed_greens = sequential.Greens_6.mpl_colors[::-1]
    colours = [reversed_greens[0], reversed_greens[1], reversed_greens[3]]

    # Error bars are set off from each other slightly for legibility; the
    # smooth fits are displaced by the same amount.
    offset = 0.017

    for i, block in enumerate(blocks):
        plot_smooth_fit(i, block, ax, colours, pwd, offset=offset)
        plot_eff_data(i, block, ax, colours, pwd, offset)

        panel = ax[i]
        panel.grid(True, alpha=0.3)
        panel.set_ylabel('efficiency (%)')
        src.ossos.core.ossos.planning.plotting.plot_fanciness.remove_border(panel)
        ppl.legend(panel, loc='lower left', title=block, numpoints=1,
                   fontsize='small', handletextpad=0.5)

    return outfile
def plot_eff_by_rate_of_motion(ax, blocks):
    """Plot per-block efficiency data with smooth fits, one axes per block.

    For block number ``i`` the curves go to ``ax[i]``; each axes then gets
    a faint grid, the y label, its border removed and a legend whose title
    is the block name.

    Returns 'OSSOS_e-o_efficiency.pdf', the intended output file name.
    """
    pwd = path + 'efficiency_motion_rates'
    outfile = 'OSSOS_e-o_efficiency.pdf'

    # Darkest green first: the efficiency for the most-targeted <8"/hr
    # objects should be the most prominent.
    greens = sequential.Greens_6.mpl_colors[::-1]
    colours = [greens[index] for index in (0, 1, 3)]

    # Shift applied to error bars (for legibility) and to the smooth fits.
    offset = 0.017

    legend_options = dict(loc='lower left', numpoints=1, fontsize='small',
                          handletextpad=0.5)
    for i, block in enumerate(blocks):
        plot_smooth_fit(i, block, ax, colours, pwd, offset=offset)
        plot_eff_data(i, block, ax, colours, pwd, offset)

        axes_i = ax[i]
        axes_i.grid(True, alpha=0.3)
        axes_i.set_ylabel('efficiency (%)')
        src.ossos.planning.plotting.plot_fanciness.remove_border(axes_i)
        ppl.legend(axes_i, title=block, **legend_options)

    return outfile
def show_timeorder_info(Dt, mesh_sizes, errors):
    '''Performs consistency check for the given problem/method combination
    and show some information about it. Useful for debugging.

    :param Dt: sequence of time step sizes.
    :param mesh_sizes: sequence of mesh sizes; ``errors[label][i]`` holds
        the errors for ``mesh_sizes[i]``, one entry per time step.
    :param errors: mapping from a quantity label to its error table.

    For every mesh size a table of errors and numerical orders of
    convergence is printed; for every label a log-log plot of the error
    over ``Dt`` is shown, together with gray reference lines of order 0-6.
    '''
    # Compute the numerical order of convergence.
    orders = dict(
        (key, _compute_numerical_order_of_convergence(Dt, errors[key]))
        for key in errors)

    def error_row(mesh_index, step):
        # One output line: the time step followed by every error.
        cells = ['dt = %e' % Dt[step]]
        cells.extend(' err_%s = %e' % (label, e[mesh_index][step])
                     for label, e in errors.items())
        return ' '.join(cells)

    # Print the data to the screen. Each line is assembled and printed in
    # one call; the cells are joined by single spaces, which is what the
    # Python 2 trailing-comma print idiom produced.
    for i, mesh_size in enumerate(mesh_sizes):
        print('')
        print('Mesh size %d:' % mesh_size)
        print(error_row(i, 0))
        for j in range(len(Dt) - 1):
            order_cells = [' ']
            order_cells.extend(' ord_%s = %e' % (label, o[i][j])
                               for label, o in orders.items())
            print(' '.join(order_cells))
            print(error_row(i, j + 1))

    # Create a figure
    for label, err in errors.items():
        pp.figure()
        ax = pp.axes()
        # Plot the actual data.
        for i, mesh_size in enumerate(mesh_sizes):
            pp.loglog(Dt, err[i], '-o', label=mesh_size)
        # Compare with order curves; autoscaling is switched off so that
        # the reference lines do not change the axis limits.
        pp.autoscale(False)
        e0 = err[-1][0]
        for o in range(7):
            pp.loglog([Dt[0], Dt[-1]], [e0, e0 * (Dt[-1] / Dt[0]) ** o],
                      color='0.7')
        pp.xlabel('dt')
        pp.ylabel('||%s-%s_h||' % (label, label))
        ppl.legend(ax, loc=4)
    pp.show()
    return
def plot_eff_by_user(ax, blocks):
    """Plot each participant's detection efficiency curve per block.

    Every file in the ``efficiency_personal_bias/`` directory whose name
    contains the block name is read as an ascii table with columns ``mag``
    and ``eff``; the third dot-separated field of the file name is the
    user id. Users are shown under anonymous 'pN' labels, and the line
    width scales with the number of files the user examined.

    Args:
        ax: axes that all curves are drawn on.
        blocks: block names; each must be a key of the ``examined`` table.

    Returns the name of the output file, 'e-o_efficiency_by_user.pdf'.
    """
    pwd = path + 'efficiency_personal_bias/'
    outfile = 'e-o_efficiency_by_user.pdf'
    col = tableau.ColorBlind_10.mpl_colors
    col = col[0:5] + [col[8]]

    # no. files examined, from plots Charles made: see
    # http://wiki.ossos-survey.org/index.php/Core_Teleconf_2014_10_21
    examined = {'13AE': {'jjk': 163, 'jkavelaars': 109, 'mtb55': 418},
                '13AO': {'jkavelaars': 53, 'mtb55': 71, 'bgladman': 111,
                         'montys': 440, 'ptsws': 74}}

    # swap out user ID for anonymous 'pnum' values in the final displayed plot
    user_blindness = {'mtb55': 'p1', 'jjk': 'p2', 'jkavelaars': 'p3',
                      'montys': 'p4', 'bgladman': 'p5', 'ptsws': 'p6'}

    # Line colour and symbol consistent between plots for participant.
    colours = {'jjk': col[2], 'jkavelaars': col[1], 'mtb55': col[0],
               'bgladman': col[3], 'montys': col[4], 'ptsws': col[5]}
    markers = {'jjk': '^', 'jkavelaars': 'd', 'mtb55': '*', 'bgladman': 'o',
               'montys': '.', 'ptsws': 'x'}

    for block in blocks:
        # A real list rather than filter(): the result is sorted in place,
        # and filter objects have no sort() on Python 3.
        user_eff_files = [fn for fn in os.listdir(pwd) if block in fn]
        # Order by anonymous label so the legend reads p1, p2, ...
        user_eff_files.sort(key=lambda fn: user_blindness[fn.split('.')[2]])

        for fn in user_eff_files:
            eff = Table.read(pwd + fn, names=['mag', 'eff'], format='ascii')
            user = fn.split('.')[2]
            n_examined = examined[block][user]
            ax.plot(eff['mag'], eff['eff'] * 100,
                    c=colours[user], marker=markers[user], ms=5,
                    mec=colours[user], alpha=0.7,
                    # line thickness scaled by number of files examined;
                    # the zorder puts the thickest line at the back
                    linewidth=n_examined * 0.02,
                    label="{}: {}".format(user_blindness[user], n_examined),
                    zorder=1 - n_examined)

        ax.grid(True, alpha=0.3)
        ax.set_ylabel('efficiency (%)')
        ax.set_ylim([0., 100.])
        src.ossos.core.ossos.planning.plotting.plot_fanciness.remove_border(ax)
        ppl.legend(ax, loc='lower left', title=block, numpoints=1,
                   fontsize='small', handletextpad=0.5)

    return outfile
def plot_eff_by_user(ax, blocks):
    """Draw one efficiency curve per participant and block on ``ax``.

    Files under ``efficiency_personal_bias/`` whose name contains the
    block name are read as ascii tables with the columns ``mag`` and
    ``eff``. The user id is the third dot-separated part of the file name;
    it selects colour, marker and line width and is replaced by an
    anonymous 'pN' label in the legend.

    Returns 'e-o_efficiency_by_user.pdf', the intended output file name.
    """
    pwd = path + 'efficiency_personal_bias/'
    outfile = 'e-o_efficiency_by_user.pdf'

    palette = tableau.ColorBlind_10.mpl_colors
    palette = palette[0:5] + [palette[8]]

    # no. files examined, from plots Charles made: see
    # http://wiki.ossos-survey.org/index.php/Core_Teleconf_2014_10_21
    examined = {
        '13AE': {'jjk': 163, 'jkavelaars': 109, 'mtb55': 418},
        '13AO': {'jkavelaars': 53, 'mtb55': 71, 'bgladman': 111, 'montys': 440, 'ptsws': 74},
    }
    # Anonymous 'pnum' values replace the user ID in the displayed plot.
    user_blindness = {'mtb55': 'p1', 'jjk': 'p2', 'jkavelaars': 'p3',
                      'montys': 'p4', 'bgladman': 'p5', 'ptsws': 'p6'}
    # Colour and symbol stay the same for a participant across plots.
    colours = {'jjk': palette[2], 'jkavelaars': palette[1], 'mtb55': palette[0],
               'bgladman': palette[3], 'montys': palette[4], 'ptsws': palette[5]}
    markers = {'jjk': '^', 'jkavelaars': 'd', 'mtb55': '*', 'bgladman': 'o',
               'montys': '.', 'ptsws': 'x'}

    def user_of(file_name):
        return file_name.split('.')[2]

    for block in blocks:
        matching = [name for name in os.listdir(pwd) if block in name]
        # Sorted by anonymous label.
        for file_name in sorted(matching, key=lambda name: user_blindness[user_of(name)]):
            table = Table.read(pwd + file_name, names=['mag', 'eff'], format='ascii')
            user = user_of(file_name)
            files_examined = examined[block][user]
            ax.plot(table['mag'], table['eff'] * 100,
                    c=colours[user], marker=markers[user], ms=5,
                    mec=colours[user], alpha=0.7,
                    # thickness follows the number of files examined, and
                    # the zorder sends the thickest line to the back
                    linewidth=files_examined * 0.02,
                    label="{}: {}".format(user_blindness[user], files_examined),
                    zorder=1 - files_examined)

        ax.grid(True, alpha=0.3)
        ax.set_ylabel('efficiency (%)')
        ax.set_ylim([0., 100.])
        src.ossos.planning.plotting.plot_fanciness.remove_border(ax)
        ppl.legend(ax, loc='lower left', title=block, numpoints=1,
                   fontsize='small', handletextpad=0.5)

    return outfile
def new_make_plot(out_fn):
    """Plot every series of ``ping_results`` against its sample index.

    One line is drawn per key of ``ping_results``, labelled with the key.
    The figure is saved to ``out_fn`` and a confirmation is printed.
    """
    fig, ax = plt.subplots(1)

    for host in ping_results.keys():
        response_times = list(ping_results[host])
        sample_index = list(range(len(response_times)))
        ppl.plot(ax, sample_index, response_times, label=host, linewidth=0.75)

    ppl.legend(ax, loc='lower left', ncol=4)
    fig.savefig(out_fn)
    print("Saved to file: %s" % out_fn)
def experiment_threshold(x, y):
    """Plot a time series with spike thresholds of 1, 3 and 5 sigma.

    For each threshold a ``SpikeDetector`` is run on ``(x, y)``; the
    detected spikes are marked and annotated with their date, and the
    curve ``average + stddev * threshold`` is drawn. The spikes found for
    each threshold are also printed.

    Args:
        x: x values of the series.
        y: y values of the series; indexed by the spike positions.
    """
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    pplot.plot(ax, x, y, "#BA5EDD", linewidth=4, alpha=1, label=r"$T$")
    pplot.plot(ax, x, y, "^", markevery=SKIP)

    for colour, symbol, threshold in zip(("b", "y", "r"), ('s', 'p', 'D'), (1, 3, 5)):
        sd = SpikeDetector(x, y, threshold=threshold)
        spikes, averages = sd.get_spikes()
        spikevalues = [y[i] for i in spikes]
        pplot.plot(spikes, spikevalues, colour + 'o',
                   label=("%s" % threshold) + r"$\sigma$ peaks")

        # The threshold curve is drawn twice: once as a line and once as
        # sparse markers on top of it.
        threshold_curve = [a + sd.stddev * threshold for a in averages]
        # Raw string: "\m" is not a valid escape sequence.
        pplot.plot(ax, x, threshold_curve, colour, linewidth=2, alpha=1,
                   label=r"$\mu$ + " + str(threshold) + r"$\sigma$")
        pplot.plot(ax, x, threshold_curve, symbol, markevery=SKIP,
                   linewidth=2, alpha=1)

        for spike, value in zip(spikes, spikevalues):
            ax.annotate(sd.get_date(spike).date(), (spike, value))
        # Pre-formatted so the output is the same on Python 2 and 3.
        print("STDDEV: %s \n\n SPIKES: %s" % (threshold, spikes))

    ax.set_title("Time series for Wikipedia article 'Julian_Assange'")
    pyplot.ylabel("Page views")
    pyplot.xlabel("Days since 2008-01-01")
    # NOTE(review): the font settings are applied after the curves, title
    # and labels were created; confirm whether they were meant to be set
    # before plotting.
    font = {'family': 'normal', 'size': 14}
    matplotlib.rc('font', **font)
    pplot.legend()
    pyplot.show()
def scatterFreeByMissingEdges(type_header, type_table, config, instanceType,
                              solsPath, solsExt, figName = None):
    """Plot free edges over missing edges for every instance, grouped by size.

    "Missing" edges are the edges of an instance's solution file minus its
    fixed edges; "free" edges are all ``size * (size - 1) / 2`` possible
    edges minus the fixed and blocked ones.

    Args:
        type_header: unused.
        type_table: nested mapping ``config -> instanceType -> size -> column``.
        config, instanceType: keys selecting the data to plot.
        solsPath: directory of the solution files; ``'_80-90'`` is appended
            once size 80 is reached and stays in effect for larger sizes,
            because sizes are processed in ascending order.
        solsExt: extension of the solution files.
        figName: optional output file for the figure.

    Raises:
        Exception: if ``config`` or ``instanceType`` is missing from
            ``type_table``.
    """
    if config not in type_table:
        raise Exception("Config \""+config+"\" not found!")
    if instanceType not in type_table[config]:
        raise Exception("Instance type \""+instanceType+"\" not found!")

    instances = type_table[config][instanceType]
    fig, ax = plt.subplots(1)
    # Iterating the mapping directly works on Python 2 and 3, unlike
    # iterkeys().
    for size in sorted(instances):
        table = instances[size]
        # Build real lists first: on Python 3 map() is lazy and np.array()
        # would not see the values.
        fixedEdges = np.array([int(value) for value in table["preproc.fixedEdges"]])
        blockedEdges = np.array([int(value) for value in table["preproc.blockedEdges"]])
        instanceNumbers = np.array(table["instanceNumber"])

        if size == 80:
            solsPath += '_80-90'

        # Integer count of all possible edges for this size.
        possibleEdges = ((size-1)*size) // 2

        missingEdges = []
        freeEdges = []
        for fEdges, bEdges, instanceNumber in zip(fixedEdges, blockedEdges, instanceNumbers):
            instanceName = "%s_%03d_%02d%s" % (instanceType, size, instanceNumber, solsExt)
            solEdges = getNEdges(os.path.join(solsPath, instanceName))
            missingEdges.append(solEdges - fEdges)
            freeEdges.append(possibleEdges - (fEdges + bEdges))

        ppl.scatter(ax, missingEdges, freeEdges, label=str(size))

    ppl.legend(ax, loc="lower right")
    ax.set_xlabel(u'Arestas faltantes')
    ax.set_ylabel(u'Arestas livres')
    # Start both axes at zero while keeping the automatic upper limits.
    ax.set_xlim((0, ax.get_xlim()[1]))
    ax.set_ylim((0, ax.get_ylim()[1]))

    if figName is not None:
        fig.savefig(figName, bbox_inches='tight')
def table_touch_cdf():
    """Plot the cumulative distribution of table touch for SDSS and SQLShare.

    Two panels share the y axis: ``ax2`` (first axes returned by
    ``plt.subplots``) shows SDSS and ``ax1`` shows SQLShare. The figure is
    shown and then saved as ``plot_touch_cdf.pdf`` and ``plot_touch_cdf.png``.
    """
    fig, [ax2, ax1] = plt.subplots(1, 2, sharey=True, figsize=(8, 4))

    # SQLShare: sort by touch so the cumulative sum runs along the x axis.
    data = read_csv(['touch'], False)
    data.sort(order='touch')
    c = data['count'].astype(float)
    c /= sum(c)
    ppl.plot(ax1, data['touch'], np.cumsum(c), label="SQLShare", color=cs[0], linewidth=2, linestyle='-.')

    # SDSS: plotted in the order read_csv returns the rows.
    data = read_csv(['touch', 'counts'], True)
    c = data['counts'].astype(float)
    c /= sum(c)
    ppl.plot(ax2, data['touch'], np.cumsum(c), label="SDSS", color=cs[1], linewidth=2, linestyle='--')

    ppl.legend(ax1, loc='lower right')
    ppl.legend(ax2, loc='lower right')

    ax1.yaxis.set_major_formatter(formatter)
    ax2.yaxis.set_major_formatter(formatter)

    # One x range per panel. Setting both limits on ax1 would discard the
    # first range and leave ax2 on automatic limits.
    # NOTE(review): confirm that (0, 25) is the range meant for the SDSS panel.
    ax1.set_xlim(0, 500)
    ax2.set_xlim(0, 25)

    ax1.yaxis.grid()
    ax2.yaxis.grid()

    # A single x label centred under both panels.
    fig.text(0.5, 0.02, "Table touch", ha='center')
    ax1.set_ylabel('% of queries')

    fig.subplots_adjust(wspace=0.1)

    ax1.set_ylim(0, 1.01)
    ax2.set_ylim(0, 1.01)

    # Leave room at the bottom for the shared x label.
    fig.tight_layout(rect=[0, .03, 1, 1])

    plt.show()

    fig.savefig('plot_touch_cdf.pdf', format='pdf', transparent=True)
    fig.savefig('plot_touch_cdf.png', format='png', transparent=True)
def _mean_ratings_between(users, start, end):
    """Return the mean rating of statements 1-5 for ratings created in [start, end)."""
    averages = []
    for statement_number in range(1, 6):
        ratings = UserRating.objects.filter(
            user__in=users,
            created__gte=start,
            created__lt=end,
            opinion_space_statement__statement_number=statement_number)
        mean = ratings.aggregate(avg=Avg('rating'))['avg']
        # Any falsy aggregate (e.g. no rating matched) is plotted as 0.0.
        averages.append(mean if mean else 0.0)
    return averages


def compare_weeks():
    """Draw a bar chart comparing this week's and last week's mean ratings.

    Weeks start on Monday at 00:00. Only ratings from active users are
    counted. The chart is written to ``../../client/media/images/qat.png``.
    """
    users = User.objects.filter(is_active=True)

    today = datetime.date.today()
    monday = today - datetime.timedelta(days=today.weekday())
    start_of_week = datetime.datetime.combine(monday, datetime.time())
    one_week = datetime.timedelta(days=7)
    last_week = start_of_week - one_week

    # Mean rating per Opinion Space statement, for each of the two weeks.
    avg_this_week = _mean_ratings_between(users, start_of_week, start_of_week + one_week)
    avg_last_week = _mean_ratings_between(users, last_week, start_of_week)

    labels = ['1', '2', '3', '4', '5']
    fig, ax = plt.subplots()
    ind = np.arange(5)
    # The second series is shifted by 0.4 so the bars sit side by side.
    ppl.bar(ax, ind, avg_last_week, width=0.3, annotate=True, xticklabels=labels, color='r', label='Last week')
    ppl.bar(ax, ind+0.4, avg_this_week, width=0.3, annotate=True, xticklabels=labels, color='b', label='This week')
    plt.ylim(0.0, 1.1)
    ax.set_xlabel('QAT Number')
    ax.set_ylabel('Mean Rating')
    ppl.legend(ax, loc="upper right")
    fig.savefig('../../client/media/images/qat.png')
def runtime_cdf():
    """Plot the cumulative distribution of query runtimes for SDSS and SQLShare.

    The two panels share the y axis; SDSS goes on the first axes and
    SQLShare on the second. The figure is shown and then saved as
    ``plot_runtimes_cdf.pdf`` and ``plot_runtimes_cdf.png``.
    """
    fig, panels = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
    ax1, ax2 = panels

    sdss = read_csv(['actual', 'counts'], True)
    sdss_share = sdss['counts'].astype(float)
    sdss_share = sdss_share / sum(sdss_share)
    ppl.plot(ax1, sdss['actual'], np.cumsum(sdss_share),
             label="SDSS", color=cs[0], linewidth=2, ls='-.')

    sqlshare = read_csv(['time_taken'], False)
    sqlshare.sort(order='time_taken')
    sqlshare_share = sqlshare['count'].astype(float)
    # ms to seconds
    # NOTE(review): this scales the counts, not 'time_taken', and the
    # normalisation on the next line cancels it; the plotted times are
    # left in their original unit. Confirm the intent.
    sqlshare_share = sqlshare_share / 1000
    sqlshare_share = sqlshare_share / sum(sqlshare_share)
    ppl.plot(ax2, sqlshare['time_taken'], np.cumsum(sqlshare_share),
             label="SQLShare", color=cs[1], linewidth=2, ls='--')

    for panel in (ax1, ax2):
        ppl.legend(panel, loc='lower right')

    plt.gca().yaxis.set_major_formatter(formatter)

    ax1.set_ylabel('% of queries')
    # One x label centred under both panels.
    fig.text(0.5, 0.02, "Runtime in seconds", ha='center')

    for panel in (ax1, ax2):
        panel.yaxis.grid()

    fig.subplots_adjust(wspace=0.1)

    ax1.set_xlim(0, 6)
    ax2.set_xlim(0, 500)

    for panel in (ax1, ax2):
        panel.set_ylim(0, 1.01)

    # Keep a strip free at the bottom for the shared x label.
    fig.tight_layout(rect=[0, .03, 1, 1])

    plt.show()

    for extension in ('pdf', 'png'):
        fig.savefig('plot_runtimes_cdf.' + extension, format=extension, transparent=True)
def ppl_scatter ():
    '''
    Draw a prettyplotlib scatter graph in the third cell of a 1x3 grid on
    the module-level figure ``fig``, reproducing the graph from part 2.

    Note: This function requires "prettyplotlib" library.
    '''
    np.random.seed(12)
    axes = fig.add_subplot(1, 3, 3)

    # Eight clouds, one per colour of the default cycle, so the whole
    # colour range is visible.
    n_points = 1000
    for centre in range(8):
        xs = np.random.normal(loc=centre, size=n_points)
        ys = np.random.normal(loc=centre, size=n_points)
        ppl.scatter(axes, xs, ys, label=str(centre))

    ppl.legend(axes, loc=4, fontsize=11)
    heading = ('A prettyplotlib `scatter` example\n'
               'showing default color cycle and scatter params')
    axes.set_title(heading, fontsize=12)
def df_scatter_plot(df, x=None, y=None, label=None, **kwargs):
    """Scatter ``df[x]`` against ``df[y]``, one series per value of ``df[label]``.

    :param df: data frame holding the columns to plot.
    :param x: name of the column used for the x values.
    :param y: name of the column used for the y values.
    :param label: name of the grouping column; when None it is taken to be
        the one column that is neither ``x`` nor ``y``, which requires the
        frame to have exactly three columns.
    :param kwargs: ``colors`` (per-series face colours) and ``label_list``
        (group values to plot, default: the unique values of the grouping
        column) are consumed here; everything else goes to ``ppl.scatter``.
    :raises ValueError: if ``label`` is None and the frame does not have
        exactly three columns.
    """
    if label is None:
        if len(df.columns) != 3:
            raise ValueError("I can't (or rather won't) guess the label if there's not exactly 3 columns. "
                             "You need to specify it")
        candidates = [column for column in df.columns if column not in (x, y)]
        label = candidates[0]

    colors = kwargs.pop('colors', None)
    label_list = kwargs.pop('label_list', np.array(df[label].unique()))

    fig, ax = mpl_plt.subplots(1)

    for index, group_value in enumerate(label_list):
        subset = df[df[label] == group_value]
        xvals, yvals = np.array(subset[x]), np.array(subset[y])
        # The legend entry is the position in label_list, not the value.
        series_name = str(index)
        if colors:
            ppl.scatter(ax, xvals, yvals, label=series_name, facecolor=colors[index], **kwargs)
        else:
            ppl.scatter(ax, xvals, yvals, label=series_name, **kwargs)

    ppl.legend(ax)
def plot_coef_split(coef_list,err_list,coef_name_list,coef_site_list,title="",cmap=ppl.set2):
    """Draw one bar panel per coefficient, with one bar per site in each panel.

    :param coef_list: ``coef_list[i][j]`` is coefficient ``j`` of site ``i``.
    :param err_list: error-bar sizes, indexed like ``coef_list``.
    :param coef_name_list: panel titles, one per coefficient.
    :param coef_site_list: legend labels, one per site.
    :param title: unused by this function.
    :param cmap: sequence of colours indexed by site.
    """
    N = len(coef_list)     # num sites
    M = len(coef_list[0])  # num coefs

    # squeeze=False always yields a 2-D array of axes, so indexing also
    # works when there is a single coefficient (M == 1).
    fig, ax = subplots(1, M, figsize=(3*M, 4), squeeze=False)

    # The bars of one panel together span one unit on the x axis.
    width = 1./(N)

    for j in range(M):
        a = ax[0][j]
        a.set_xlim(.2, 1.2)
        for i in range(N):
            # One bar per site for this coefficient.
            ppl.bar(a, [i*width], [coef_list[i][j]],
                    yerr=[err_list[i][j]],
                    label=coef_site_list[i],
                    width=width, color=cmap[i], annotate=True)
        a.set_xticks([])
        a.set_xticklabels([])
        a.set_title(coef_name_list[j])

    # Every panel has the same series, so the legend goes on the last one.
    ppl.legend(a)
def ppl_scatter():
    '''
    Render the prettyplotlib version of the part 2 scatter graph into
    subplot 3 of a 1x3 layout on the module-level figure ``fig``.

    Note: This function requires "prettyplotlib" library.
    '''
    np.random.seed(12)
    panel = fig.add_subplot(1, 3, 3)

    # One normally distributed cloud per colour of the default cycle.
    # x is drawn before y so the random stream is consumed in a fixed order.
    for loc in range(8):
        cloud_x = np.random.normal(loc=loc, size=1000)
        cloud_y = np.random.normal(loc=loc, size=1000)
        ppl.scatter(panel, cloud_x, cloud_y, label=str(loc))

    ppl.legend(panel, loc=4, fontsize=11)
    panel.set_title(
        'A prettyplotlib `scatter` example\n'
        'showing default color cycle and scatter params',
        fontsize=12)
def plot(weights_vector):
    """Plot ``exercise_11.f`` together with one polynomial per weight set.

    The curves are evaluated on 1000 evenly spaced points of [0, 1] and the
    figure is saved as ``exercise_polynomial_plot40.pdf``.

    :param weights_vector: iterable of weight sets accepted by
        ``eval_polynomial``. The k-th set is labelled with the k-th entry
        of ``[0, 1, 3, 9]``, so at most four sets are supported.
    """
    # Use a temporal resolution of 1000.
    grid = np.linspace(0, 1, 1000)
    reference = np.array([exercise_11.f(point) for point in grid])

    fig, ax = plt.subplots(1)
    ppl.plot(ax, grid, reference, linewidth=0.75, label='Function')

    which = [0, 1, 3, 9]
    for position, weights in enumerate(weights_vector):
        approximation = [eval_polynomial(weights, point) for point in grid]
        ppl.plot(ax, grid, approximation, linewidth=0.75,
                 label='M = ' + str(which[position]))

    ppl.legend(ax, loc='upper right', ncol=2)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('Polynomial approximation of sine function')
    fig.savefig('exercise_polynomial_plot40.pdf')
def make_plots(d,correct):
    """Scatter the men's and women's results on the module-level ``ax`` and save the figure.

    :param d: sequence whose first item holds the men's records and whose
        second item holds the women's records.
    :param correct: when true, the times are passed through
        ``correct_times`` and the result is saved as
        ``b2bwinningtimescorrected.png``; otherwise the raw converted
        times are plotted and saved as ``b2bwinningtimes.png``.
    """
    if correct:
        suffix = "speeds"
        title = "Bay To Breakers Speeds (Seconds per Mile)"
        outfile = "b2bwinningtimescorrected.png"
    else:
        suffix = "Times"
        title = "Bay To Breakers times (seconds)"
        outfile = "b2bwinningtimes.png"

    # Men first, then women, so the colour cycle assigns the same colours
    # in both modes.
    for who, code, records in (("Men's", 'm', d[0]), ("Women's", 'w', d[1])):
        times = convert_times(records)
        if correct:
            times = correct_times(code, times)
        dates = convert_dates(records)
        ppl.scatter(ax, dates, times, label=who + " " + suffix)

    ppl.legend(ax)
    ax.set_title(title)
    fig.savefig(outfile)
def _degree_frequencies(degrees, n_nodes):
    """Return the distinct degrees in ascending order and each one's share of ``n_nodes``."""
    ks = sorted(set(degrees))
    return ks, [degrees.count(k) / float(n_nodes) for k in ks]


def plot_distributions(G,H):
    """Plot the degree distributions of two graphs and save them to ``test_distribution.pdf``.

    :param G: graph labelled "Network with power-law distribution".
    :param H: graph labelled "Network with binomial degree distribution".

    NOTE(review): the frequencies are divided by ``N``, which is not defined
    in this function and must exist at module level. Presumably it is the
    node count shared by both graphs; verify.
    """
    G_degrees = list(nx.degree(G).values())
    H_degrees = list(nx.degree(H).values())

    # Ascending degree order keeps the connecting line from zigzagging,
    # which plain set iteration order does not guarantee.
    G_k, G_p_k = _degree_frequencies(G_degrees, N)
    H_k, H_p_k = _degree_frequencies(H_degrees, N)

    fig, ax = plt.subplots(1)

    ppl.plot(ax, G_k, G_p_k, 'o-', color="#9e9ac8", label="Network with power-law distribution")
    ppl.plot(ax, H_k, H_p_k, 'o-', color="#6baed6", label="Network with binomial degree distribution")

    ax.xaxis.set_label_text("degree (k)")
    ax.yaxis.set_label_text("Degree frequency P(k)")
    ppl.legend(ax, loc='upper right', ncol=1)

    # Otherwise the curves cannot be told apart: they are so close to 0.
    ax.set_xlim([0, 20])

    fig.savefig('test_distribution.pdf')
def _nonempty_bin_points(edges, counts, width):
    """Shift bin edges by half of ``width`` and drop the bins whose count is zero."""
    shifted = edges + width / 2
    # zip stops at the shorter sequence, so the last edge is ignored.
    kept = [pair for pair in zip(list(shifted), list(counts)) if pair[1]]
    return [pair[0] for pair in kept], [pair[1] for pair in kept]


def opcounts():
    """Plot binned physical-operator counts per query for SDSS and SQLShare.

    SDSS is binned into 10 bins. SQLShare is binned over its own range,
    with the number of bin edges derived from the SDSS bin width. Empty
    bins are left out. The y axis is logarithmic. The figure is shown and
    then saved as ``plot_logops_query.pdf`` and ``plot_logops_query.png``.
    """
    fig, ax = plt.subplots(1, figsize=(8, 4))
    ax.set_yscale('log')

    data = read_csv(['physops', 'counts'], True)
    # Unpack the (counts, edges) tuple directly: the two arrays differ in
    # length, so wrapping them in np.array would build a ragged array.
    y, x = np.histogram(data['physops'], 10, weights=data['counts'])
    w = x[1] - x[0]
    x, y = _nonempty_bin_points(x, y, w)
    ppl.scatter(ax, x=x, y=y, marker="o", color=pcs[0], s=100, label="SDSS")

    data = read_csv(['ops'], False)
    d = data['ops']
    # np.linspace needs an integer number of samples.
    n_edges = int((max(d) - min(d)) / w)
    y, x = np.histogram(d, bins=np.linspace(min(d), max(d), n_edges), weights=data['count'])
    x, y = _nonempty_bin_points(x, y, w)
    ppl.scatter(ax, x=x, y=y, marker="v", color=pcs[1], s=100, label="SQLShare")

    ax.set_xlabel('Physical operators used')
    ax.set_ylabel('# of queries')

    ppl.legend(ax, loc='lower right')

    ax.set_xlim(0)
    ax.set_ylim(0)

    fig.tight_layout()
    plt.show()

    fig.savefig('plot_logops_query.pdf', format='pdf', transparent=True)
    fig.savefig('plot_logops_query.png', format='png', transparent=True)
def plot_clusters(clusters, candidates, bounds, vloc, hulls, shrink=0.9):
    """Plot all `clusters` among `candidates` with the `bounds` of the city
    (or at least `shrink` of them). Also plot convex `hulls` of gold areas
    if provided.

    :param clusters: list of index collections, one per cluster.
    :param candidates: set of candidate indices; those that belong to no
        cluster are drawn as noise.
    :param bounds: pair ``(xbounds, ybounds)``; each is multiplied by
        `shrink`, so it must support scalar multiplication.
    :param vloc: 2-D array of locations; column 0 is x and column 1 is y.
    :param hulls: sequence of 2-D arrays of hull vertices, or a falsy
        value for none.
    :param shrink: factor applied to the bounds to obtain the axis limits.
    """
    xbounds, ybounds = bounds
    unique_labels = len(clusters)
    clustered = set().union(*map(list, clusters))
    noise = list(candidates.difference(clustered))

    # Noise plus every cluster needs unique_labels + 1 colours, and the
    # fixed palette has five, so it only covers up to four clusters.
    if unique_labels > 4:
        colors = mpl.cm.Spectral(np.linspace(0, 1, unique_labels + 1))
    else:
        colors = [gray, red, green, blue, orange]

    plt.figure(figsize=(20, 15))

    # Position 0 is the noise group; cluster k (1-based) is at position k.
    for position, (indices, col) in enumerate(zip([noise] + clusters, colors)):
        is_noise = position == 0
        if is_noise:
            col = 'gray'
        ppl.scatter(vloc[indices, 0], vloc[indices, 1],
                    s=16 if is_noise else 35,
                    color=col,
                    alpha=0.6 if is_noise else 0.8,
                    label='noise' if is_noise else 'cluster {}'.format(position))

    hulls = hulls or []
    for idx, hull in enumerate(hulls):
        # Repeat the first vertex to close the outline. list() keeps the
        # concatenation valid on Python 3.
        first_again = list(range(len(hull))) + [0]
        ppl.plot(hull[first_again, 0], hull[first_again, 1], '--',
                 c=ppl.colors.almost_black, lw=1.0, alpha=0.9,
                 # Only the first hull gets a legend entry.
                 label='gold region' if idx == 0 else None)

    plt.xlim(shrink * xbounds)
    plt.ylim(shrink * ybounds)
    ppl.legend()
def plot_rmse(y_training, weights_vector):
    """Plot training and test RMSE for each weight set and save the figure.

    The training error compares each polynomial, evaluated on 40 evenly
    spaced points of [0, 1], with ``y_training``. The test error compares
    it on 100 points with samples drawn through
    ``exercise_11.sample_gaussian``. The figure is saved as
    ``exercise_rmse_plot40.pdf``.

    :param y_training: training targets; must match the 40 training points.
    :param weights_vector: sequence of weight sets accepted by
        ``eval_polynomial``; entry ``k`` is plotted at x position ``k``.
    """
    x_test = np.linspace(0, 1, 100)
    y_test = exercise_11.sample_gaussian(exercise_11.f, 0, 0.3, x_test)
    x_training = np.linspace(0, 1, 40)

    n_models = len(weights_vector)
    rmse_training = np.zeros(n_models)
    rmse_test = np.zeros(n_models)

    for i, weights in enumerate(weights_vector):
        # Arrays, so the subtraction below is element-wise whatever
        # sequence type the targets are.
        poly_output_100 = np.array([eval_polynomial(weights, x) for x in x_test])
        poly_output_40 = np.array([eval_polynomial(weights, x) for x in x_training])

        rmse_training[i] = np.sqrt(np.mean((poly_output_40 - np.asarray(y_training))**2))
        rmse_test[i] = np.sqrt(np.mean((poly_output_100 - np.asarray(y_test))**2))

    # One x position per weight set, so any number of models can be plotted.
    orders = np.arange(n_models)

    fig, ax = plt.subplots(1)
    ppl.plot(ax, orders, rmse_training, linewidth=0.75, label='RMSE on training set')
    ppl.plot(ax, orders, rmse_test, linewidth=0.75, label='RMSE on test set')

    ppl.legend(ax, loc='upper right', ncol=2)
    ax.set_xlabel('Polynomial order')
    ax.set_ylabel('RMSE')
    ax.set_title('RMSE for the polynomial approximation of sine function')

    fig.savefig('exercise_rmse_plot40.pdf')
def query_length_cdf():
    """Plot the cumulative distribution of query lengths for three workloads.

    SDSS, SQLShare and TPC-H are drawn on one axes. The figure is shown
    and then saved as ``plot_lengths.pdf`` and ``plot_lengths.png``.
    """
    fig, ax = plt.subplots(1)

    sdss = read_csv(['lengths', 'counts'], True)
    sdss_share = sdss['counts'].astype(float)
    sdss_share = sdss_share / sum(sdss_share)
    ppl.plot(ax, sdss['lengths'], np.cumsum(sdss_share),
             label="SDSS", color=cs[0], linewidth=2, ls='-.')

    # NOTE(review): the file is requested as 'lengths' but the field is
    # accessed as 'length'; confirm which name read_csv really produces.
    sqlshare = read_csv(['lengths'], False)
    sqlshare.sort(order='length')
    sqlshare_share = sqlshare['count'].astype(float)
    sqlshare_share = sqlshare_share / sum(sqlshare_share)
    ppl.plot(ax, sqlshare['length'], np.cumsum(sqlshare_share),
             label="SQLShare", color=cs[1], linewidth=2, ls='--')

    tpch = read_data(['lengths', 'counts'], 'tpch')
    tpch.sort(order='lengths')
    tpch_share = tpch['counts'].astype(float)
    tpch_share = tpch_share / sum(tpch_share)
    ppl.plot(ax, tpch['lengths'], np.cumsum(tpch_share),
             label="TPC-H", color=cs[2], linewidth=2, ls=':')

    ppl.legend(ax, loc='lower right')

    plt.gca().yaxis.set_major_formatter(formatter)

    ax.set_xlabel('Query length in characters')
    ax.set_ylabel('% of queries')

    ax.set_ylim(0, 1.01)
    ax.set_xlim(0, 2000)

    ax.yaxis.grid()

    plt.show()

    for extension in ('pdf', 'png'):
        fig.savefig('plot_lengths.' + extension, format=extension, transparent=True)
#!/usr/bin/python
# encoding=utf-8
import prettyplotlib as ppl
import numpy as np
# prettyplotlib imports
import matplotlib.pyplot as plt
import matplotlib as mpl
from prettyplotlib import brewer2mpl

# Demo: eight random walks of 1000 steps each, drawn with prettyplotlib's
# default colour cycle.

N_WALKS = 8
N_STEPS = 1000

# Set the random seed for consistency
np.random.seed(12)

fig, ax = ppl.subplots(1)

# x is the same for every walk; ppl.plot is given both x and y explicitly.
steps = np.arange(N_STEPS)

# Show the whole color range
for walk in range(N_WALKS):
    positions = np.cumsum(np.random.normal(size=N_STEPS))
    ppl.plot(ax, steps, positions, label=str(walk), linewidth=0.75)

ppl.legend(ax)

fig.show()
ppl.plot(ax, xs, curData[:, 3], '-o', label=('%dx%d B %s' % (l, sz, alloc)), linewidth=2) ax.set_xlabel('#threads, x 1024') ax.set_ylabel('Throughput, Mops/s') if (ialloc == len(allocators) - 1 and sz == 64): ax.set_xticks(xs) ax.set_xticklabels(['%.0lf' % d for d in curData[:, 2] / 1024.0]) ax.axis(xmin=-1, xmax=len(xs), ymin=ymin, ymax=ymax) ax.grid(axis='y', color='0.3', linestyle=':', antialiased=True) ax.set_title('Private Test Pair Throughput') ppl.legend(ax, loc=0) plt.tick_params(axis='both', which='major', direction='in', bottom=True) outputfilename = 'vs-priv-pair.pdf' funlink(outputfilename) fig.savefig(outputfilename, dpi=300, bbox_inches='tight') #plt.show() inputFileName = "exp-log-spree.csv" data = np.loadtxt(inputFileName, skiprows=1, usecols=[1, 2, 3, 4, 5, 6]) allocators = ['Halloc', 'ScatterAlloc', 'CUDA'] nps0 = data.shape[0] fig = plt.figure(figsize=(12, 7)) ax = fig.add_subplot(111) ax.set_yscale('log')
def PlotGene(Sequences, Background, gene, IterParameters, TransitionTypeFirst='nonhomo', no_plot=False, Start=0, Stop=-1, figsize=(6, 8), dir_ylim=[], out_name=None):
    '''
    Plot the coverage and the model parameters for one gene.

    The function builds per-state emission log-probabilities, obtains the
    transition probabilities, decodes a state path with viterbi.viterbi and
    (unless no_plot is set) draws nine stacked panels: counts, transition
    probabilities, emission probabilities, the decoded path, the
    diag_event_model term, the foreground and background expression terms,
    and two log-odd scores.

    Sequences, Background: per-gene data passed to PreloadSequencesForGene
        and tools.StackData; Sequences[gene]['Coverage'] is read for the
        number of foreground replicates.
    gene: key of the gene to plot.
    IterParameters: indexable; item 0 holds the emission parameters (a dict)
        and item 1 the transition parameters.
    TransitionTypeFirst: 'nonhomo' uses trans.PredictTransistions; any other
        value tiles log(TransitionParameters[0]) over all positions.
    no_plot: when true, return right after decoding without plotting.
    Start, Stop: plotted position range; Stop == -1 means up to the end.
    figsize: figure size handed to plt.subplots.
    dir_ylim: y limits for panel 4; ignored when empty. It is only read here,
        never modified, so the shared default list is harmless.
    out_name: when not None, the figure is saved to this path.

    Returns (MostLikelyPath, TransistionProbabilities, EmmisionProbGene) when
    no_plot is true, and (MostLikelyPath, TransistionProbabilities,
    EmmisionProbGeneNB_fg) otherwise.
    NOTE(review): the third element differs between the two return paths -
    confirm that this is intended.
    '''
    # Pick up edits to these modules without restarting the interpreter.
    reload(diag_event_model)
    reload(emission)

    set2 = brewer2mpl.get_map('Dark2', 'qualitative', 8).mpl_colors
    TransitionParameters = IterParameters[1]
    EmissionParameters = IterParameters[0]
    TransitionType = EmissionParameters['TransitionType']
    PriorMatrix = EmissionParameters['PriorMatrix']
    NrOfStates = EmissionParameters['NrOfStates']

    Sequences_per_gene = PreloadSequencesForGene(Sequences, gene)
    Background_per_gene = PreloadSequencesForGene(Background, gene)

    # Ix selects the positions that receive the diag_event_model term below.
    if EmissionParameters['FilterSNPs']:
        Ix = tools.GetModelIx(Sequences_per_gene,
                              Type='no_snps_conv',
                              snps_thresh=EmissionParameters['SnpRatio'],
                              snps_min_cov=EmissionParameters['SnpAbs'],
                              Background=Background_per_gene)
    else:
        Ix = tools.GetModelIx(Sequences_per_gene)

    # 2) Compute the probabilities for all states.
    # Every table starts as a uniform distribution over the states, in log space.
    EmmisionProbGene = np.log(
        np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates)))
    EmmisionProbGene_Dir = np.log(
        np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates)))
    EmmisionProbGeneNB_fg = np.log(
        np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates)))
    EmmisionProbGeneNB_bg = np.log(
        np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates)))

    CurrStackSum = tools.StackData(Sequences_per_gene)
    CurrStackVar = tools.StackData(Sequences_per_gene, add='no')

    # Map every gene name to its position in Sequences.keys().
    nr_of_genes = len(Sequences.keys())
    gene_nr_dict = {}
    for i, curr_gene in enumerate(Sequences.keys()):
        gene_nr_dict[curr_gene] = i

    # Compute the emission probability
    for State in range(NrOfStates):
        if not EmissionParameters['ExpressionParameters'][0] == None:
            # Foreground expression term; also kept separately for panel 5.
            EmmisionProbGene[
                State, :] = emission.predict_expression_log_likelihood_for_gene(
                    CurrStackSum, State, nr_of_genes, gene_nr_dict[gene],
                    EmissionParameters)
            EmmisionProbGeneNB_fg[
                State, :] = emission.predict_expression_log_likelihood_for_gene(
                    CurrStackSum, State, nr_of_genes, gene_nr_dict[gene],
                    EmissionParameters)
            # Background expression term; both background types run the
            # same computation. Also kept separately for panel 6.
            if EmissionParameters['BckType'] == 'Coverage':
                EmmisionProbGene[
                    State, :] += emission.predict_expression_log_likelihood_for_gene(
                        tools.StackData(Background, gene, add='only_cov') + 0,
                        State, nr_of_genes, gene_nr_dict[gene],
                        EmissionParameters, curr_type='bg')
                EmmisionProbGeneNB_bg[
                    State, :] = emission.predict_expression_log_likelihood_for_gene(
                        tools.StackData(Background, gene, add='only_cov') + 0,
                        State, nr_of_genes, gene_nr_dict[gene],
                        EmissionParameters, curr_type='bg')
            if EmissionParameters['BckType'] == 'Coverage_bck':
                EmmisionProbGene[
                    State, :] += emission.predict_expression_log_likelihood_for_gene(
                        tools.StackData(Background, gene, add='only_cov') + 0,
                        State, nr_of_genes, gene_nr_dict[gene],
                        EmissionParameters, curr_type='bg')
                EmmisionProbGeneNB_bg[
                    State, :] = emission.predict_expression_log_likelihood_for_gene(
                        tools.StackData(Background, gene, add='only_cov') + 0,
                        State, nr_of_genes, gene_nr_dict[gene],
                        EmissionParameters, curr_type='bg')
        if not EmissionParameters['ign_diag']:
            # diag_event_model term, added only at the positions in Ix;
            # also kept separately for panel 4.
            EmmisionProbGene[State, Ix] += diag_event_model.pred_log_lik(
                CurrStackVar[:, Ix], State, EmissionParameters)
            EmmisionProbGene_Dir[State, Ix] = diag_event_model.pred_log_lik(
                CurrStackVar[:, Ix], State, EmissionParameters)

    # Get the transition probabilities
    if TransitionTypeFirst == 'nonhomo':
        if TransitionType == 'unif_bck' or TransitionType == 'binary_bck':
            # These transition types also see the background coverage,
            # stacked below the foreground counts.
            CountsSeq = tools.StackData(Sequences_per_gene, add='all')
            CountsBck = tools.StackData(Background_per_gene, add='only_cov')
            Counts = np.vstack((CountsSeq, CountsBck))
        else:
            Counts = tools.StackData(Sequences_per_gene, add='all')
        TransistionProbabilities = np.float64(
            trans.PredictTransistions(Counts, TransitionParameters, NrOfStates,
                                      TransitionType))
    else:
        # Same log transition matrix repeated for every position.
        TransistionProbabilities = np.float64(
            np.tile(np.log(TransitionParameters[0]),
                    (EmmisionProbGene.shape[1], 1, 1)).T)

    MostLikelyPath, LogLik = viterbi.viterbi(np.float64(EmmisionProbGene),
                                             TransistionProbabilities,
                                             np.float64(np.log(PriorMatrix)))

    # Print how many positions were assigned to each state.
    for j in range(NrOfStates):
        print str(np.sum(MostLikelyPath == j))

    if no_plot:
        return MostLikelyPath, TransistionProbabilities, EmmisionProbGene

    fig, axes = plt.subplots(nrows=9, figsize=figsize)
    fig.subplots_adjust(hspace=1.001)

    Counts = tools.StackData(Sequences_per_gene, gene, add='no')
    if Stop == -1:
        Stop = Counts.shape[1]
    # After the assignment above Stop is no longer -1, so the else branch runs.
    if Stop == -1:
        plt_rng = np.array(range(Start, Counts.shape[1]))
    else:
        plt_rng = np.array(range(Start, Stop))

    # Panel 0: summed count tracks. i walks through the palette, one colour
    # per track group.
    i = 0
    color = set2[i]
    nr_of_rep_fg = len(Sequences[gene]['Coverage'].keys())
    i += 1
    Ix = repl_track_nr([2, 16], 22, nr_of_rep_fg)
    ppl.plot(axes[0], plt_rng, (np.sum(Counts[Ix, :], axis=0))[Start:Stop],
             label='TC', linewidth=2, color=color)
    color = set2[i]
    i += 1
    Ix = repl_track_nr([0, 1, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15, 17, 18], 22,
                       nr_of_rep_fg)
    ppl.plot(axes[0], plt_rng, (np.sum(Counts[Ix, :], axis=0))[Start:Stop],
             label='NonTC', linewidth=2, color=color)
    color = set2[i]
    i += 1
    Ix = repl_track_nr([20], 22, nr_of_rep_fg)
    ppl.plot(axes[0], plt_rng, (np.sum(Counts[Ix, :], axis=0))[Start:Stop],
             label='Read-ends', linewidth=2, color=color)
    color = set2[i]
    i += 1
    Ix = repl_track_nr([4, 9, 14, 19], 22, nr_of_rep_fg)
    ppl.plot(axes[0], plt_rng, (np.sum(Counts[Ix, :], axis=0))[Start:Stop],
             label='Deletions', linewidth=2, color=color)
    color = set2[i]
    i += 1
    Ix = repl_track_nr([21], 22, nr_of_rep_fg)
    ppl.plot(axes[0], plt_rng, (np.sum(Counts[Ix, :], axis=0))[Start:Stop],
             label='Coverage', linewidth=2, color=color)
    # This colour is used for the background curve below.
    color = set2[i]
    i += 1
    axes[0].set_ylabel('Counts')
    axes[0].set_xlabel('Position')
    axes[0].set_title('Coverage and Conversions')
    axes[0].get_xaxis().get_major_formatter().set_useOffset(False)

    # Sum the background coverage over all replicates.
    # NOTE(review): the first replicate is read with key 0 while the others
    # use str(i), and += may modify the first replicate's data in place -
    # confirm both against PreloadSequencesForGene.
    BckCov = Background_per_gene['Coverage'][0]
    for i in range(1, len(Background_per_gene['Coverage'].keys())):
        BckCov += Background_per_gene['Coverage'][str(i)]
    ppl.plot(axes[0], plt_rng, (BckCov.T)[Start:Stop], ls='-', label='Bck',
             linewidth=2, color=color)
    ppl.legend(axes[0])

    # Panel 1: probability of staying in each state (j -> j).
    for j in range(NrOfStates):
        color = set2[j]
        ppl.plot(axes[1], plt_rng,
                 (TransistionProbabilities[j, j, :])[Start:Stop],
                 label='Transition ' + str(j) + ' ' + str(j), linewidth=2,
                 color=color)
    ppl.legend(axes[1])
    axes[1].set_ylabel('log-transition probability')
    axes[1].set_xlabel('Position')
    axes[1].set_title('Transition probability')
    axes[1].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 2: combined emission log-probability per state.
    for j in range(NrOfStates):
        color = set2[j]
        ppl.plot(axes[2], plt_rng, (EmmisionProbGene[j, :][Start:Stop]),
                 label='Emission ' + str(j), linewidth=2, color=color)
    if EmissionParameters['BckType'] == 'Coverage_bck':
        # Lower limit taken from the first two states only.
        axes[2].set_ylim(
            (np.min(np.min(EmmisionProbGene[0:2, :][:, Start:Stop])), 1))
    ppl.legend(axes[2])
    axes[2].set_ylabel('log-GLM probability')
    axes[2].set_xlabel('Position')
    axes[2].set_title('Emission probability')
    axes[2].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 3: decoded state per position.
    ppl.plot(axes[3], plt_rng, MostLikelyPath[Start:Stop])
    axes[3].set_ylabel('State')
    axes[3].set_xlabel('Position')
    axes[3].set_title('Most likely path')
    axes[3].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 4: diag_event_model term per state.
    for j in range(NrOfStates):
        color = set2[j]
        ppl.plot(axes[4], plt_rng, EmmisionProbGene_Dir[j, :][Start:Stop],
                 label='Dir State ' + str(j), linewidth=2, color=color)
    if len(dir_ylim) > 0:
        axes[4].set_ylim(dir_ylim)
    ppl.legend(axes[4])
    axes[4].set_ylabel('log-DMM probability')
    axes[4].set_xlabel('Position')
    axes[4].set_title('DMM probability')
    axes[4].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 5: foreground expression term per state.
    for j in range(NrOfStates):
        color = set2[j]
        ppl.plot(axes[5], plt_rng, EmmisionProbGeneNB_fg[j, :][Start:Stop],
                 label='NB fg ' + str(j), linewidth=2, color=color)
    if EmissionParameters['BckType'] == 'Coverage_bck':
        axes[5].set_ylim(
            [np.min(np.min(EmmisionProbGeneNB_fg[0:2, :][:, Start:Stop])), 1])
    ppl.legend(axes[5])
    axes[5].set_ylabel('prob')
    axes[5].set_xlabel('Position')
    axes[5].set_title('prob-fg')
    axes[5].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 6: background expression term per state.
    for j in range(NrOfStates):
        color = set2[j]
        ppl.plot(axes[6], plt_rng, EmmisionProbGeneNB_bg[j, :][Start:Stop],
                 label='NB bg ' + str(j), linewidth=2, color=color)
    if EmissionParameters['BckType'] == 'Coverage_bck':
        # Unlike panels 2 and 5, the lower limit here uses the first
        # three states.
        axes[6].set_ylim(
            [np.min(np.min(EmmisionProbGeneNB_bg[0:3, :][:, Start:Stop])), 1])
    ppl.legend(axes[6])
    axes[6].set_ylabel('prob')
    axes[6].set_xlabel('Position')
    axes[6].set_title('prob-bg')
    axes[6].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 7: log-odds of the foreground state against all other states.
    fg_state, bg_state = emission.get_fg_and_bck_state(EmissionParameters,
                                                       final_pred=True)
    # Relies on range() returning a list (Python 2) so that remove() works.
    ix_bg = range(EmmisionProbGene.shape[0])
    ix_bg.remove(fg_state)
    FGScore = EmmisionProbGene[fg_state, :]
    AltScore = EmmisionProbGene[ix_bg, :]
    norm = logsumexp(AltScore, axis=0)
    # Despite its name, ix_ok marks the positions whose normaliser is inf
    # or nan; the usable positions are those where ix_ok == 0.
    ix_ok = np.isinf(norm) + np.isnan(norm)
    if np.sum(ix_ok) < norm.shape[0]:
        # NOTE(review): dropping positions shortens SiteScore, so the slice
        # plotted below may no longer line up with plt_rng - confirm.
        SiteScore = FGScore[ix_ok == 0] - norm[ix_ok == 0]
    else:
        print 'Score problematic'
        SiteScore = FGScore
    ppl.plot(axes[7], plt_rng, SiteScore[Start:Stop])
    axes[7].set_ylabel('log-odd score')
    axes[7].set_xlabel('Position')
    axes[7].set_title('log-odd score')
    axes[7].get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 8: the same log-odds, using only the diag_event_model term.
    FGScore = EmmisionProbGene_Dir[fg_state, :]
    AltScore = EmmisionProbGene_Dir[ix_bg, :]
    norm = logsumexp(AltScore, axis=0)
    ix_ok = np.isinf(norm) + np.isnan(norm)
    if np.sum(ix_ok) < norm.shape[0]:
        SiteScore = FGScore[ix_ok == 0] - norm[ix_ok == 0]
    else:
        print 'Score problematic'
        SiteScore = FGScore
    ppl.plot(axes[8], plt_rng, SiteScore[Start:Stop])
    axes[8].set_ylabel('DMM log-odd score')
    axes[8].set_xlabel('Position')
    axes[8].set_title('DMM log-odd score')
    axes[8].get_xaxis().get_major_formatter().set_useOffset(False)

    if not (out_name is None):
        print 'Saving result'
        fig.savefig(out_name)

    plt.show()

    return MostLikelyPath, TransistionProbabilities, EmmisionProbGeneNB_fg
# Outline ("step") histogram of the OSSOS residuals on the existing axes.
ax.hist(ossos_mas, histtype='step', bins=bins, range=(0, maxrange), color='b', linestyle='solid', label='OSSOS 13A')
ax.grid(True, alpha=0.3)
plt.xlabel("astrometric residual (arcsec)")
plt.ylabel("number of astrometric measurements")
plt.xlim([0, .85])

# Proxy line artists describing the two surveys.
# NOTE(review): handler_map is built but not passed to the legend call
# below, so the legend uses the labels of the plotted artists instead -
# confirm whether it was meant to be supplied as the legend handles.
handler_map = [
    mlines.Line2D([], [], color='r', linestyle='solid', lw=2.1, alpha=0.3, label='CFEPS L7'),
    mlines.Line2D([], [], color='b', label='OSSOS 13A'),
]
# No axes argument: the legend goes on the current axes.
ppl.legend(title="Survey", fontsize='small', handletextpad=0.5)
src.ossos.planning.plotting.plot_fanciness.remove_border(ax)
plt.draw()

# The figure is written next to the 'data' directory: everything in `path`
# before the last occurrence of 'data' is kept as the prefix.
outfile = path.rpartition(
    'data')[0] + 'figures/astrometric_residual_50mas_bins.pdf'
plt.savefig(outfile, transparent=True, bbox_inches='tight')
def stackedBarsBlockedEdgesByRoutine(type_header, type_table, config, instanceType, solsPath=None, solsExt=None, useTotalEdges=False, figName=None, routine_map=None):
    """Draw stacked bars of the average share of edges blocked by each routine, per instance size.

    Routines are discovered from header names of the form
    ``<prefix>.<routine>.blockedEdges``; their data is read from the column
    ``preproc.<routine>.blockedEdges``. Routines that blocked nothing for
    every size are left out of the chart.

    :param type_header: iterable of column names.
    :param type_table: nested mapping ``config -> instanceType -> size -> column -> values``.
    :param config: key into ``type_table``.
    :param instanceType: key into ``type_table[config]``.
    :param solsPath: passed to ``getNEdgesArray`` (used when ``useTotalEdges`` is false).
    :param solsExt: passed to ``getNEdgesArray`` (used when ``useTotalEdges`` is false).
    :param useTotalEdges: when true, shares are relative to all
        ``size*(size-1)/2`` edges; otherwise relative to that total minus
        the edge counts returned by ``getNEdgesArray``.
    :param figName: when it is a ``str``, the figure is saved under this name.
    :param routine_map: optional mapping from routine name to legend label.
    :raises Exception: if ``config`` or ``instanceType`` is not in the table.
    """
    if config not in type_table:
        raise Exception("Config \"" + config + "\" not found!")
    if instanceType not in type_table[config]:
        raise Exception("Instance type \"" + instanceType + "\" not found!")

    fig, ax = plt.subplots(1)

    # The dot before "blockedEdges" is escaped so it only matches a
    # literal separator.
    routineR = re.compile(r'[^.]+\.([^.]+)\.blockedEdges')
    routines = []
    for h in type_header:
        routine = routineR.search(h)
        if routine is not None:
            routines.append(routine.group(1))

    by_size = type_table[config][instanceType]
    # sorted(mapping) iterates the keys on both Python 2 and Python 3.
    sizes = sorted(by_size)

    # data[r][s]: average percentage blocked by routine r at size s.
    data = [[] for _ in routines]
    for size in sizes:
        columns = by_size[size]
        # size*(size-1) is always even, so floor division is exact.
        edgesPerInstance = size * (size - 1) // 2
        if useTotalEdges:
            totalVars = [int(v) for v in columns['nVarsTotal']]
            refNEdges = [edgesPerInstance] * len(totalVars)
        else:
            solsNEdges = getNEdgesArray(type_table, type_header, config,
                                        instanceType, size, solsPath, solsExt)
            refNEdges = (np.array([edgesPerInstance] * len(solsNEdges))
                         - np.asarray(solsNEdges))

        for index, routine in enumerate(routines):
            columnName = 'preproc.' + routine + '.blockedEdges'
            blockedEdges = [int(v) for v in columns[columnName]]
            propBlockedEdges = np.array(
                [float(b) / float(t) for b, t in zip(blockedEdges, refNEdges)])
            avgPropBlockedEdges = propBlockedEdges.mean() * 100.0
            # Diagnostic output for one specific figure only.
            if figName == 'bEdges-exclReg-routines-mdtp.pdf':
                print("%s : bEdges: %s : %s" % (size, routine, avgPropBlockedEdges))
            data[index].append(avgPropBlockedEdges)

    # Get "Set2" colors from ColorBrewer
    # (all colorbrewer scales: http://bl.ocks.org/mbostock/5577023)
    colors = brewer2mpl.get_map('Set2', 'qualitative', max(len(routines), 3)).mpl_colors

    # Keep only the routines with at least one non-zero average; each keeps
    # the colour of its original position.
    clean_data = []
    clean_routines = []
    clean_colors = []
    for index, routine in enumerate(routines):
        if data[index].count(0) != len(data[index]):
            clean_data.append(data[index])
            clean_routines.append(routine)
            clean_colors.append(colors[index])

    data = np.array(clean_data)
    routines = clean_routines
    colors = clean_colors

    if routine_map is not None:
        labels = np.array([routine_map.get(r) for r in routines])
    else:
        labels = np.array(routines)

    # Each layer starts where the cumulative sum of the layers below ends.
    bottom = np.vstack((np.zeros((data.shape[1], ), dtype=data.dtype),
                        np.cumsum(data, axis=0)[:-1]))

    width = 0.8
    # Centre each bar on its integer tick position.
    ind = [x - width / 2 for x in range(1, len(data[0]) + 1)]

    # Top layer first, so the legend order matches the stacking order.
    # list() keeps reversed() valid on Python 3, where zip is lazy.
    for dat, lab, bot, col in reversed(list(zip(data, labels, bottom, colors))):
        ppl.bar(ax, ind, dat, width, grid='y', bottom=bot, label=lab, color=col)

    ax.set_xlabel(u'# Pontos')
    ax.set_xticks(range(1, len(sizes) + 1))
    ax.set_xticklabels([str(s) for s in sizes])
    ax.set_xlim(0.5, len(sizes) + 0.5)
    ax.set_ylabel(u'% Arestas bloqueadas')

    ppl.legend(ax, loc="lower right")

    if figName is not None and isinstance(figName, str):
        fig.savefig(figName, bbox_inches='tight')
xx = [] yx = [] for y in years: xx.append(int(y)) val = s["data"][y]["max"] yx.append(val) ax.scatter(xx, yx, marker='o') ppl.scatter(ax, xx, yx, alpha=0.8, edgecolor='black', linewidth=0.15, label=str(s["station_num"])) ppl.legend(ax, loc='right', ncol=1) ax.set_xlabel('Year') ax.set_ylabel('water level (m)') ax.set_title("Stations exceeding " + str(num_years_required) + " years worth of water level data (MHHW)") fig.set_size_inches(14, 8) # <markdowncell> # ### Number of stations available by number of years # <codecell> fig, ax = plt.subplots(1) year_list_map = []
def plot_distrib(distrib_method_name):
    """Show an interactive pdf/cdf plot of a ``scipy.stats`` distribution.

    One slider per distribution parameter redraws both curves, and a
    "Reset" button restores the sliders to their initial values.

    :param distrib_method_name: attribute name of the distribution in
        ``scipy.stats`` (for example ``'norm'``); also passed to
        ``prepare_params`` to get the default parameters and axis range.
    """
    fig, ax = plt.subplots(1)
    ax.set_title(distrib_method_name + " distribution")
    # Make room below the plot for the sliders and the button.
    plt.subplots_adjust(left=0.1, bottom=0.25)

    # Default parameters of the distribution
    params, rangexy = prepare_params(distrib_method_name)
    # Look the distribution object up by name, e.g. 'norm' -> scipy.stats.norm
    distrib = getattr(scipy.stats, distrib_method_name)

    # Initial plot
    x = np.linspace(rangexy[0], rangexy[1], 100)
    line_pdf, = ppl.plot(ax, x, distrib.pdf(x, *params), lw=2, color='red', label="pdf")
    line_cdf, = ppl.plot(ax, x, distrib.cdf(x, *params), lw=1, color='lightgrey', label="cdf")
    plt.axis(rangexy)
    ppl.legend(ax)

    ## Updated plot on event
    axcolor = 'lightgoldenrodyellow'
    ax_params = []
    slider_params = []
    for index, initial in enumerate(params):
        # One thin axes per parameter, stacked downwards.
        # NOTE(review): `axisbg` is only accepted by older matplotlib
        # releases - confirm the matplotlib version this must run on.
        slot = plt.axes([0.25, 0.15 - index * .05, 0.65, 0.03],
                        axisbg=axcolor, frameon=False)
        ax_params.append(slot)
        # The slider lives in the axes that was just created.
        slider_params.append(
            Slider(slot, 'param ' + str(index + 1), 0.1, 10.0, valinit=initial))

    def update(val):
        # Read every slider and redraw both curves with the new parameters.
        updated_params = [slider.val for slider in slider_params]
        line_pdf.set_ydata(distrib.pdf(x, *updated_params))
        line_cdf.set_ydata(distrib.cdf(x, *updated_params))
        fig.canvas.draw_idle()

    for slider in slider_params:
        slider.on_changed(update)

    resetax = plt.axes([0.8, 0.025, 0.1, 0.04])
    button = Button(resetax, 'Reset', color=axcolor, hovercolor='0.975')

    def reset(event):
        for slider in slider_params:
            slider.reset()

    button.on_clicked(reset)

    # NOT working when wrapped inside plot_distrib()
    plt.show()