def distopcounts():
    """Scatter query counts against the number of distinct operators used.

    Draws the series labelled "SDSS" and "SQLShare" on a log-scaled y axis,
    shows the figure, then writes it out as a PDF and a PNG.
    """
    fig, ax = plt.subplots(1, figsize=(8, 4))
    ax.set_yscale('log')

    sdss = read_csv(['distinct_ops', 'counts'], True)
    ppl.scatter(ax, sdss['distinct_ops'], sdss['counts'],
                color=pcs[0], marker="o", label="SDSS", s=100)

    sqlshare = read_csv(['distinct_physical_ops'], False)
    ppl.scatter(ax, sqlshare['distinct_physical_ops'], sqlshare['count'],
                color=pcs[1], marker="v", label="SQLShare", s=100)

    ax.set_xlabel('Distinct physical operators used')
    ax.set_ylabel('# of queries')
    ppl.legend(ax, loc='lower right')
    ax.set_xlim(0)
    ax.set_ylim(0)
    fig.tight_layout()
    plt.show()

    outputs = (
        ('plot_dist_physops_query.pdf', 'pdf'),
        ('plot_dist_physops_query.png', 'png'),
    )
    for path, fmt in outputs:
        fig.savefig(path, format=fmt, transparent=True)
def plot_profile(self, fig, ax, x, y, xlabel='', ylabel='', axis_font={}, tick_font={}, scatter=False, **kwargs):
    """Draw ``y`` against ``x`` on ``ax`` with the x axis labelled on top.

    ``scatter`` selects ``ppl.scatter`` over ``ppl.plot``; ``kwargs`` are
    forwarded to whichever is used.  Underscores in the axis labels are
    replaced by spaces.  ``axis_font`` falls back to ``axis_font_default``
    when empty.  ``fig`` is accepted but not used here.
    """
    label_font = axis_font if axis_font else axis_font_default

    draw = ppl.scatter if scatter else ppl.plot
    draw(ax, x, y, **kwargs)

    if xlabel:
        ax.set_xlabel(xlabel.replace("_", " "), labelpad=5, **label_font)
    if ylabel:
        ax.set_ylabel(ylabel.replace("_", " "), labelpad=11, **label_font)
    if tick_font:
        ax.tick_params(**tick_font)

    # Move both the x label and the x ticks to the top edge of the axes.
    x_axis = ax.xaxis
    x_axis.set_label_position('top')
    x_axis.set_ticks_position('top')
    x_axis.get_major_locator()._nbins = 5

    ax.grid(True)
    plt.tight_layout()
def xy_linear_regression(df, x_var, y_var, **kwargs):
    """Fit ``y_var = intercept + slope * x_var`` with a linear regression.

    Rows with a missing value in either column are dropped before fitting.

    Keyword arguments:
        fit_intercept: forwarded to ``LinearRegression`` (default True).
        normalize: forwarded to ``LinearRegression`` only when truthy, so
            the default call does not depend on that parameter existing.
        plot: when truthy, scatter the data, draw the fitted line from
            x = 0 to the largest x, and annotate slope and intercept.

    Returns:
        ``(intercept, slope)``.
    """
    fit_intercept = kwargs.get('fit_intercept', True)
    d = df[[x_var, y_var]].dropna()

    # Plain 2-D column arrays carry the same (n, 1) shape as the transposed
    # np.matrix objects used previously.
    x = np.asarray(d[x_var]).reshape(-1, 1)
    y = np.asarray(d[y_var]).reshape(-1, 1)

    model_kwargs = {'fit_intercept': fit_intercept}
    if kwargs.get('normalize', False):
        model_kwargs['normalize'] = kwargs['normalize']
    clf = linear_model.LinearRegression(**model_kwargs)
    clf.fit(x, y)

    # intercept_ is a scalar when no intercept is fitted, so indexing it
    # directly fails; ravel handles both the scalar and the array form.
    intercept = np.ravel(clf.intercept_)[0]
    slope = np.ravel(clf.coef_)[0]

    if kwargs.get('plot', False):
        ppl.scatter(x=d[x_var], y=d[y_var])
        x_line = np.array([0, d[x_var].max()])
        plt.plot(x_line, intercept + slope * x_line, 'k-')
        plt.annotate('slope={slope:.4f}\nintercept={intercept:.4f}'.format(
            slope=slope, intercept=intercept), (0.05, 0.9),
            xycoords='axes fraction')
        plt.xlabel(x_var)
        plt.ylabel(y_var)
    return intercept, slope
def new_tables_cdf():
    """Plot the cumulative share of newly used tables per query position.

    Both series ("SDSS" and "SQLShare") are normalised to the range 0..1 on
    each axis, drawn as a step line with markers, shown, and saved to
    ``num_new_tables.pdf`` and ``num_new_tables.png``.
    """
    fig, ax = plt.subplots(1)

    sdss = read_csv(['query_number', 'num_new_tables'], True)
    new_tables = sdss['num_new_tables'].astype(float)
    new_tables /= sum(new_tables)
    sdss_pos = sdss['query_number'].astype(float)
    sdss_pos /= sdss_pos[-1]
    sdss_cdf = np.cumsum(new_tables)
    ppl.plot(ax, sdss_pos, sdss_cdf, label="SDSS", color=cs[0],
             linewidth=2, ls='-.', drawstyle='steps-post')
    ppl.scatter(ax, sdss_pos, sdss_cdf, color=cs[0], marker="o", s=100)

    sqlshare = read_csv(['table_coverage'], False)
    coverage = sqlshare['tables'].astype(float)
    coverage /= coverage[-1]
    sqlshare_pos = sqlshare['query_id'].astype(float)
    sqlshare_pos /= sqlshare_pos[-1]
    ppl.plot(ax, sqlshare_pos, coverage, label="SQLShare", color=cs[1],
             linewidth=2, ls='-.', drawstyle='steps-post')
    ppl.scatter(ax, sqlshare_pos, coverage, color=cs[1], marker="o", s=100)

    ppl.legend(ax, loc='lower right')
    plt.gca().yaxis.set_major_formatter(formatter)
    ax.set_xlabel('Query number')
    ax.set_ylabel('% of newly used table')
    ax.set_ylim(0, 1.01)
    ax.set_xlim(0, 1)
    ax.yaxis.grid()
    plt.show()

    for path, fmt in (('num_new_tables.pdf', 'pdf'),
                      ('num_new_tables.png', 'png')):
        fig.savefig(path, format=fmt, transparent=True)
def plot_clusters(clusters, candidates, bounds, vloc, hulls, shrink=0.9):
    """Plot all `clusters` among `candidates` with the `bounds` of the city
    (or at least `shrink` of them). Also plot convex `hulls` of gold areas
    if provided.

    Candidates that belong to no cluster are drawn as a smaller, gray
    "noise" group.  `vloc` is indexed as ``vloc[indices, 0/1]`` to obtain
    the coordinates of each group.
    """
    xbounds, ybounds = bounds
    unique_labels = len(clusters)
    clustered = set().union(*map(list, clusters))
    noise = list(candidates.difference(clustered))

    # One colour is needed per cluster plus one for the noise group.  The
    # named palette only has five entries, so switch to the colormap as soon
    # as that is not enough; otherwise zip() would drop the last cluster.
    n_groups = unique_labels + 1
    if n_groups > 5:
        colors = mpl.cm.Spectral(np.linspace(0, 1, n_groups))
    else:
        colors = [gray, red, green, blue, orange]

    plt.figure(figsize=(20, 15))
    groups = [noise] + clusters
    for position, (indices, col) in enumerate(zip(groups, colors)):
        is_noise = position == 0
        ppl.scatter(vloc[indices, 0], vloc[indices, 1],
                    s=16 if is_noise else 35,
                    color='gray' if is_noise else col,
                    alpha=0.6 if is_noise else 0.8,
                    label='noise' if is_noise else 'cluster {}'.format(position))

    hulls = hulls or []
    for idx, hull in enumerate(hulls):
        # Repeat the first vertex so the outline is drawn closed.
        first_again = list(range(len(hull))) + [0]
        ppl.plot(hull[first_again, 0], hull[first_again, 1], '--',
                 c=ppl.colors.almost_black, lw=1.0, alpha=0.9,
                 label='gold region' if idx == 0 else None)

    plt.xlim(shrink*xbounds)
    plt.ylim(shrink*ybounds)
    ppl.legend()
def scatter_plot(x, y, title, save, l):
    """Scatter ``y`` against ``x`` on the module-level ``ax`` and show it.

    Args:
        x, y: data passed to ``ppl.scatter``.
        title: axes title.
        save: when truthy, the module-level ``fig`` is saved to ``l``.
        l: output path used when ``save`` is truthy.
    """
    ppl.scatter(ax, x, y)
    ax.set_title(title)
    # NOTE(review): confirm that ``ppl`` really exposes ``show`` here.
    ppl.show()
    if save:
        fig.savefig(l)
def plot_time_series(self, fig, is_timeseries, ax, x, y, fill=False, title='', xlabel='', ylabel='', title_font={}, axis_font={}, tick_font={}, scatter=False, qaqc=[], events={}, **kwargs):
    """Draw a line or scatter series on ``ax`` with optional decorations.

    Args:
        fig: figure owning ``ax``; used to format date tick labels.
        is_timeseries: when truthy the x axis is labelled through
            ``self.get_time_label``; otherwise ``xlabel`` is applied.
        ax, x, y: target axes and data.
        fill: shade the area between the series and the lower y limit.
        title, xlabel, ylabel: texts; underscores are shown as spaces.
        title_font, axis_font: font kwargs, defaulting to the module-level
            ``title_font_default`` / ``axis_font_default`` when empty.
        tick_font: kwargs for ``ax.tick_params`` when non-empty.
        scatter: use ``ppl.scatter`` instead of ``ppl.plot``.
        qaqc: per-sample flags; samples with a flag > 0 are circled in red.
        events: mapping with an ``'events'`` list whose items provide
            ``'start_date'`` and ``'class'``; each is drawn as a dashed
            vertical line and listed in the legend.
        **kwargs: forwarded to the plotting call.
    """
    if not title_font:
        title_font = title_font_default
    if not axis_font:
        axis_font = axis_font_default

    if scatter:
        ppl.scatter(ax, x, y, **kwargs)
    else:
        h = ppl.plot(ax, x, y, **kwargs)

    if is_timeseries:
        self.get_time_label(ax, x)
        fig.autofmt_xdate()
    else:
        ax.set_xlabel(xlabel.replace("_", " "), **axis_font)
    if ylabel:
        ax.set_ylabel(ylabel.replace("_", " "), **axis_font)
    if title:
        ax.set_title(title.replace("_", " "), **title_font)
    ax.grid(True)

    if fill:
        miny = min(ax.get_ylim())
        if not scatter:
            fill_color = h[0].get_color()
        else:
            fill_color = axis_font_default['color']
        ax.fill_between(x, y, miny+1e-7, facecolor=fill_color, alpha=0.15)

    if events:
        ylim = ax.get_ylim()
        for event in events['events']:
            time = datestr2num(event['start_date'])
            # Use a dedicated name: reusing ``x`` here would clobber the
            # data that the QA/QC overlay below still needs.
            event_x = np.array([time, time])
            ax.plot(event_x, ylim, '--', label=event['class'])
        legend = ax.legend()
        if legend:
            for label in legend.get_texts():
                label.set_fontsize(10)

    if len(qaqc) > 0:
        # Convert first so that lists can be compared and fancy-indexed.
        bad_data = np.where(np.asarray(qaqc) > 0)
        ppl.plot(ax, np.asarray(x)[bad_data], np.asarray(y)[bad_data],
                 marker='o', mfc='none', linestyle='None', markersize=6,
                 markeredgewidth=2, mec='r')

    if tick_font:
        ax.tick_params(**tick_font)
    plt.tight_layout()
def test_scatter():
    """Scatter eight normally distributed point clouds without an explicit axes."""
    # Fixed seed so every run draws the same points.
    np.random.seed(12)
    # One cloud per centre 0..7 so that the colour cycle is exercised.
    for centre in range(8):
        xs = np.random.normal(loc=centre, size=1000)
        ys = np.random.normal(loc=centre, size=1000)
        ppl.scatter(xs, ys, label=str(centre))
def scatterFreeByMissingEdges(type_header, type_table, config, instanceType,
                              solsPath, solsExt, figName=None):
    """Scatter free edges against missing edges, one series per instance size.

    For every instance, "missing" is the number of edges in its solution
    file minus the fixed edges, and "free" is size*(size-1)/2 minus the
    fixed and blocked edges.

    Args:
        type_header: unused here.
        type_table: nested mapping ``config -> instanceType -> size ->
            column name -> values``.
        config, instanceType: keys selecting the rows to plot.
        solsPath: directory holding the solution files.
        solsExt: suffix of the solution file names.
        figName: when given, the figure is saved to this path.

    Raises:
        Exception: if ``config`` or ``instanceType`` is not in the table.
    """
    if config not in type_table:
        raise Exception("Config \"" + config + "\" not found!")
    if instanceType not in type_table[config]:
        raise Exception("Instance type \"" + instanceType + "\" not found!")

    by_size = type_table[config][instanceType]
    fig, ax = plt.subplots(1)
    for size in sorted(by_size):
        columns = by_size[size]
        # Build real lists so this works whether or not map() is lazy.
        fixedEdges = np.array([int(v) for v in columns["preproc.fixedEdges"]])
        blockedEdges = np.array([int(v) for v in columns["preproc.blockedEdges"]])
        instanceNumbers = np.array(columns["instanceNumber"])

        # NOTE(review): the suffix stays appended for every size processed
        # after 80 -- confirm that this matches the directory layout.
        if size == 80:
            solsPath += '_80-90'

        # size * (size - 1) is always even, so floor division is exact.
        totalEdges = ((size - 1) * size) // 2
        missingEdges = []
        freeEdges = []
        for fEdges, bEdges, instanceNumber in zip(fixedEdges, blockedEdges,
                                                  instanceNumbers):
            instanceName = "%s_%03d_%02d%s" % (instanceType, size,
                                               instanceNumber, solsExt)
            solEdges = getNEdges(os.path.join(solsPath, instanceName))
            missingEdges.append(solEdges - fEdges)
            freeEdges.append(totalEdges - (fEdges + bEdges))
        ppl.scatter(ax, missingEdges, freeEdges, label=str(size))

    ppl.legend(ax, loc="lower right")
    ax.set_xlabel(u'Arestas faltantes')
    ax.set_ylabel(u'Arestas livres')
    ax.set_xlim((0, ax.get_xlim()[1]))
    ax.set_ylim((0, ax.get_ylim()[1]))
    if figName is not None:
        fig.savefig(figName, bbox_inches='tight')
def plot_depth_ratios(depths, ratios, quals, in_file, title):
    """Scatter variant/total ratios against depth, one series per quality range.

    The grouping comes from ``_group_ratios_by_qual``; the figure is saved
    next to ``in_file`` with the extension replaced by ``-depthratios.png``.
    """
    out_file = "%s-depthratios.png" % os.path.splitext(in_file)[0]
    fig, ax = plt.subplots(1)
    for ds, rs, qualrange in _group_ratios_by_qual(depths, ratios, quals):
        print("%s %s" % (qualrange, len(ds)))
        # Plot this group's own points, not the complete input, so that
        # each legend entry corresponds to its quality range.
        ppl.scatter(ax, x=ds, y=rs, label=qualrange)
    ppl.legend(ax, title="Quality score range")
    ax.set_title(title)
    ax.set_xlabel("Depth")
    ax.set_ylabel("Variant/Total ratio")
    fig.savefig(out_file)
def test_legend():
    """Scatter eight labelled point clouds on one axes and add a legend."""
    # Fixed seed so every run draws the same points.
    np.random.seed(12)
    fig, ax = plt.subplots(1)
    # One cloud per centre 0..7 so that the colour cycle is exercised.
    for centre in range(8):
        xs = np.random.normal(loc=centre, size=1000)
        ys = np.random.normal(loc=centre, size=1000)
        ppl.scatter(ax, xs, ys, label=str(centre))
    ppl.legend(ax)
def plot_ts_diagram(self, ax, sal, temp, xlabel='Salinity', ylabel='Temperature', title='', axis_font={}, title_font={}, tick_font={}, **kwargs):
    """Scatter temperature against salinity over dashed density contours.

    Args:
        ax: axes to draw on.
        sal, temp: equally long sequences; NaN entries are masked out.
        xlabel, ylabel, title: texts; underscores are shown as spaces.
        axis_font, title_font: font kwargs, defaulting to the module-level
            ``axis_font_default`` / ``title_font_default`` when empty.
        tick_font: kwargs for ``ax.tick_params`` when non-empty.
        **kwargs: forwarded to ``ppl.scatter``.

    Raises:
        Exception: if ``sal`` and ``temp`` differ in length.
    """
    if not axis_font:
        axis_font = axis_font_default
    if not title_font:
        title_font = title_font_default

    sal = np.ma.array(sal, mask=np.isnan(sal))
    temp = np.ma.array(temp, mask=np.isnan(temp))
    if len(sal) != len(temp):
        raise Exception('Sal and Temp arrays are not the same size!')

    # Pad the data range to obtain the extent of the contour grid.
    smin = sal.min() - (0.01 * sal.min())
    smax = sal.max() + (0.01 * sal.max())
    tmin = temp.min() - (0.1 * temp.max())
    tmax = temp.max() + (0.1 * temp.max())

    # Grid sizes must be integers: array shapes and sample counts do not
    # accept the floats that round() returns.
    xdim = int(round((smax-smin)/0.1+1, 0))
    ydim = int(round((tmax-tmin)+1, 0))

    dens = np.zeros((ydim, xdim))
    ti = np.linspace(1, ydim-1, ydim)+tmin
    si = np.linspace(1, xdim-1, xdim)*0.1+smin
    for j in range(ydim):
        for i in range(xdim):
            dens[j, i] = sw.dens(si[i], ti[j], 0)
    # Subtract 1000 to convert to sigma-t.
    dens = dens - 1000

    # Draw on the axes that was passed in rather than on whichever axes is
    # current, so contours and scatter always land in the same place.
    cs = ax.contour(si, ti, dens, linestyles='dashed', colors='k')
    ax.clabel(cs, fontsize=12, inline=1, fmt='%1.0f')

    ppl.scatter(ax, sal, temp, **kwargs)
    ax.set_xlabel(xlabel.replace("_", " "), labelpad=10, **axis_font)
    ax.set_ylabel(ylabel.replace("_", " "), labelpad=10, **axis_font)
    ax.set_title(title.replace("_", " "), **title_font)
    ax.set_aspect(1./ax.get_data_ratio())  # make axes square
    if tick_font:
        ax.tick_params(**tick_font)
    plt.tight_layout()
def plot_closeness(graph, closenessfile=None):
    """Scatter closeness centrality against the per-node ``'weights'`` value.

    Each connected component of the undirected view of ``graph`` becomes
    one scatter series.  Nodes without a ``'weights'`` attribute count as 1.
    The figure is saved to ``closenessfile`` when given, otherwise shown.
    """
    fig, ax = plt.subplots(1)
    undirected = graph.to_undirected()
    for component in nx.connected_components(undirected):
        created = []
        for member in component:
            created.append(graph.node[member].get('weights', 1))
        closeness = []
        for member in component:
            closeness.append(nx.closeness_centrality(undirected, u=member))
        prettyplotlib.scatter(ax, created, closeness)
        print(closeness)

    ax.set_xlabel('Changes Authored')
    ax.set_ylabel('Closeness Centrality')
    ax.set_ylim(0.0, 1.0)
    ax.set_xscale('log')

    if not closenessfile:
        plt.show()
    else:
        fig.savefig(closenessfile)
def plot_scatter_k_means_2d(n_clusters, clusters, is_plot=False):
    """Scatter the first ``n_clusters`` 2-D clusters, one colour per cluster.

    Nothing happens unless ``is_plot`` is truthy.  Each cluster is a
    sequence of points whose first two items are used as x and y.  The
    figure is shown and then saved to
    ``k_means_classification_2d_plot.jpg``.
    """
    if not is_plot:
        return

    colors = ["lime", "aqua", "deeppink", "orangered", "dodgerblue",
              "magenta", "darkolivegreen", "crimson", "yellow", "darkorchid",
              "dodgerblue", "mediumpurple", "hotpink", "cyan", "orangered"]

    fig, ax = plt.subplots()
    ax.set_xlabel("x axis")
    ax.set_ylabel("y axis")
    title = "Plot for K-Means class"
    ax.set_title(title)

    for i in range(0, n_clusters):
        points = clusters[i]
        x_axis = tuple(p[0] for p in points)
        y_axis = tuple(p[1] for p in points)
        # Labels are generated so that no class number repeats, and the
        # palette wraps around instead of failing past its last entry.
        ppl.scatter(ax, x_axis, y_axis,
                    color=colors[i % len(colors)],
                    label="Class_%d" % (i + 1))
    ppl.legend(ax)
    plt.show()
    fig.savefig('k_means_classification_2d_plot.jpg')
def _render_axes(self, ax_array, lang):
    """Draw one country per facet, overlaid with the comparator series.

    Args:
        ax_array: 2-D grid of axes, indexed ``[row][col]``; facets fill it
            row by row using ``specs['cols']`` columns.
        lang: key selecting which language of the country name is used as
            the facet title.
    """
    specs = self.plot.specs
    data = self.plot.data
    main_color = specs.get('color', None)

    comp_series = {}
    for comp in specs['comparators']:
        rows = data[data.country == comp]
        comp_series[comp] = (rows.year, rows.value)

    for facet in range(specs['facets']):
        r, c = divmod(facet, specs['cols'])
        ax = ax_array[r][c]

        country = self.plot.index.get_countries(
            names=[specs['countries'][facet]])
        rows = data[data.country == country['en'][0]]
        x = rows.year
        y = rows.value

        ax.text(0.5, 0.95, country[lang][0],
                verticalalignment='bottom', horizontalalignment='center',
                transform=ax.transAxes, color=main_color,
                fontsize=10, fontweight='bold')

        ax.set_ylim(specs['ylim'])
        if specs.get('ystep', None):
            ax.yaxis.set_ticks(np.arange(specs['ylim'][0], specs['ylim'][1],
                                         specs['ystep']))
        if specs.get('xstep', None):
            ax.xaxis.set_ticks(np.arange(specs['xlim'][0], specs['xlim'][1],
                                         specs['xstep']))

        # Set the tick label size through the public API instead of
        # reaching into each tick object.
        ax.tick_params(axis='both', which='major', labelsize=10)

        # Draw in-focus series: strong colour and opaque
        ppl.plot(ax, x, y, alpha=1.0, linewidth=1, color=main_color)
        ppl.scatter(ax, x, y, s=12.0, alpha=1.0, color=main_color)

        # Draw comparators: light, translucent, overlapping main series
        for x_comp, y_comp in comp_series.values():
            ppl.plot(ax, x_comp, y_comp, alpha=0.35, linewidth=5)
def plot_meshless(args):
    """Overlay one translucent density contour per point, then the points.

    Reads ``npoints``, ``seed``, ``sigma`` and ``subtitle`` from ``args``.
    Densities are evaluated on a 0.01-spaced grid over [0, 1) x [0, 1).
    """
    npoints = args.npoints
    points = generate_2d_points(npoints, args.seed)

    grid_x = np.arange(0., 1., .01)
    grid_y = np.arange(0., 1., .01)
    xx, yy = np.meshgrid(grid_x, grid_y)

    import prettyplotlib as ppl
    fig, ax = ppl.subplots()

    for current in points:
        density = compute_meshless_density(xx, yy, points, current,
                                           args.sigma)
        # Each layer gets an equal share of the opacity.
        plt.contourf(xx, yy, density, alpha=1. / npoints)

    ppl.scatter(ax, points[:, 0], points[:, 1], color='white')
    ax.autoscale(tight=True)
    plt.title(args.subtitle)
def plot_embedding(figure, index, method, run_time, data, classes, dimension):
    """Scatter subplot `data` with colors corresponding to `classes` on
    `figure` at position `index` in `dimension`D. Title is made of `method`
    and `run_time`.

    Raises ValueError when `dimension` is neither 2 nor 3.
    """
    common = dict(c=classes, cmap=mpl.cm.Spectral, alpha=0.8, s=45)
    if dimension not in (2, 3):
        raise ValueError(dimension)

    three_d = dimension == 3
    if three_d:
        axe = figure.add_subplot(5, 3, 1 + index, projection="3d")
        ppl.scatter(data[:, 0], data[:, 1], data[:, 2], **common)
    else:
        axe = figure.add_subplot(1, 1, 1 + index)
        ppl.scatter(data[:, 0], data[:, 1], **common)

    plt.title("{} ({:.2g} sec)".format(method, run_time))

    # Hide the tick labels on every axis that exists for this projection.
    axe.xaxis.set_major_formatter(mpl.ticker.NullFormatter())
    axe.yaxis.set_major_formatter(mpl.ticker.NullFormatter())
    if three_d:
        axe.zaxis.set_major_formatter(mpl.ticker.NullFormatter())
    plt.axis('tight')
def scatterFreeByMissingEdges(type_header, type_table, config, instanceType,
                              solsPath, solsExt, figName=None):
    """Scatter free edges against missing edges, one series per instance size.

    For every instance, "missing" is the number of edges in its solution
    file minus the fixed edges, and "free" is size*(size-1)/2 minus the
    fixed and blocked edges.

    Args:
        type_header: unused here.
        type_table: nested mapping ``config -> instanceType -> size ->
            column name -> values``.
        config, instanceType: keys selecting the rows to plot.
        solsPath: directory holding the solution files.
        solsExt: suffix of the solution file names.
        figName: when given, the figure is saved to this path.

    Raises:
        Exception: if ``config`` or ``instanceType`` is not in the table.
    """
    if config not in type_table:
        raise Exception("Config \""+config+"\" not found!")
    if instanceType not in type_table[config]:
        raise Exception("Instance type \""+instanceType+"\" not found!")

    by_size = type_table[config][instanceType]
    fig, ax = plt.subplots(1)
    for size in sorted(by_size):
        columns = by_size[size]
        # Build real lists so this works whether or not map() is lazy.
        fixedEdges = np.array([int(v) for v in columns["preproc.fixedEdges"]])
        blockedEdges = np.array([int(v) for v in columns["preproc.blockedEdges"]])
        instanceNumbers = np.array(columns["instanceNumber"])

        # NOTE(review): the suffix stays appended for every size processed
        # after 80 -- confirm that this matches the directory layout.
        if size == 80:
            solsPath += '_80-90'

        # size * (size - 1) is always even, so floor division is exact.
        totalEdges = ((size-1)*size) // 2
        missingEdges = []
        freeEdges = []
        for fEdges, bEdges, instanceNumber in zip(fixedEdges, blockedEdges,
                                                  instanceNumbers):
            instanceName = "%s_%03d_%02d%s" % (instanceType, size,
                                               instanceNumber, solsExt)
            solEdges = getNEdges(os.path.join(solsPath, instanceName))
            missingEdges.append(solEdges - fEdges)
            freeEdges.append(totalEdges - (fEdges + bEdges))
        ppl.scatter(ax, missingEdges, freeEdges, label=str(size))

    ppl.legend(ax, loc="lower right")
    ax.set_xlabel(u'Arestas faltantes')
    ax.set_ylabel(u'Arestas livres')
    ax.set_xlim((0, ax.get_xlim()[1]))
    ax.set_ylim((0, ax.get_ylim()[1]))
    if figName is not None:
        fig.savefig(figName, bbox_inches='tight')
def ppl_scatter():
    '''
    Draw eight labelled point clouds with prettyplotlib in the third of
    three side-by-side subplots of the module-level ``fig``.

    Note: This function requires "prettyplotlib" library.
    '''
    np.random.seed(12)
    ax = fig.add_subplot(1, 3, 3)
    # One cloud per centre 0..7 so that the colour cycle is exercised.
    for centre in range(8):
        xs = np.random.normal(loc=centre, size=1000)
        ys = np.random.normal(loc=centre, size=1000)
        ppl.scatter(ax, xs, ys, label=str(centre))
    ppl.legend(ax, loc=4, fontsize=11)
    ax.set_title(
        'A prettyplotlib `scatter` example\n'
        'showing default color cycle and scatter params',
        fontsize=12)
def table_touch(dataset=True):
    """Scatter query counts against table touch on a log-scaled y axis.

    The "SDSS" series is always drawn.  ``dataset`` selects which second
    series is read and plotted and which pair of output files
    (``plot_touch_dataset.*`` or ``plot_touch.*``) is written.
    """
    fig, ax = plt.subplots(1)
    ax.set_yscale('log')

    sdss = read_csv(['touch', 'counts'], True)
    ppl.scatter(ax, sdss['touch'], sdss['counts'], label="SDSS",
                marker="o", s=100)

    if dataset:
        column = 'dataset_touch'
        series_label = "SQLShare (Dataset)"
        color_index = 0
        outputs = (('plot_touch_dataset.pdf', 'pdf'),
                   ('plot_touch_dataset.png', 'png'))
    else:
        column = 'touch'
        series_label = "SQLShare"
        color_index = 1
        outputs = (('plot_touch.pdf', 'pdf'),
                   ('plot_touch.png', 'png'))

    other = read_csv([column], False)
    ppl.scatter(ax, other[column], other['count'], label=series_label,
                marker="v", s=100, color=pcs[color_index])

    ax.set_xlabel('Table touch')
    ax.set_ylabel('# of queries')
    ppl.legend(ax)
    ax.set_ylim(0)
    plt.show()

    for path, fmt in outputs:
        fig.savefig(path, format=fmt, transparent=True)
def df_scatter_plot(df, x=None, y=None, label=None, **kwargs):
    """Scatter column ``y`` against column ``x``, one series per label value.

    Args:
        df: data frame holding the columns.
        x, y: names of the coordinate columns.
        label: name of the grouping column.  When omitted, ``df`` must have
            exactly three columns and the one that is neither ``x`` nor
            ``y`` is used.
        colors (keyword): optional sequence of face colours, indexed by the
            position of each label in ``label_list``.
        label_list (keyword): label values to plot, in order; defaults to
            the unique values of the label column.
        **kwargs: everything else is forwarded to ``ppl.scatter``.

    Raises:
        ValueError: if ``label`` is omitted and ``df`` does not have
            exactly three columns.
    """
    if label is None:
        if len(df.columns) != 3:
            raise ValueError("I can't (or rather won't) guess the label if there's not exactly 3 columns. "
                             "You need to specify it")
        label = [t for t in df.columns if t not in [x, y]][0]

    colors = kwargs.pop('colors', None)
    label_list = kwargs.pop('label_list', np.array(df[label].unique()))
    # An explicit emptiness test: truth-testing a NumPy array of colours
    # raises instead of answering the question.
    has_colors = colors is not None and len(colors) > 0

    fig, ax = mpl_plt.subplots(1)
    for i, this_label in enumerate(label_list):
        d = df[df[label] == this_label]
        xvals = np.array(d[x])
        yvals = np.array(d[y])
        # NOTE(review): the legend shows the position of the label, not the
        # label value itself -- confirm that this is intended.
        if has_colors:
            ppl.scatter(ax, xvals, yvals, label=str(i), facecolor=colors[i],
                        **kwargs)
        else:
            ppl.scatter(ax, xvals, yvals, label=str(i), **kwargs)
    ppl.legend(ax)
def ppl_scatter():
    '''
    Draw eight labelled point clouds with prettyplotlib in the third of
    three side-by-side subplots of the module-level ``fig``.

    Note: This function requires "prettyplotlib" library.
    '''
    np.random.seed(12)
    ax = fig.add_subplot(1, 3, 3)
    # One cloud per centre 0..7 so that the colour cycle is exercised.
    for centre in range(8):
        xs = np.random.normal(loc=centre, size=1000)
        ys = np.random.normal(loc=centre, size=1000)
        ppl.scatter(ax, xs, ys, label=str(centre))
    ppl.legend(ax, loc=4, fontsize=11)
    caption = ('A prettyplotlib `scatter` example\n'
               'showing default color cycle and scatter params')
    ax.set_title(caption, fontsize=12)
def plot_time_series(fig, ax, x, y, fill=False, title='', ylabel='', title_font={}, axis_font={}, **kwargs):
    """Draw a time series on ``ax`` as a line with markers on top.

    ``kwargs`` are forwarded to both the line and the scatter call.  Empty
    font dicts fall back to the module-level defaults.  When ``ylabel``
    contains "degree" the y range is fixed to 0..360.  ``fill`` shades the
    area between the series and the lower y limit in the line colour.
    """
    title_font = title_font or title_font_default
    axis_font = axis_font or axis_font_default

    line = ppl.plot(ax, x, y, **kwargs)
    ppl.scatter(ax, x, y, **kwargs)

    get_time_label(ax, x)
    fig.autofmt_xdate()

    if ylabel:
        ax.set_ylabel(ylabel, **axis_font)
    if title:
        ax.set_title(title, **title_font)
    if 'degree' in ylabel:
        ax.set_ylim([0, 360])
    ax.grid(True)

    if fill:
        lower = min(ax.get_ylim())
        ax.fill_between(x, y, lower + 1e-7,
                        facecolor=line[0].get_color(), alpha=0.15)
    plt.tight_layout()
def opcounts():
    """Scatter weighted histograms of operator counts for both workloads.

    The "SDSS" values are binned into 10 bins; the "SQLShare" values are
    binned with edges spaced to roughly the same width.  Empty bins are
    skipped.  The figure is shown and saved to ``plot_logops_query.pdf``
    and ``plot_logops_query.png``.
    """
    fig, ax = plt.subplots(1, figsize=(8, 4))
    ax.set_yscale('log')

    data = read_csv(['physops', 'counts'], True)
    # histogram() returns (counts, edges) with different lengths; unpack
    # the tuple directly instead of wrapping it in an array first.
    y, x = np.histogram(data['physops'], 10, weights=data['counts'])
    w = x[1] - x[0]
    x += w/2
    filled = [(bx, by) for bx, by in zip(x, y) if by]
    ppl.scatter(ax, x=[p[0] for p in filled], y=[p[1] for p in filled],
                marker="o", color=pcs[0], s=100, label="SDSS")

    data = read_csv(['ops'], False)
    d = data['ops']
    # The number of edges has to be an integer.
    n_edges = int((max(d) - min(d)) / w)
    y, x = np.histogram(d, bins=np.linspace(min(d), max(d), n_edges),
                        weights=data['count'])
    x += w/2
    filled = [(bx, by) for bx, by in zip(x, y) if by]
    ppl.scatter(ax, x=[p[0] for p in filled], y=[p[1] for p in filled],
                marker="v", color=pcs[1], s=100, label="SQLShare")

    ax.set_xlabel('Physical operators used')
    ax.set_ylabel('# of queries')
    ppl.legend(ax, loc='lower right')
    ax.set_xlim(0)
    ax.set_ylim(0)
    fig.tight_layout()
    plt.show()
    fig.savefig('plot_logops_query.pdf', format='pdf', transparent=True)
    fig.savefig('plot_logops_query.png', format='png', transparent=True)
def plot_clusters(clusters, candidates, bounds, vloc, hulls, shrink=0.9):
    """Plot all `clusters` among `candidates` with the `bounds` of the city
    (or at least `shrink` of them). Also plot convex `hulls` of gold areas
    if provided.

    Candidates that belong to no cluster are drawn as a smaller, gray
    "noise" group.  `vloc` is indexed as ``vloc[indices, 0/1]`` to obtain
    the coordinates of each group.
    """
    xbounds, ybounds = bounds
    unique_labels = len(clusters)
    clustered = set().union(*map(list, clusters))
    noise = list(candidates.difference(clustered))

    # One colour is needed per cluster plus one for the noise group.  The
    # named palette only has five entries, so switch to the colormap as soon
    # as that is not enough; otherwise zip() would drop the last cluster.
    n_groups = unique_labels + 1
    if n_groups > 5:
        colors = mpl.cm.Spectral(np.linspace(0, 1, n_groups))
    else:
        colors = [gray, red, green, blue, orange]

    plt.figure(figsize=(20, 15))
    groups = [noise] + clusters
    for position, (indices, col) in enumerate(zip(groups, colors)):
        is_noise = position == 0
        ppl.scatter(vloc[indices, 0], vloc[indices, 1],
                    s=16 if is_noise else 35,
                    color='gray' if is_noise else col,
                    alpha=0.6 if is_noise else 0.8,
                    label='noise' if is_noise else 'cluster {}'.format(position))

    hulls = hulls or []
    for idx, hull in enumerate(hulls):
        # Repeat the first vertex so the outline is drawn closed.
        first_again = list(range(len(hull))) + [0]
        ppl.plot(hull[first_again, 0], hull[first_again, 1], '--',
                 c=ppl.colors.almost_black, lw=1.0, alpha=0.9,
                 label='gold region' if idx == 0 else None)

    plt.xlim(shrink * xbounds)
    plt.ylim(shrink * ybounds)
    ppl.legend()
def plot_scatter(fig, ax, x, y, title='', xlabel='', ylabel='', title_font={}, axis_font={}, **kwargs):
    """Scatter ``y`` against ``x`` on ``ax`` inside a square, gridded axes.

    Empty font dicts fall back to the module-level defaults.  Axis labels
    are only set when non-empty; the title is always set.  ``kwargs`` go to
    ``ppl.scatter``.  ``fig`` is accepted but not used here.
    """
    title_font = title_font or title_font_default
    axis_font = axis_font or axis_font_default

    ppl.scatter(ax, x, y, **kwargs)

    labelled_setters = ((xlabel, ax.set_xlabel), (ylabel, ax.set_ylabel))
    for text, apply_label in labelled_setters:
        if text:
            apply_label(text, labelpad=10, **axis_font)

    ax.set_title(title, **title_font)
    ax.grid(True)
    # Stretch the data so that the axes box comes out square.
    ax.set_aspect(1. / ax.get_data_ratio())
def xy_linear_regression(df, x_var, y_var, **kwargs):
    """Fit ``y_var = intercept + slope * x_var`` with a linear regression.

    Rows with a missing value in either column are dropped before fitting.

    Keyword arguments:
        fit_intercept: forwarded to ``LinearRegression`` (default True).
        normalize: forwarded to ``LinearRegression`` only when truthy, so
            the default call does not depend on that parameter existing.
        plot: when truthy, scatter the data, draw the fitted line from
            x = 0 to the largest x, and annotate slope and intercept.

    Returns:
        ``(intercept, slope)``.
    """
    fit_intercept = kwargs.get('fit_intercept', True)
    d = df[[x_var, y_var]].dropna()

    # Plain 2-D column arrays carry the same (n, 1) shape as the transposed
    # np.matrix objects used previously.
    x = np.asarray(d[x_var]).reshape(-1, 1)
    y = np.asarray(d[y_var]).reshape(-1, 1)

    model_kwargs = {'fit_intercept': fit_intercept}
    if kwargs.get('normalize', False):
        model_kwargs['normalize'] = kwargs['normalize']
    clf = linear_model.LinearRegression(**model_kwargs)
    clf.fit(x, y)

    # intercept_ is a scalar when no intercept is fitted, so indexing it
    # directly fails; ravel handles both the scalar and the array form.
    intercept = np.ravel(clf.intercept_)[0]
    slope = np.ravel(clf.coef_)[0]

    if kwargs.get('plot', False):
        ppl.scatter(x=d[x_var], y=d[y_var])
        x_line = np.array([0, d[x_var].max()])
        plt.plot(x_line, intercept + slope * x_line, 'k-')
        plt.annotate('slope={slope:.4f}\nintercept={intercept:.4f}'.format(slope=slope, intercept=intercept),
                     (0.05, 0.9), xycoords='axes fraction')
        plt.xlabel(x_var)
        plt.ylabel(y_var)
    return intercept, slope
def make_plots(d, correct):
    """Scatter the men's and women's series on the module-level ``ax``.

    Args:
        d: pair ``(men, women)`` of records understood by
            ``convert_times`` and ``convert_dates``.
        correct: when truthy the times are passed through
            ``correct_times`` and the "speeds" labels, title and file name
            are used; otherwise the plain times are plotted.

    The module-level ``fig`` is saved to ``b2bwinningtimescorrected.png``
    or ``b2bwinningtimes.png`` respectively.
    """
    men = d[0]
    women = d[1]

    if correct:
        men_label, women_label = "Men's speeds", "Women's speeds"
        title = "Bay To Breakers Speeds (Seconds per Mile)"
        out_file = "b2bwinningtimescorrected.png"
    else:
        men_label, women_label = "Men's Times", "Women's Times"
        title = "Bay To Breakers times (seconds)"
        out_file = "b2bwinningtimes.png"

    Ym = convert_times(men)
    if correct:
        Ym = correct_times('m', Ym)
    Xm = convert_dates(men)
    ppl.scatter(ax, Xm, Ym, label=men_label)

    Yw = convert_times(women)
    if correct:
        Yw = correct_times('w', Yw)
    Xw = convert_dates(women)
    ppl.scatter(ax, Xw, Yw, label=women_label)

    ppl.legend(ax)
    ax.set_title(title)
    fig.savefig(out_file)
def plot_pca(df, c_scale=None, x_pc=1, y_pc=2, distance='L1',
             save_as=None, save_format='png', whiten=True, num_vectors=30,
             figsize=(10, 10), colors_dict=None, markers_dict=None,
             title='PCA', show_vectors=True, show_point_labels=True,
             show_vector_labels=True, column_ids_dict=None):
    """Project the rows of ``df`` onto two principal components and plot them.

    Args:
        df: data frame; rows are plotted as points, columns as vectors.
        c_scale: factor applied to the component vectors; when falsy it is
            derived from the ratio of the largest point norm to the largest
            vector norm.
        x_pc, y_pc: 1-based indices of the components on the x and y axes.
        distance: 'L1' or 'L2', the magnitude used to rank the vectors.
            Any other value ranks nothing, so no vectors are drawn.
        save_as, save_format: output path and format; PNG is written at
            300 dpi.  Nothing is saved when ``save_as`` is falsy.
        whiten: forwarded to the PCA.
        num_vectors: number of highest-ranked vectors kept.
        figsize: figure size; also drives the text and marker sizes.
        colors_dict, markers_dict: optional per-row colour / marker
            lookups, defaulting to 'black' / 'x' for missing entries.
        title: axes title.
        show_vectors, show_point_labels, show_vector_labels: toggles.
        column_ids_dict: optional mapping from column names to labels.

    Returns:
        The kept vectors as ``(x, y, column id, magnitude)`` tuples,
        largest magnitude first.
    """
    def _lookup(mapping, key, default):
        # Missing or unusable keys fall back to the default.
        if not mapping:
            return default
        try:
            return mapping[key]
        except (KeyError, IndexError, TypeError):
            return default

    # gather ids and values
    row_ids = df.index
    if column_ids_dict is not None:
        column_ids = [column_ids_dict[col] for col in df.columns]
    else:
        column_ids = df.columns
    # ``values`` replaces ``as_matrix()``, which newer pandas no longer has.
    df_array = df.values

    # perform pca
    n_components = max(x_pc, y_pc, 2)
    pca = dc.PCA(whiten=whiten, n_components=n_components)
    pca.fit(df_array)
    X = pca.transform(df_array)
    comp_x = pca.components_[x_pc - 1, :]
    comp_y = pca.components_[y_pc - 1, :]
    x_list = X[:, x_pc - 1]
    y_list = X[:, y_pc - 1]

    if not c_scale:
        c_scale = .75 * max([norm(point) for point in zip(x_list, y_list)]) / \
            max([norm(vector) for vector in zip(comp_x, comp_y)])
    size_scale = sqrt(figsize[0] * figsize[1]) / 1.5

    # sort features by magnitude/contribution to transformation
    comp_magn = []
    for (x, y, an_id) in zip(comp_x, comp_y, column_ids):
        x = x * c_scale
        y = y * c_scale
        if distance == 'L1':
            comp_magn.append((x, y, an_id, abs(y) + abs(x)))
        elif distance == 'L2':
            comp_magn.append((x, y, an_id, math.sqrt((y**2) + (x**2))))

    # create figure and plot
    pca_fig, ax = plt.subplots(figsize=figsize)
    for (x, y, an_id) in zip(x_list, y_list, row_ids):
        color = _lookup(colors_dict, an_id, 'black')
        marker = _lookup(markers_dict, an_id, 'x')
        if show_point_labels:
            ax.text(x, y, an_id, color=color, size=size_scale)
        ppl.scatter(ax, x, y, marker=marker, color=color, s=size_scale * 5)

    vectors = sorted(comp_magn, key=lambda item: item[3],
                     reverse=True)[:num_vectors]
    for vec_x, vec_y, vec_id, _magnitude in vectors:
        if show_vectors:
            ppl.plot(ax, [0, vec_x], [0, vec_y], color=ppl.almost_black,
                     linewidth=.5)
        if show_vector_labels:
            ax.text(vec_x, vec_y, vec_id, color='black', size=size_scale)

    var_1 = int(pca.explained_variance_ratio_[x_pc - 1] * 100)
    var_2 = int(pca.explained_variance_ratio_[y_pc - 1] * 100)
    ax.set_xlabel('Principal Component {} (Explains {}% Of Variance)'.format(
        str(x_pc), str(var_1)), size=size_scale * 2)
    ax.set_ylabel('Principal Component {} (Explains {}% Of Variance)'.format(
        str(y_pc), str(var_2)), size=size_scale * 2)
    ax.set_title(title, size=size_scale * 3)

    if save_as:
        kwargs = {}
        if save_format == 'png':
            kwargs['dpi'] = 300
        pca_fig.savefig(save_as, format=save_format, **kwargs)
    return vectors
# Scatter the yearly "max" value of every station that has at least
# `num_years_required` years of data, one legend entry per station.
for s in station_list:
    if "data" not in s:
        continue
    years = s["data"].keys()
    # Only show the stations with enough data.
    if len(s["data"].keys()) < num_years_required:
        continue
    xx = []
    yx = []
    for y in years:
        val = s["data"][y]["max"]
        xx.append(int(y))
        yx.append(val)
    ax.scatter(xx, yx, marker='o')
    ppl.scatter(ax, xx, yx, alpha=0.8, edgecolor='black', linewidth=0.15,
                label=str(s["station_num"]))

ppl.legend(ax, loc='right', ncol=1)
ax.set_xlabel('Year')
ax.set_ylabel('water level (m)')
ax.set_title("Stations exceeding " + str(num_years_required) +
             " years worth of water level data (MHHW)")
fig.set_size_inches(14, 8)

# <markdowncell>

# ### Number of stations available by number of years

# <codecell>
from numpy.random import multivariate_normal
from numpy import vstack
import numpy as np
from matplotlib import pyplot as plt
import prettyplotlib as ppl

# Three isotropic Gaussian clusters of equal size and variance.
var_scale = 0.01
sample_size = 1000
center1, center2, center3 = (
    np.array([1, 1]),
    np.array([-1, -1]),
    np.array([-1, 1]),
)

# Samples are drawn in the order center1, center2, center3.
x1, x2, x3 = [
    multivariate_normal(center, var_scale * np.eye(2), sample_size)
    for center in (center1, center2, center3)
]
X = vstack((x1, x2, x3))
#np.savetxt("./test_clusters/guassian3.csv",X,delimiter=",")

ppl.scatter(X[:, 0], X[:, 1])
plt.show()
import prettyplotlib as ppl
import matplotlib.pyplot as plt
import numpy as np

# Square axes with fixed limits so the diagonal really is y = x.
fig, ax = plt.subplots(1)
ax.set(aspect = 1)
ax.set_autoscaley_on(False)
ax.set_autoscalex_on(False)
ax.set_xlim(0, 0.35)
ax.set_ylim(0, 0.35)

data1 = np.genfromtxt('error_inv.csv', dtype=float, delimiter=',')
data2 = np.genfromtxt('error_var.csv', dtype=float, delimiter=',')

# Root of the per-row mean, minus a fixed offset per data set.
data1_mod = np.sqrt(np.mean(data1, 1)) - 0.0165
data2_mod = np.sqrt(np.mean(data2, 1)) - 0.01

# Number of rows where the first data set is not worse than the second.
count = int(np.count_nonzero(
    data1_mod <= data2_mod[:data1_mod.shape[0]]))
print(count)

ax.set_xlabel('RMSE regular')
ax.set_ylabel('RMSE rotation invariant')

ppl.scatter(ax, data2_mod, data1_mod, facecolor='#66c2a5', s=1)
ppl.plot([0, 0.35], [0, 0.35], '#fc8d62', linewidth=1)

fig.savefig('scatter.pdf')
# Reduce X to two components so that it can be drawn.
from sklearn.decomposition import RandomizedPCA
projector = RandomizedPCA(n_components=2, random_state=1)
Xp = projector.fit_transform(X)

# Plotting libraries
from matplotlib import pyplot as plt
import prettyplotlib as ppl

# One scatter series per distinct value of y.
populations = np.unique(y)
f, ax = plt.subplots(figsize=(10, 4))
for p in populations:
    members = (y == p)
    ppl.scatter(ax, Xp[members, 0], Xp[members, 1], label=p)
plt.xlim([-50, 100])
ppl.legend(ax, loc=1)
plt.savefig("randomized_pca.png")

# Learn with scikit-learn
# -----------------------
from matplotlib import pyplot as plt
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
from sklearn.metrics import confusion_matrix
if "data" in s: years = s["data"].keys() # Only show the stations with enough data. if len(s["data"].keys()) >= num_years_required: xx = [] yx = [] for y in years: xx.append(int(y)) val = s["data"][y]["max"] yx.append(val) ax.scatter(xx, yx, marker='o') ppl.scatter(ax, xx, yx, alpha=0.8, edgecolor='black', linewidth=0.15, label=str(s["station_num"])) ppl.legend(ax, loc='right', ncol=1) ax.set_xlabel('Year') ax.set_ylabel('water level (m)') ax.set_title("Stations exceeding " + str(num_years_required) + " years worth of water level data (MHHW)") fig.set_size_inches(14, 8) # <markdowncell> # ### Number of stations available by number of years
def ntt_proposal_figure2():
    """Plot figure 2 of the NTT telescope proposal.

    Builds two model SEDs (the base model joined to a power law with two
    different slopes), scales both to a reference magnitude of 18.0 in the
    band at index 3, and over-plots:

    * median WISE-band fluxes (band indices 9-11) of the two catalogues
      read from ``lowbetatab_3.fits`` (blue) and ``highbetatab_3.fits`` (red),
    * the three-band photometry of every object in ``Red_Quasar_photom.cat``
      (grey), each re-normalised to the model at its first wavelength.

    All inputs are read from hard-coded paths and ``input.yml``; the figure
    is shown interactively.

    Returns
    -------
    None
    """
    import yaml
    from load import load
    import numpy as np
    from astropy.table import Table
    from model import model
    from pl import pl
    from scipy.interpolate import interp1d
    import matplotlib
    import matplotlib.pyplot as plt
    import prettyplotlib as ppl

    # Index of 'SDSS_IMAG' in `magcolumns`; the code also uses it to index
    # bp / dlam / zromag.  NOTE(review): assumes those share the same band
    # ordering -- confirm.
    ref_band = 3
    ref_mag = 18.0

    with open('input.yml', 'r') as f:
        # safe_load instead of a bare yaml.load(f): recent PyYAML rejects
        # load() without a Loader, and a parameter file needs no arbitrary
        # object construction.
        parfile = yaml.safe_load(f)

    fittingobj = load(parfile)
    wavlen = fittingobj.get_wavlen()

    # The original unpickled a flux-correction array and immediately replaced
    # it with ones; only the unity correction was ever used, so the dead load
    # (and its dependency on the pickle file) is dropped.
    flxcorr = np.ones(len(wavlen))

    zromag = fittingobj.get_zromag()
    bp = fittingobj.get_bp()
    dlam = fittingobj.get_dlam()
    obs_lameff = np.asarray(fittingobj.get_lameff())

    tab1 = Table.read('/data/lc585/QSOSED/Results/140912/lowbetatab_3.fits')
    tab2 = Table.read('/data/lc585/QSOSED/Results/140912/highbetatab_3.fits')

    magcolumns = ['SDSS_UMAG', 'SDSS_GMAG', 'SDSS_RMAG', 'SDSS_IMAG',
                  'SDSS_ZMAG', 'UKIDSS_YMAG', 'UKIDSS_JMAG', 'UKIDSS_HMAG',
                  'UKIDSS_KMAG', 'ALLWISE_W1MAG', 'ALLWISE_W2MAG',
                  'ALLWISE_W3MAG', 'ALLWISE_W4MAG']

    # Per-band flux zero points.  They depend only on the band passes, so
    # they are computed once rather than once per catalogue object.
    f_0 = np.array([
        np.sum(band[1] * (0.10893 / (band[0] ** 2)) * band[0] * width)
        / np.sum(band[1] * band[0] * width)
        for band, width in zip(bp, dlam)])

    def catalogue_fluxes(tab):
        """Return (flam, lameff), one row per object and one column per band.

        Magnitudes are shifted so the reference band equals `ref_mag`, then
        converted to fluxes in erg/cm^2/s/A; effective wavelengths are
        divided by (1 + Z_1) for every object.
        """
        mags = np.array([np.array(tab[col]) for col in magcolumns])
        mags = mags - mags[ref_band, :] + ref_mag
        redshift = np.array(tab['Z_1'], dtype=float)
        lameff = obs_lameff[np.newaxis, :] / (1.0 + redshift)[:, np.newaxis]
        flam = f_0 * 10.0 ** (-0.4 * mags.T)
        return flam, lameff

    # Base model parameters, passed positionally to model().
    plslp1 = 0.46
    plslp2 = 0.03
    plbrk = 2822.50
    bbt = 1216.32
    bbflxnrm = 0.24
    galfra = 0.31
    elscal = 0.71
    scahal = 0.86
    ebv = 0.0

    _, wavlentmp, fluxtmp = model(plslp1, plslp2, plbrk, bbt, bbflxnrm,
                                  elscal, scahal, galfra, ebv, 18.0, 2.,
                                  fittingobj, flxcorr, parfile)

    # Wavelength index at which the power law takes over from the base model.
    wavnumjoin = (np.abs(
        wavlen - parfile['runhotdustplfit']['wavmin_bbpl'])).argmin()

    def join_power_law(slope):
        """Base model below the join index, power law `pl` from it onwards."""
        nrm = fluxtmp[wavnumjoin] / (wavlen[wavnumjoin] ** (slope - 2.0))
        plmodel = pl(wavlen, slope, nrm)
        joined = np.zeros(len(wavlen))
        joined[:wavnumjoin] = fluxtmp[:wavnumjoin]
        joined[wavnumjoin:] = plmodel[wavnumjoin:]
        return joined

    def scale_to_reference(flux):
        """Scale `flux` so its synthetic reference-band magnitude is `ref_mag`."""
        spc = interp1d(wavlentmp, flux, bounds_error=False, fill_value=0.0)
        band_wav, band_thru = bp[ref_band][0], bp[ref_band][1]
        sum1 = np.sum(band_thru * spc(band_wav) * band_wav * dlam[ref_band])
        sum2 = np.sum(band_thru * band_wav * dlam[ref_band])
        # The tiny offset keeps log10 finite if the band-averaged flux is zero.
        flxlam = sum1 / sum2 + 1e-200
        model_mag = (-2.5 * np.log10(flxlam)) - zromag[ref_band]
        delta_m = ref_mag - model_mag  # what must be added to match the data
        return flux * 10 ** (-0.4 * delta_m)  # erg/cm^2/s/A

    fnew = scale_to_reference(join_power_law(1.852337650258))
    # Second model (labelled beta_NIR = -0.09 in the figure) for comparison.
    fnew2 = scale_to_reference(join_power_law(0.9119355988796))

    flam, lameff = catalogue_fluxes(tab1)
    # Sized from tab2 itself.  The original allocated these arrays with
    # len(tab1), which either overflowed or left all-zero rows that biased
    # the medians whenever the two catalogues differ in length.
    flam_2, lameff_2 = catalogue_fluxes(tab2)

    # Manda's Very Red Quasars: columns 2-4 hold the magnitudes used with
    # zero points 9-11, column 12 the redshift.
    redcat = np.genfromtxt(
        '/data/lc585/QSOSED/Results/140920/Red_Quasar_photom.cat')
    red_z = redcat[:, 12]
    lameff_3 = (np.array([33680.0, 46180.0, 120000.0])
                / (1.0 + red_z)[:, np.newaxis])
    flam_3 = f_0[9:12] * 10.0 ** (-0.4 * redcat[:, 2:5])

    # Normalise everything to the model at wavelength index 520.
    nrm = fnew[520] * wavlen[520]
    flam = flam / nrm
    flam_2 = flam_2 / nrm
    flam_3 = flam_3 / nrm
    fnew = fnew / nrm
    fnew2 = fnew2 / nrm

    fig = plt.figure(figsize=(5, 3))
    ax = fig.add_subplot(1, 1, 1)

    ax.plot(wavlen, wavlen * fnew, color='black')
    ax.plot(wavlen[1900:], wavlen[1900:] * fnew2[1900:], color='black')

    # Median lambda*F_lambda with standard-deviation error bars for the three
    # WISE bands; one errorbar call per point so the points stay unconnected.
    for cat_flam, cat_lameff, colour in ((flam, lameff, 'blue'),
                                         (flam_2, lameff_2, 'red')):
        for band in (9, 10, 11):
            lam_f = cat_lameff[:, band] * cat_flam[:, band]
            ax.errorbar(np.median(cat_lameff[:, band]), np.median(lam_f),
                        yerr=np.std(lam_f), color=colour)

    # Red quasars: one faint line per object, pinned to the model at the
    # object's first rest-frame wavelength.
    for i in range(len(lameff_3)):
        xdat = np.array([lameff_3[i, 0], lameff_3[i, 1], lameff_3[i, 2]])
        ydat = xdat * flam_3[i]
        nrmind = np.argmin(np.abs(wavlen - xdat[0]))
        ydat = ydat * (wavlen[nrmind] * fnew[nrmind]) / ydat[0]
        ppl.scatter(xdat, ydat, color='grey', alpha=0.5)
        ppl.plot(xdat, ydat, color='grey', alpha=0.2)

    ax.set_xlim(1200, 50000)
    ax.set_ylim(0, 3.5)

    ax.set_ylabel(r'Relative Flux $\lambda F_{\lambda}(\lambda)$', fontsize=10)
    ax.set_xlabel(r'Rest-frame Wavelength ($\AA$)', fontsize=10)

    plt.text(17699, 0.21607, r'$\beta_{\rm NIR}=-0.09$', fontsize=8,
             horizontalalignment='left', verticalalignment='top')
    plt.text(17000, 0.6, r'$\beta_{\rm NIR}=0.85$', fontsize=8, rotation=20.0,
             horizontalalignment='left', verticalalignment='bottom')
    plt.text(6604, 2.23966, r'H$\alpha$', fontsize=8, rotation=90.0,
             horizontalalignment='center', verticalalignment='bottom')
    plt.text(4902, 1.21462, r'H$\beta$ & OIII', fontsize=8, rotation=90.0,
             horizontalalignment='center', verticalalignment='bottom')
    plt.text(1558, 2.3328, r'CIV', fontsize=8, rotation=90.0,
             horizontalalignment='center', verticalalignment='bottom')

    plt.tick_params(axis='both', which='major', labelsize=8)
    ax.set_xscale('log')
    ax.set_xticks([2000, 5000, 10000, 20000, 40000])
    # Plain numbers instead of powers of ten on the log axis.
    ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
    plt.tight_layout()

    # plt.savefig('/home/lc585/Dropbox/IoA/NTT_Proposal_95A/esoform-95A/figure2_v2.pdf')
    plt.show()

    return None
def plot_ts_diagram(self, ax, sal, temp, xlabel='Salinity', ylabel='Temperature', title='',
                    axis_font={}, title_font={}, tick_font={}, **kwargs):
    """Draw a temperature-salinity scatter over dashed density contours on `ax`.

    Parameters
    ----------
    ax : matplotlib axes that receives the contours and the scatter.
    sal, temp : equally long sequences; NaN entries are masked out.
    xlabel, ylabel, title : label texts; underscores are shown as spaces.
    axis_font, title_font : keyword dicts for the label/title calls; the
        module-level ``axis_font_default`` / ``title_font_default`` are used
        when empty.  The dict defaults are only read, never mutated.
    tick_font : keyword dict passed to ``ax.tick_params`` when non-empty.
    **kwargs : forwarded to ``ppl.scatter``.

    Raises
    ------
    ValueError
        If `sal` and `temp` differ in length.
    """
    if not axis_font:
        axis_font = axis_font_default
    if not title_font:
        title_font = title_font_default

    sal = np.ma.array(sal, mask=np.isnan(sal))
    temp = np.ma.array(temp, mask=np.isnan(temp))
    if len(sal) != len(temp):
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('Sal and Temp arrays are not the same size!')

    # Figure out boundaries (mins and maxs) with a small margin
    smin = sal.min() - (0.01 * sal.min())
    smax = sal.max() + (0.01 * sal.max())
    tmin = temp.min() - (0.1 * temp.max())
    tmax = temp.max() + (0.1 * temp.max())

    # Number of grid cells in x (0.1 salinity steps) and y (unit temperature
    # steps).  round() returns a float, which numpy no longer accepts as an
    # array shape or as a linspace sample count, hence the int() casts.
    xdim = int(round((smax - smin) / 0.1 + 1, 0))
    ydim = int(round((tmax - tmin) + 1, 0))

    # Temperature and salinity vectors spanning the grid
    ti = np.linspace(1, ydim - 1, ydim) + tmin
    si = np.linspace(1, xdim - 1, xdim) * 0.1 + smin

    # Fill the grid with densities, then subtract 1000 to convert to sigma-t
    dens = np.zeros((ydim, xdim))
    for j in range(ydim):
        for i in range(xdim):
            dens[j, i] = sw.dens(si[i], ti[j], 0)
    dens = dens - 1000

    # Draw on the axes that was passed in rather than on pyplot's current
    # axes, so contours and scatter always share the same plot.
    cs = ax.contour(si, ti, dens, linestyles='dashed', colors='k')
    ax.clabel(cs, fontsize=12, inline=1, fmt='%1.0f')

    ppl.scatter(ax, sal, temp, **kwargs)

    ax.set_xlabel(xlabel.replace("_", " "), labelpad=10, **axis_font)
    ax.set_ylabel(ylabel.replace("_", " "), labelpad=10, **axis_font)
    ax.set_title(title.replace("_", " "), **title_font)
    ax.set_aspect(1. / ax.get_data_ratio())  # make axes square
    if tick_font:
        ax.tick_params(**tick_font)
    plt.tight_layout()
def plot_time_series(self, fig, is_timeseries, ax, x, y, fill=False, title='', xlabel='', ylabel='',
                     title_font={}, axis_font={}, tick_font={}, scatter=False, qaqc=[], events={},
                     **kwargs):
    """Plot `y` against `x` on `ax`, with optional fill, events and QA/QC marks.

    Parameters
    ----------
    fig : figure owning `ax`; used to auto-format date labels.
    is_timeseries : when true the x axis is labelled via
        ``self.get_time_label`` instead of `xlabel`.
    ax : axes to draw on.
    x, y : data to plot.
    fill : shade the area between the data and the lower y limit.
    title, xlabel, ylabel : texts; underscores are shown as spaces.
    title_font, axis_font : keyword dicts for title/labels; module defaults
        are used when empty.
    tick_font : keyword dict for ``ax.tick_params`` when non-empty.
    scatter : use ``ppl.scatter`` instead of ``ppl.plot``.
    qaqc : per-sample flags; samples with a flag > 0 are circled in red.
    events : dict whose ``'events'`` list holds dicts with ``'start_date'``
        and ``'class'``; each is drawn as a dashed vertical line.
    **kwargs : forwarded to the plot/scatter call.

    The dict/list defaults are only read, never mutated.
    """
    if not title_font:
        title_font = title_font_default
    if not axis_font:
        axis_font = axis_font_default

    if scatter:
        ppl.scatter(ax, x, y, **kwargs)
    else:
        h = ppl.plot(ax, x, y, **kwargs)

    if is_timeseries:
        self.get_time_label(ax, x)
        fig.autofmt_xdate()
    else:
        ax.set_xlabel(xlabel.replace("_", " "), **axis_font)
    if ylabel:
        ax.set_ylabel(ylabel.replace("_", " "), **axis_font)
    if title:
        ax.set_title(title.replace("_", " "), **title_font)
    ax.grid(True)

    if fill:
        miny = min(ax.get_ylim())
        # Match the line colour when there is a line, else the default font colour.
        if not scatter:
            fill_colour = h[0].get_color()
        else:
            fill_colour = axis_font_default['color']
        ax.fill_between(x, y, miny + 1e-7, facecolor=fill_colour, alpha=0.15)

    if events:
        ylim = ax.get_ylim()
        for event in events['events']:
            time = datestr2num(event['start_date'])
            # Separate name: the original rebound `x` here, so the QA/QC
            # overlay below indexed the two-point event marker instead of
            # the data.
            event_x = np.array([time, time])
            ax.plot(event_x, ylim, '--', label=event['class'])

        legend = ax.legend()
        if legend:
            for label in legend.get_texts():
                label.set_fontsize(10)

    if len(qaqc) > 0:
        # asarray makes the comparison and the fancy indexing element-wise
        # even when plain lists are passed in.
        bad_data = np.where(np.asarray(qaqc) > 0)
        ppl.plot(ax, np.asarray(x)[bad_data], np.asarray(y)[bad_data],
                 marker='o', mfc='none', linestyle='None',
                 markersize=6, markeredgewidth=2, mec='r')

    # plt.tick_params(axis='both', which='major', labelsize=10)
    if tick_font:
        ax.tick_params(**tick_font)
    plt.tight_layout()
plt.savefig('test/u_SDSS_vs_GC')
'''
# NOTE(review): the triple quote above appears to close a block string opened
# before this fragment; `bands`, `petroMags`, `mags`, `mag_err`,
# `makeDiagonalLine` and the tick locators are defined elsewhere -- confirm.

# Axis limits [low, high], one pair per entry of `bands`.
limits = [[12, 18.5], [11.4, 16.5], [10, 16], [9.5, 16], [9.5, 16]]
for i, band in enumerate(bands):
    lim_lo = limits[i][0]
    lim_hi = limits[i][1]
    x, m, b = makeDiagonalLine([lim_lo, lim_hi])
    fig = plt.figure(figsize=(8, 8))
    ax = plt.subplot(111)
    # Column i of both magnitude arrays plotted against each other, with
    # error bars on the y values.
    c = ppl.scatter(ax, petroMags[:, i], mags[:, i], s=8, c='k', edgecolor='k')
    ax.axis([lim_lo, lim_hi, lim_lo, lim_hi])
    ax.errorbar(petroMags[:, i], mags[:, i], yerr=mag_err[:, i], mew=0, linestyle = "none", color="black")
    # Straight reference line from makeDiagonalLine.
    plt.plot(x,m*x + b, c='k')
    ax.xaxis.set_major_locator(majorLocator)
    ax.xaxis.set_minor_locator(minorLocator)
    ax.yaxis.set_major_locator(majorLocator)
    ax.yaxis.set_minor_locator(minorLocator)
    ax.minorticks_on()
    # Change the labels back to black
    ax.xaxis.label.set_color('black')
    ax.yaxis.label.set_color('black')
    # Change the axis title also back to black
    ax.title.set_color('black')
    # Get back the top and right axes lines ("spines")
    spines_to_remove = ['top', 'right']
def plot_pca(df, c_scale=None, x_pc=1, y_pc=2, distance='L1',
             save_as=None, save_format='png', whiten=True, num_vectors=30,
             figsize=(10, 10), colors_dict=None, markers_dict=None,
             title='PCA', show_vectors=True, show_point_labels=True,
             show_vector_labels=True, column_ids_dict=None):
    """Fit a PCA to `df` and plot two of its components with loading vectors.

    Parameters
    ----------
    df : DataFrame; rows are the plotted points, columns the features.
    c_scale : factor applied to the component loadings before drawing; when
        falsy it is derived from the largest point and loading norms.
    x_pc, y_pc : 1-based indices of the components on the x and y axes.
    distance : 'L1' or 'L2', the magnitude used to rank loading vectors.
        Any other value ranks nothing, so no vectors are drawn or returned.
    save_as, save_format : output path and format; PNG is saved at 300 dpi.
    whiten : forwarded to the PCA constructor.
    num_vectors : number of top-ranked loading vectors to keep.
    figsize : figure size; also drives the text and marker sizes.
    colors_dict, markers_dict : optional row id -> colour / marker lookups;
        missing ids fall back to 'black' / 'x'.
    title : figure title.
    show_vectors, show_point_labels, show_vector_labels : drawing switches.
    column_ids_dict : optional column name -> display label mapping.

    Returns
    -------
    list of (x, y, column_id, magnitude) tuples for the kept loading
    vectors, sorted by decreasing magnitude.
    """
    def lookup(mapping, key, default):
        """Return mapping[key], or `default` when there is no usable entry."""
        if not mapping:
            return default
        try:
            return mapping[key]
        except (KeyError, IndexError, TypeError):
            # Narrowed from a bare `except:` so interrupts and unrelated
            # failures are no longer swallowed.
            return default

    # gather ids and values
    row_ids = df.index
    if column_ids_dict is not None:
        column_ids = [column_ids_dict[col] for col in df.columns]
    else:
        column_ids = df.columns
    # `.values` replaces DataFrame.as_matrix(), which newer pandas removed.
    df_array = df.values

    # perform pca
    n_components = max(x_pc, y_pc, 2)
    pca = dc.PCA(whiten=whiten, n_components=n_components)
    pca.fit(df_array)
    X = pca.transform(df_array)
    comp_x = pca.components_[x_pc - 1, :]
    comp_y = pca.components_[y_pc - 1, :]
    x_list = X[:, x_pc - 1]
    y_list = X[:, y_pc - 1]

    if not c_scale:
        # Scale the loadings so the longest one reaches 75% of the most
        # distant point.
        c_scale = .75 * max([norm(point) for point in zip(x_list, y_list)]) / \
            max([norm(vector) for vector in zip(comp_x, comp_y)])

    size_scale = sqrt(figsize[0] * figsize[1]) / 1.5

    # sort features by magnitude/contribution to transformation
    comp_magn = []
    for (x, y, an_id) in zip(comp_x, comp_y, column_ids):
        x = x * c_scale
        y = y * c_scale
        if distance == 'L1':
            comp_magn.append((x, y, an_id, abs(y) + abs(x)))
        elif distance == 'L2':
            comp_magn.append((x, y, an_id, math.sqrt((y ** 2) + (x ** 2))))

    # create figure and plot
    pca_fig, ax = plt.subplots(figsize=figsize)

    for (x, y, an_id) in zip(x_list, y_list, row_ids):
        color = lookup(colors_dict, an_id, 'black')
        marker = lookup(markers_dict, an_id, 'x')
        if show_point_labels:
            ax.text(x, y, an_id, color=color, size=size_scale)
        ppl.scatter(ax, x, y, marker=marker, color=color, s=size_scale * 5)

    vectors = sorted(comp_magn, key=lambda item: item[3],
                     reverse=True)[:num_vectors]
    # Distinct loop names: the original reused `marker` and `distance`,
    # shadowing the `distance` parameter.
    for x, y, vector_label, _magnitude in vectors:
        if show_vectors:
            ppl.plot(ax, [0, x], [0, y], color=ppl.almost_black, linewidth=.5)
            if show_vector_labels:
                ax.text(x, y, vector_label, color='black', size=size_scale)

    var_1 = int(pca.explained_variance_ratio_[x_pc - 1] * 100)
    var_2 = int(pca.explained_variance_ratio_[y_pc - 1] * 100)

    ax.set_xlabel(
        'Principal Component {} (Explains {}% Of Variance)'.format(str(x_pc),
                                                                   str(var_1)),
        size=size_scale * 2)
    ax.set_ylabel(
        'Principal Component {} (Explains {}% Of Variance)'.format(str(y_pc),
                                                                   str(var_2)),
        size=size_scale * 2)
    ax.set_title(title, size=size_scale * 3)

    if save_as:
        kwargs = {}
        if save_format == 'png':
            kwargs['dpi'] = 300
        pca_fig.savefig(save_as, format=save_format, **kwargs)

    return vectors
# NOTE(review): `years`, `s`, `xx`, `yx`, `ax`, `fig`, `st_yr` and `ed_yr` are
# defined before this fragment; `s` looks like one station record -- confirm.
for y in years:
    val = s["data"][y]["max"]
    if val is not None:
        try:
            #round to 2dp
            # NOTE(review): "%.2f" % val yields a string, so `yx` collects
            # strings rather than floats -- confirm the plot expects that.
            val = "%.2f" % val
            yx.append(val)
            xx.append(int(y))
        # NOTE(review): the bare except silently drops any year whose value
        # cannot be formatted or whose key is not an integer.
        except:
            pass
#ppl.scatter(ax, xx, yx,alpha=0.8,edgecolor='black',linewidth=0.15 ,label=str(s["station_num"])+":"+str(s["long_name"][0]))
ppl.scatter(ax, xx, yx, alpha=0.8, edgecolor='black', linewidth=0.15, label=str(s["long_name"]))
ax.legend(loc=1)
ax.set_title( 'Annual Max sea surface wave significant height (m) (Observed & Model)')
ax.set_xlabel('Year')
ax.set_ylabel('sea surface wave significant height (m)')
# One tick every second year between st_yr (inclusive) and ed_yr (exclusive).
ax.set_xticks(numpy.arange(st_yr, ed_yr, 2))
fig.set_size_inches(14, 8)
# Shrink current axis by 20%
box = ax.get_position()
# NOTE(review): `pi_hat`, `u`, `v` and the bare `subplot` / `text` names come
# from earlier code (presumably a pylab-style star import) -- confirm.
print "Pi_hat = ", np.mean(pi_hat) # show the mean of pi over the nn estimates
print "Pi real = 3.14159265359" # the true value of pi
print "ECM =", np.mean(np.subtract(3.14159265359,pi_hat) ** 2)

# Auxiliary data for the plot
# we only need the random values that satisfy the circle condition
# (distance from (0.5, 0.5) below 0.5)
u_f = []
v_f = []
for i,e in enumerate(u):
    if np.sqrt((u[i]-0.5 )** 2 + (v[i]-0.5) ** 2) < 0.5:
        u_f += [u[i]]
        v_f += [v[i]]

plt.figure(figsize=(6,12))
ax1 = subplot(211) # plot of the Monte Carlo simulation
ax1.axhline(0.5, color="grey", alpha=0.5)
ax1.axvline(0.5, color="grey", alpha=0.5)
ax1.set_title(u"Simulación Montecarlo", fontsize=14)
# All samples first, then the ones inside the circle drawn on top.
ppl.scatter(u,v)
ppl.scatter(u_f,v_f, linewidth=0.08)
ax2 = subplot(212) # plot of the nn estimates of pi
ppl.scatter(range(len(pi_hat)),pi_hat)
ax2.axhline(3.14159265359, color="grey", alpha=0.5, linewidth=2)
text(len(pi_hat)*1.01, 3.14159265359, "Pi = 3.1415...")
ax2.set_title(u"Estimación de Pi", fontsize=14)
# plt.show() # remove the leading "#" if you are using the shell
#plt.savefig("simulacion_pi_montercalo.png", dpi=200) # remove the "#" to save the image.
def plot(x, y, **kwargs):
    """Scatter `y` against `x` with prettyplotlib.

    `True` is forwarded as a third positional argument and every keyword
    argument is passed through unchanged.  Returns None.
    NOTE(review): how prettyplotlib interprets the positional `True` is not
    visible from here -- confirm.
    """
    positional = (x, y, True)
    ppl.scatter(*positional, **kwargs)
sumpeak = np.array([]) suminterval = np.array([]) sumfilterpeak = np.array([]) sumfilterinterval = np.array([]) for time in processlist: time = time.astype(int) loaddata = pathfilename+str(time[0])+'_'+str(time[-1])+'.npz' sumpeak = np.append(sumpeak, np.load(loaddata)['totalpeak']) suminterval = np.append(suminterval, np.load(loaddata)['totalinterval']) sumfilterpeak = np.append(sumfilterpeak, np.load(loaddata)['totalfilterpeak']) sumfilterinterval = np.append(sumfilterinterval, np.load(loaddata)['totalfilterinterval']) os.remove(loaddata) fig, ax = plt.subplots(1) ppl.scatter(ax, sumpeak, suminterval) ppl.scatter(ax, sumfilterpeak, sumfilterinterval) ax.set_title(savefilename + ' Heterogeneity distrubition') fig.savefig(pathfilename + '_Heterogeneity_distrubition' + '.png',dpi=300) plt.close() print 'Saved scatter plot:', savefilename sortsumpeak = np.sort(sumfilterpeak) sortsumpeakdiff = np.diff(sortsumpeak) diffdistrubition = np.sort(sortsumpeakdiff.copy()) limit = np.mean(diffdistrubition) fig, ax = plt.subplots(1) ppl.plot(np.arange(len(sortsumpeakdiff)), sortsumpeakdiff) ppl.plot(np.arange(len(diffdistrubition)), diffdistrubition)
# Loadings of component index 1 of the fitted reduction, one row per k-mer.
# `reduced_data`, `kmer_colums`, `input_kmers`, `input_kmers_counts` and
# `colors` are defined before this fragment.
loadings = pandas.DataFrame({
    "loadings": reduced_data.components_[1, :],
    "kmer": kmer_colums
})
# NOTE(review): the result of sort() is not assigned; unless this pandas
# version sorts in place, the call has no effect -- confirm.
loadings.sort('loadings')

# Scatter plot of GC rich vs AT rich
# k-mers made up of exactly the letters {G, C} or exactly {A, T}.
gc_rich_kmers = [x for x in kmer_colums if set(x) == set(['G', 'C'])]
at_rich_kmers = [x for x in kmer_colums if set(x) == set(['A', 'T'])]
# Row-wise totals of the selected k-mer count columns.
gc_rich = input_kmers_counts[gc_rich_kmers].apply(lambda x: sum(x), axis=1)
at_rich = input_kmers_counts[at_rich_kmers].apply(lambda x: sum(x), axis=1)
kmer_gc_only = pandas.concat((gc_rich, at_rich), axis=1)
ppl.scatter(kmer_gc_only[0], kmer_gc_only[1], c=colors)
pl.show()

# We make a data frame with the PCA coordinates and all annotations
input_kmers_counts_output = pandas.pivot_table(
    input_kmers, values="count",
    index=['sequence_description', 'sequence_length', 'GC'],
    columns=["kmer"], fill_value=0)
reduced_data_coord = reduced_data.fit_transform(
    input_kmers_counts[kmer_colums])
output_table = pandas.DataFrame({
# NOTE(review): fragment of a longer routine; `totalpeak`, `totalinterval`,
# `oripeak`, `interval`, `count`, `result`, `savefilename` and `pathfilename`
# are defined earlier, and the original nesting is not visible -- confirm.
# Append this run's peaks, pairing every peak with the current interval.
totalpeak = np.append(totalpeak, oripeak)
totalinterval = np.append(totalinterval, [interval] * len(oripeak))
for iprint in range(len(oripeak)):
    print 'peak', oripeak[iprint], 'Popen(avg)', oripopendiffpeak[iprint], 'Popen(std)', oripopendiffstdamp[iprint]
# Plot the diagnostic only when the counter reaches 10, then restart it.
if count == 10:
    #plotorifit(savefilename, std, oripeak, oripopendiffstdamp, interval, precentage)
    plotpopendiff(savefilename, result, oripeak, oripopendiffstdamp)
    count = 0
count += 1
fig, ax = plt.subplots(1)
ppl.scatter(ax, totalpeak, totalinterval)
ax.set_title(savefilename + ' Heterogeneity distrubition Start:')
fig.savefig(pathfilename + '_Heterogeneity_distrubition' + '.png',dpi=500)
plt.close()
print 'Saved scatter plot:' + savefilename
'''
if os.path.exists(savefilename + '.npy'):
    print savefilename + '.npy', 'Found. Loading the data.'
    plotcolour = np.load(savefilename + '.npy')
    print savefilename + '.npy', 'Loaded.'
else:
def make_scatter(self, use_prettyplotlib=True, hists=True, num_bins=None):
    '''
    Plot every pair of columns in self.columns against each other.

    If self.subplot is enabled, all comparisons are drawn in one triangular
    grid of panels with histograms on the diagonal; otherwise one figure is
    made per pair. Figures are shown when self.verbose is set and saved
    under self.save_name / self.save_type otherwise.

    Inspiration for this form came from the package
    ![triangle.py](https://github.com/dfm/triangle.py).
    Small snippets to set the labels and figure size came from triangle.py.

    Parameters
    ----------
    use_prettyplotlib : bool
        Draw with prettyplotlib when it can be imported, else matplotlib.
    hists : bool
        Draw the diagonal histograms (grid mode only).
    num_bins : int or None
        Histogram bin count. When None it is set once, from the square root
        of the number of non-NaN samples of the first diagonal panel.
    '''
    if use_prettyplotlib:
        try:
            import prettyplotlib as plt
        except ImportError:
            import matplotlib.pyplot as plt
            use_prettyplotlib = False
            # Parenthesised single argument prints the same on Python 2 and 3.
            print("prettyplotlib not installed. Using matplotlib...")
    else:
        import matplotlib.pyplot as plt

    # Setup subplots if plotting together
    if self.subplot:
        num = len(self.columns)
        factor = 2.0           # size of one side of one panel
        lbdim = 0.5 * factor   # size of left/bottom margin
        trdim = 0.3 * factor   # size of top/right margin
        whspace = 0.05         # w/hspace size
        plotdim = factor * num + factor * (num - 1.) * whspace
        dim = lbdim + plotdim + trdim
        fig, axes = plt.subplots(nrows=num, ncols=num, figsize=(dim, dim))

        lb = lbdim / dim
        tr = (lbdim + plotdim) / dim
        fig.subplots_adjust(left=lb, bottom=lb, right=tr, top=tr,
                            wspace=whspace, hspace=whspace)

    for i, column1 in enumerate(self.columns):
        for j, column2 in enumerate(self.columns):

            data1 = self.dataframe[column1]
            data2 = self.dataframe[column2]

            # Keep only the rows where both columns are not NaN
            nans = np.isnan(data1) | np.isnan(data2)
            data1 = data1[~nans]
            data2 = data2[~nans]

            if self.subplot:
                ax = axes[i, j]
                if j > i:  # Don't bother plotting duplicates
                    ax.set_visible(False)
                    ax.set_frame_on(False)
                else:
                    if j == i:  # Plot histograms
                        if num_bins is None:
                            # Histogram routines need an integer bin count;
                            # the bare square root was a float.
                            num_bins = max(1, int(np.sqrt(len(data1))))
                        if hists:
                            if use_prettyplotlib:
                                plt.hist(ax, data1, num_bins, grid="y")
                            else:
                                ax.hist(data1, num_bins)
                                ax.grid(True)
                        else:
                            ax.set_visible(False)
                            ax.set_frame_on(False)

                        ax.set_xticklabels([])
                        ax.set_yticklabels([])

                    if j != i:
                        if use_prettyplotlib:
                            plt.scatter(ax, data2, data1)
                        else:
                            ax.scatter(data2, data1)
                            ax.grid(True)

                    ax.xaxis.set_major_locator(MaxNLocator(5))
                    ax.yaxis.set_major_locator(MaxNLocator(5))

                    # Tick labels and axis names only on the outer edge
                    if i < num - 1:
                        ax.set_xticklabels([])
                    else:
                        for tick_label in ax.get_xticklabels():
                            tick_label.set_rotation(45)
                        ax.set_xlabel(column2)
                        ax.xaxis.set_label_coords(0.5, -0.3)

                    if j > 0:
                        ax.set_yticklabels([])
                    else:
                        for tick_label in ax.get_yticklabels():
                            tick_label.set_rotation(45)
                        ax.set_ylabel(column1)
                        ax.yaxis.set_label_coords(-0.3, 0.5)

            else:
                if j < i:
                    fig, axes = plt.subplots(1)
                    if use_prettyplotlib:
                        plt.scatter(axes, data2, data1, grid="y")
                    else:
                        axes.scatter(data2, data1)
                        axes.grid(True)
                    axes.set_xlabel(column2)  # ADD UNITS!
                    axes.set_ylabel(column1)  # ADD UNITS!
                    if self.verbose:
                        p.show()
                    else:
                        fig.savefig(self.save_name + "_" + column1 + "_" +
                                    column2 + "." + self.save_type)
                        p.close()

    if self.subplot:
        # p.tight_layout()
        if self.verbose:
            p.show()
        else:
            fig.savefig(self.save_name + "_" + "scatter" + "." + self.save_type)
def make_scatter(self, use_prettyplotlib=True, hists=True, num_bins=None):
    '''
    Plot every pair of columns in self.columns against each other.

    If self.subplot is enabled, all comparisons are drawn in one triangular
    grid of panels with histograms on the diagonal; otherwise one figure is
    made per pair. Figures are shown when self.verbose is set and saved
    under self.save_name / self.save_type otherwise.

    Inspiration for this form came from the package
    ![triangle.py](https://github.com/dfm/triangle.py).
    Small snippets to set the labels and figure size came from triangle.py.

    Parameters
    ----------
    use_prettyplotlib : bool
        Draw with prettyplotlib when it can be imported, else matplotlib.
    hists : bool
        Draw the diagonal histograms (grid mode only).
    num_bins : int or None
        Histogram bin count. When None it is set once, from the square root
        of the number of non-NaN samples of the first diagonal panel.
    '''
    if use_prettyplotlib:
        try:
            import prettyplotlib as plt
        except ImportError:
            import matplotlib.pyplot as plt
            use_prettyplotlib = False
            # Parenthesised single argument prints the same on Python 2 and 3.
            print("prettyplotlib not installed. Using matplotlib...")
    else:
        import matplotlib.pyplot as plt

    # Setup subplots if plotting together
    if self.subplot:
        num = len(self.columns)
        factor = 2.0           # size of one side of one panel
        lbdim = 0.5 * factor   # size of left/bottom margin
        trdim = 0.3 * factor   # size of top/right margin
        whspace = 0.05         # w/hspace size
        plotdim = factor * num + factor * (num - 1.) * whspace
        dim = lbdim + plotdim + trdim
        fig, axes = plt.subplots(nrows=num, ncols=num, figsize=(dim, dim))

        lb = lbdim / dim
        tr = (lbdim + plotdim) / dim
        fig.subplots_adjust(left=lb, bottom=lb, right=tr, top=tr,
                            wspace=whspace, hspace=whspace)

    for i, column1 in enumerate(self.columns):
        for j, column2 in enumerate(self.columns):

            data1 = self.dataframe[column1]
            data2 = self.dataframe[column2]

            # Keep only the rows where both columns are not NaN
            nans = np.isnan(data1) | np.isnan(data2)
            data1 = data1[~nans]
            data2 = data2[~nans]

            if self.subplot:
                ax = axes[i, j]
                if j > i:  # Don't bother plotting duplicates
                    ax.set_visible(False)
                    ax.set_frame_on(False)
                else:
                    if j == i:  # Plot histograms
                        if num_bins is None:
                            # Histogram routines need an integer bin count;
                            # the bare square root was a float.
                            num_bins = max(1, int(np.sqrt(len(data1))))
                        if hists:
                            if use_prettyplotlib:
                                plt.hist(ax, data1, num_bins, grid="y")
                            else:
                                ax.hist(data1, num_bins)
                                ax.grid(True)
                        else:
                            ax.set_visible(False)
                            ax.set_frame_on(False)

                        ax.set_xticklabels([])
                        ax.set_yticklabels([])

                    if j != i:
                        if use_prettyplotlib:
                            plt.scatter(ax, data2, data1)
                        else:
                            ax.scatter(data2, data1)
                            ax.grid(True)

                    ax.xaxis.set_major_locator(MaxNLocator(5))
                    ax.yaxis.set_major_locator(MaxNLocator(5))

                    # Tick labels and axis names only on the outer edge
                    if i < num - 1:
                        ax.set_xticklabels([])
                    else:
                        for tick_label in ax.get_xticklabels():
                            tick_label.set_rotation(45)
                        ax.set_xlabel(column2)
                        ax.xaxis.set_label_coords(0.5, -0.3)

                    if j > 0:
                        ax.set_yticklabels([])
                    else:
                        for tick_label in ax.get_yticklabels():
                            tick_label.set_rotation(45)
                        ax.set_ylabel(column1)
                        ax.yaxis.set_label_coords(-0.3, 0.5)

            else:
                if j < i:
                    fig, axes = plt.subplots(1)
                    if use_prettyplotlib:
                        plt.scatter(axes, data2, data1, grid="y")
                    else:
                        axes.scatter(data2, data1)
                        axes.grid(True)
                    axes.set_xlabel(column2)  # ADD UNITS!
                    axes.set_ylabel(column1)  # ADD UNITS!
                    if self.verbose:
                        p.show()
                    else:
                        fig.savefig(self.save_name + "_" + column1 + "_" +
                                    column2 + "." + self.save_type)
                        p.close()

    if self.subplot:
        # p.tight_layout()
        if self.verbose:
            p.show()
        else:
            fig.savefig(self.save_name + "_" + "scatter" + "." + self.save_type)