def timed_agg(df, filepath, plot_width=int(900), plot_height=int(900*7.0/12), cache_ranges=True):
    """Aggregate ``df`` as points, export the shaded image, and time the aggregation.

    The canvas is created with the x/y ranges currently held in the
    module-level ``CACHED_RANGES``. When ``cache_ranges`` is true, the ranges
    the canvas reports afterwards are written back to that global.

    Args:
        df: data handed to ``Canvas.points``.
        filepath: file name passed to ``export_image`` (export path is ".").
        plot_width: first positional argument of ``ds.Canvas``.
        plot_height: second positional argument of ``ds.Canvas``.
        cache_ranges: whether to store the canvas ranges in ``CACHED_RANGES``.

    Returns:
        A ``(img, seconds)`` tuple: ``img`` is whatever ``export_image``
        returned, ``seconds`` is the wall-clock time spent building the canvas
        and aggregating the points (shading and export are not included).
    """
    global CACHED_RANGES

    started = time.time()
    canvas = ds.Canvas(
        plot_width,
        plot_height,
        x_range=CACHED_RANGES[0],
        y_range=CACHED_RANGES[1],
    )
    # ``p`` is not a local name; it has to be provided at module level.
    counts = canvas.points(df, p.x, p.y)
    elapsed = time.time() - started

    if cache_ranges:
        CACHED_RANGES = (canvas.x_range, canvas.y_range)

    shaded = tf.shade(counts)
    return export_image(shaded, filepath, export_path="."), elapsed
def master_tile(data, img, hex_code, z, dot_size=None):
    """Render the full map for zoom level ``z``, to be broken down into tiles later.

    The input is scaled by ``proj_factor(z, img)``, its ``y`` values are
    flipped against the map size, and the points are shaded in a single
    colour. The image is exported as ``master_tile`` and also returned.

    :param data: point data with ``x`` and ``y``; multiplied by the projection factor
    :param img: forwarded to ``proj_factor``
    :param hex_code: the single colour used as the shade colormap
    :param z: zoom level; ``map_size(z)`` gives the square canvas side
    :param dot_size: any truthy value turns on a circular spread; the spread
        radius itself is fixed at 1 px regardless of the value
    :return: the shaded (and possibly spread) image
    """
    side = map_size(z)

    projected = proj_factor(z, img) * data
    projected.y = side - projected.y  # flip the y axis within the square map

    canvas = ds.Canvas(
        x_range=[0, side],
        y_range=[0, side],
        plot_width=side,
        plot_height=side,
    )
    shaded = tf.shade(canvas.points(projected, 'x', 'y'), cmap=[hex_code], alpha=100)

    tile = (
        tf.spread(shaded, px=1, shape='circle', name="spread square")
        if dot_size
        else shaded
    )
    export_image(tile, 'master_tile', background=None)
    return tile
def create_plot(data, out, width):
    """Draw the transit network from a CSV of shape points and save it as an image.

    Rows are grouped by ``route_color``. Each group is rasterised as lines
    weighted by the sum of ``times_taken`` and shaded from black to the
    group's colour; the layers are then added together and exported on a
    black background.

    Args:
        data: CSV input for ``pd.read_csv`` with the columns ``shape_pt_lon``,
            ``shape_pt_lat``, ``route_color`` and ``times_taken``.
        out: output file name; a trailing ``.png`` is dropped before the name
            is handed to ``export_image``.
        width: canvas width; the height is derived from it and the
            latitude/longitude extent of the data.

    Returns:
        None
    """
    shapes = pd.read_csv(data, low_memory=False)

    lon = shapes.shape_pt_lon
    lat = shapes.shape_pt_lat
    x_range = (lon.min(), lon.max())
    y_range = (lat.min(), lat.max())
    lat_span = y_range[1] - y_range[0]
    lon_span = x_range[1] - x_range[0]
    height = int(round(width * lat_span / lon_span))

    canvas = ds.Canvas(plot_width=width, plot_height=height,
                       x_range=x_range, y_range=y_range)

    def shade_route(color, segments):
        # One layer per route colour, fading from black to that colour.
        line_agg = canvas.line(segments, 'shape_pt_lon', 'shape_pt_lat',
                               agg=ds.sum('times_taken'))
        return tf.shade(line_agg, cmap=['#000000', '#' + color], how='eq_hist')

    layers = [shade_route(color, segments)
              for color, segments in shapes.groupby('route_color')]
    combined = tf.stack(*layers, how='add')

    target = out[:-4] if out.endswith('.png') else out
    export_image(combined, filename=target, background='black')
def timed_agg(df,
              filepath,
              plot_width=int(900),
              plot_height=int(900 * 7.0 / 12),
              cache_ranges=True):
    """Time a points aggregation of ``df`` and export the shaded result.

    Uses the ranges stored in the module-level ``CACHED_RANGES`` for the
    canvas and, if ``cache_ranges`` is set, replaces them with the ranges the
    canvas reports after aggregating.

    Args:
        df: data handed to ``Canvas.points``.
        filepath: file name passed to ``export_image`` (export path is ".").
        plot_width: first positional argument of ``ds.Canvas``.
        plot_height: second positional argument of ``ds.Canvas``.
        cache_ranges: whether to update ``CACHED_RANGES``.

    Returns:
        ``(img, seconds)``: the value returned by ``export_image`` and the
        elapsed time of canvas creation plus aggregation. Shading and export
        happen after the clock is stopped.
    """
    global CACHED_RANGES
    cached_x = CACHED_RANGES[0]
    cached_y = CACHED_RANGES[1]

    t_begin = time.time()
    canvas = ds.Canvas(plot_width, plot_height, x_range=cached_x, y_range=cached_y)
    # ``p`` comes from module scope; it is not defined in this function.
    aggregate = canvas.points(df, p.x, p.y)
    t_finish = time.time()

    if cache_ranges:
        CACHED_RANGES = (canvas.x_range, canvas.y_range)

    exported = export_image(tf.shade(aggregate), filepath, export_path=".")
    return exported, t_finish - t_begin
if black: img = tf.set_background(img, 'black') return img def tests_datashader(): import datashader as ds import datashader.transfer_functions as tf import pandas as pd df = pd.read_csv('/Users/iregon/C3S/dessaps/test_data/imma_converter/observations-sst-2014-6.psv',usecols=[6,7,14],sep="|",skiprows=0) agg_mean = cvs.points(df, 'longitude', 'latitude', ds.mean('observation_value')) agg_max = cvs.points(df, 'longitude', 'latitude', ds.max('observation_value')) agg_min = cvs.points(df, 'longitude', 'latitude', ds.min('observation_value')) agg_count = cvs.points(df, 'longitude', 'latitude', ds.count('observation_value')) #tf.shade(agg.where(agg > 0), cmap=["lightblue", "darkblue"]) #img = tf.shade(agg.where(agg > 0), cmap=['green', 'yellow', 'red'], how='linear', span=[275,305]) df = pd.read_csv('/Users/iregon/C3S/dessaps/test_data/imma_converter/observations-sst-2014-6.psv',usecols=[6,7,14],sep="|",skiprows=0) bounds = dict(x_range = (-180, 180), y_range = (-90, 90)) plot_width = 360*10 plot_height = 180*10 canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height,**bounds) agg_mean = canvas.points(df, 'longitude', 'latitude', ds.max('observation_value')) img = tf.shade(agg_mean, cmap=['green', 'yellow', 'red'], how='linear', span=[275,305]) utils.export_image(img=img,filename='Oct2431doshade.png', fmt=".png", background=None) points = hv.Points(df['observation_value'].values) img = points.hist()
'#f1f9a9', '#bfe5a0', '#74c7a5', '#378ebb', '#5e4fa2' ] color_key = {str(d):c for d,c in enumerate(pal)} reducer = umap.UMAP(random_state=42) embedding = reducer.fit_transform(data) df = pd.DataFrame(embedding, columns=('x', 'y')) df['class'] = pd.Series([str(x) for x in target], dtype="category") cvs = ds.Canvas(plot_width=400, plot_height=400) agg = cvs.points(df, 'x', 'y', ds.count_cat('class')) img = tf.shade(agg, color_key=color_key, how='eq_hist') utils.export_image(img, filename='fashion-mnist', background='black') image = plt.imread('fashion-mnist.png') fig, ax = plt.subplots(figsize=(6, 6)) plt.imshow(image) plt.setp(ax, xticks=[], yticks=[]) plt.title("Fashion MNIST data embedded\n" "into two dimensions by UMAP\n" "visualised with Datashader", fontsize=12) plt.show()
# Load the Galaxy Zoo counts table from the catalog directory.
gz_path = '{}/nsa_all_raw_gz_counts_10.0_arcsec.csv'.format(catalog_dir)
gz_catalog = pd.read_csv(gz_path)
print('gz nsa galaxies: {}'.format(len(gz_catalog)))

# Load the NSA/DECaLS FITS table and keep only the columns used below.
joint_path = '{}/nsa_v1_0_0_decals_dr5.fits'.format(catalog_dir)
joint_columns = ['nsa_id', 'ra', 'dec', 'petrotheta']
joint_catalog = Table(fits.getdata(joint_path))[joint_columns].to_pandas()
print('decals nsa galaxies: {}'.format(len(joint_catalog)))

# Outer join keeps rows that appear in only one of the two tables.
gz_and_decals = pd.merge(gz_catalog, joint_catalog, on='nsa_id', how='outer')
print('gz and decals: {}'.format(len(gz_and_decals)))

old_galaxies, new_galaxies = match_galaxies_to_catalog(joint_catalog, gz_and_decals)
for summary in (len(old_galaxies), len(new_galaxies), new_galaxies.columns.values):
    print(summary)

plot_catalog_overlap(old_galaxies, new_galaxies, ['old', 'new'])

# Density map of the new galaxies by sky position.
canvas = ds.Canvas(plot_width=400, plot_height=400)
aggc = canvas.points(new_galaxies, 'ra', 'dec')
img = tf.shade(aggc)
export_image(img, 'new_galaxies_in_dr5')

# Histogram of petrotheta for the new galaxies, saved and then cleared.
new_galaxies['petrotheta'].hist()
plt.savefig('petrotheta_of_new.png')
plt.clf()
import datashader as ds
import datashader.transfer_functions as tf
from datashader.utils import export_image
import pandas as pd

# Tab-separated store table with Longitude, Latitude and NatlStrNumber columns.
df = pd.read_csv(r"./data/store_data.csv", sep="\t")

# Mean NatlStrNumber per pixel on an 800 x 800 canvas.
cvs = ds.Canvas(plot_width=800, plot_height=800)
agg = cvs.points(df, 'Longitude', 'Latitude', ds.mean('NatlStrNumber'))

# NOTE(review): color_key is passed together with cmap although ``agg`` is a
# ds.mean aggregate; datashader documents color_key for categorical
# aggregates - confirm whether it has any effect here.
shade_options = dict(
    cmap=['lightblue', 'darkblue'],
    color_key=[
        '#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#ffff33',
        '#a65628', '#f781bf', '#999999', '#66c2a5', '#fc8d62', '#8da0cb',
        '#a6d854', '#ffd92f', '#e5c494', '#ffffb3', '#fb8072', '#fdb462',
        '#fccde5', '#d9d9d9', '#ccebc5', '#ffed6f',
    ],
    how='eq_hist',
    alpha=255,
    min_alpha=40,
)
img = tf.shade(agg, **shade_options)

export_image(img, "image")
ds.max('observation_value')) agg_min = cvs.points(df, 'longitude', 'latitude', ds.min('observation_value')) agg_count = cvs.points(df, 'longitude', 'latitude', ds.count('observation_value')) #tf.shade(agg.where(agg > 0), cmap=["lightblue", "darkblue"]) #img = tf.shade(agg.where(agg > 0), cmap=['green', 'yellow', 'red'], how='linear', span=[275,305]) df = pd.read_csv( '/Users/iregon/C3S/dessaps/test_data/imma_converter/observations-sst-2014-6.psv', usecols=[6, 7, 14], sep="|", skiprows=0) bounds = dict(x_range=(-180, 180), y_range=(-90, 90)) plot_width = 360 * 10 plot_height = 180 * 10 canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height, **bounds) agg_mean = canvas.points(df, 'longitude', 'latitude', ds.max('observation_value')) img = tf.shade(agg_mean, cmap=['green', 'yellow', 'red'], how='linear', span=[275, 305]) utils.export_image(img=img, filename='Oct2431doshade.png', fmt=".png", background=None) points = hv.Points(df['observation_value'].values) img = points.hist()
# basic datashader: 2-D count histogram of ``swe_19_045`` against ``dce``
import datashader as ds
import pandas as pd
import datashader.transfer_functions as tf
from datashader.utils import export_image

data_in = 'C:\\Users\\Cob\\index\\educational\\usask\\research\\masters\\data\\lidar\\analysis\\mb_15_merged_.10m_canopy_19_149.csv'
img_out = "C:\\Users\\Cob\\index\\educational\\usask\\research\\masters\\graphics\\test_swe_19_045_vs_dce.png"

data = pd.read_csv(data_in)

# Count the rows falling in each cell of a 1000 x 1000 grid and shade the
# counts on a log scale from light to dark blue.
cvs = ds.Canvas(plot_width=1000, plot_height=1000)
agg = cvs.points(data, 'swe_19_045', 'dce', agg=ds.count('dce'))
img = tf.shade(agg, cmap=['lightblue', 'darkblue'], how='log')

# export_image saves to ``filename + fmt`` with fmt=".png" by default, so
# passing a name that already ends in ".png" would produce "....png.png".
# Hand it the stem so the file is written exactly at ``img_out``.
export_stem = img_out[:-4] if img_out.lower().endswith('.png') else img_out
export_image(img, export_stem)

# basic datashader
# import rastools
#
# import numpy as np
# import matplotlib
# matplotlib.use('TkAgg')
# import matplotlib.pyplot as plt
#
#
# plt.scatter(data.swe_19_045, data.dnt)
name="", canvas=None, cat=None, margin=0.05): if canvas is None: xr = nodes.x.min() - margin, nodes.x.max() + margin yr = nodes.y.min() - margin, nodes.y.max() + margin canvas = ds.Canvas(x_range=xr, y_range=yr, **cvsopts) np = my_nodesplot(nodes, name + " nodes", canvas, cat) ep = edgesplot(edges, name + " edges", canvas) return tf.stack(ep, np, how="over", name=name) forcedirected = forceatlas2_layout(nodes, edges) fd = forcedirected fd.iat[0, 2] = (fd.x.min() + fd.x.max()) / 2 # center focus node on x fd.iat[0, 3] = 1 # center focus node on y ## save image?! image = tf.Image( my_graphplot(fd, connect_edges(fd, edges), "Force-directed", cat="type", margin=0.02)) export_image(image, filename=file) os.rename(file_name, saved_file_name)
import datashader as ds
from datashader.utils import export_image
import datashader.transfer_functions as tf
import pandas as pd
from colorcet import fire
from PIL import Image
from tilemap import TileMap

point_df = pd.read_parquet("point_data.gzip")

# Bounding box laid out as [x_min, x_max, y_min, y_max]; both ends inclusive.
bounds = [1.288423e+07, 1.291500e+07, -3.772000e+06, -3.750000e+06]
inside_x = point_df["x"].between(bounds[0], bounds[1])
inside_y = point_df["y"].between(bounds[2], bounds[3])
filter_point_df = point_df[inside_x & inside_y]

# Height/width ratio of the points that survived the filter.
filter_point_desc_df = filter_point_df.describe()
y_extent = filter_point_desc_df.loc["max", "y"] - filter_point_desc_df.loc["min", "y"]
x_extent = filter_point_desc_df.loc["max", "x"] - filter_point_desc_df.loc["min", "x"]
aspect_ratio = y_extent / x_extent

# Shade the point density on a canvas that keeps that ratio.
dim = 2000
cvs = ds.Canvas(plot_width=dim, plot_height=int(dim * aspect_ratio))
agg = cvs.points(filter_point_df, 'x', 'y')
img = tf.shade(agg, cmap=fire)

figname = 'perth_wires'
export_image(img, figname, background="black")

# Render the street map for the given extents and save an opaque copy.
render_map = TileMap(extents=(115.7410072222, -32.0661730556,
                              116.0174188889, -31.8985416667))
map_image = render_map.render(zoom=14)
# NOTE(review): (2000, 1430) is hard-coded; presumably it is meant to match the
# canvas size (dim x int(dim * aspect_ratio)) - confirm against the data.
map_image_resized = map_image.resize((2000, 1430), resample=Image.LANCZOS)
map_image_resized.putalpha(255)
map_image_resized.save("perth_street_map.png")
def _knn_records_to_columns(records):
    """Turn a list of dicts into one dict of lists (a key missing from a record yields None)."""
    keys = {key for record in records for key in record}
    return {key: [record.get(key) for record in records] for key in keys}


def _knn_class_frame(df, prefix, suffix, class_name, feature):
    """Copy one class's columns out of ``df`` and rename them to the shared per-class schema."""
    frame = df[[
        prefix + '_xvar_mean', prefix + '_xvar_std',
        prefix + '_probs_mean', prefix + '_probs_std',
        'f1' + suffix, 'f1' + suffix + 'err', 'correct_source'
    ]].copy()
    frame['class'] = class_name
    frame.columns = [
        feature + '_mean', feature + '_std', 'probs_mean', 'probs_std',
        'f1', 'f1err', 'correct_source', 'class'
    ]
    return frame


def _knn_split_classes(df, feature):
    """Return the galaxy, quasar and star frames plus their concatenation with a categorical ``class``."""
    galaxy = _knn_class_frame(df, 'galaxy', 'g', 'GALAXY', feature)
    quasar = _knn_class_frame(df, 'quasar', 'q', 'QSO', feature)
    star = _knn_class_frame(df, 'star', 's', 'STAR', feature)
    combined = pd.concat([galaxy, quasar, star], axis=0)
    # datashader wants a categorical column for count_cat
    combined['class'] = combined['class'].astype('category')
    return galaxy, quasar, star, combined


def _knn_export_density(df, xcol, filename, width, height, x_range, y_range):
    """Rasterise ``xcol`` against ``f1`` per class with datashader and export it as ``filename`` + '.png'."""
    cvs = ds.Canvas(plot_width=width, plot_height=height,
                    x_range=x_range, y_range=y_range,
                    x_axis_type='linear', y_axis_type='linear')
    agg = cvs.points(df, xcol, 'f1', ds.count_cat('class'))
    ckey = dict(GALAXY=(101, 236, 101), QSO='hotpink', STAR='dodgerblue')
    img = tf.shade(agg, color_key=ckey, how='log')
    export_image(img, filename, fmt='.png', background='white')


def _knn_errorbars(sample, xcol, xerrcol, color, elinewidth, mew,
                   label=None, xscale=None, yscale=None):
    """Overlay F1 error bars for ``sample`` on the current axes, optionally stretching x and/or y."""

    def scaled(series, factor):
        # Leave the series untouched when no stretch factor is requested.
        return series if factor is None else series * factor

    plt.errorbar(scaled(sample[xcol], xscale),
                 scaled(sample['f1'], yscale),
                 xerr=scaled(sample[xerrcol], xscale),
                 yerr=scaled(sample['f1err'], yscale),
                 color=color, elinewidth=elinewidth, markeredgewidth=mew,
                 ls='none', label=label)


def _knn_divide_ytick_labels(ax):
    """Relabel y ticks as tick/10 (the image is drawn 10x taller) and format x ticks to one decimal."""
    ax.set_yticklabels([tick / 10 for tick in ax.get_yticks()])
    ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))


def _knn_probability_xticks(ax):
    """Place ticks on the stretched 0-2 x axis and label them as probabilities 0-1."""
    ax.set_xticks(np.arange(0, 2.1, step=0.2))
    ax.set_xticklabels(np.arange(0, 1.1, step=0.1))
    # Round-trip through the label text; without this some labels show many decimals.
    labels = [item.get_text() for item in ax.get_xticklabels()]
    ax.set_xticklabels([str(round(float(label), 2)) for label in labels])


def _knn_decorate_axes(xlabel, ylabel):
    """Apply the tick styling and axis labels shared by every panel to the current axes."""
    plt.tick_params(axis='y', which='both', right=True)
    plt.minorticks_on()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)


def plot_knn_f1scores(plot_label=''):
    """Plot per-source kNN F1 scores for the 1-D and the 10-D neighbour searches.

    Loads the pickled ``knn_f1scores_1D`` and ``knn_f1scores_10D`` results via
    ``load_obj``, reshapes them into one frame per class, renders four
    datashader density images (feature vs F1 and probability vs F1, for 1-D
    and 10-D), then builds two matplotlib figures that show those images with
    a thinned sample of error bars on top. Only rows with
    ``correct_source == 1`` go into the density images; the error-bar samples
    are drawn from the unfiltered per-class frames.

    Files written to the working directory: ``knn1d_1d_vs_f1.png``,
    ``knn10d_1d_vs_f1.png``, ``knn1d_probs_vs_f1.png``,
    ``knn10d_probs_vs_f1.png``, ``knn_plot_1D<plot_label>.pdf`` and
    ``knn_plot_10D<plot_label>.pdf``.

    Args:
        plot_label: suffix inserted before ``.pdf`` in the two figure names.

    Raises:
        SystemExit: if either pickled result cannot be loaded.
    """
    print('Loading knn indices from previous run saved on disk...')
    try:
        knn_f1scores_1d = load_obj('knn_f1scores_1D')
        knn_f1scores_10d = load_obj('knn_f1scores_10D')
    except Exception:
        # Narrowed from a bare ``except`` so Ctrl-C and interpreter exit are
        # not mistaken for a missing pickle.
        print(
            'Failed to load knn_f1scores_*.pkl from disk - did you run "get_knn_accuracy()" yet?'
        )
        raise SystemExit

    # Combine each list of dicts into a single dict of lists.
    df1d = pd.DataFrame(_knn_records_to_columns(knn_f1scores_1d))
    df10d = pd.DataFrame(_knn_records_to_columns(knn_f1scores_10d))

    df1d_g, df1d_q, df1d_s, df_all_1d = _knn_split_classes(df1d, 'feature1d')
    df10d_g, df10d_q, df10d_s, df_all_10d = _knn_split_classes(df10d, 'feature10d')

    # Keep only sources flagged as correct for the density images.
    print(df_all_1d.correct_source.value_counts())
    print(df_all_10d.correct_source.value_counts())
    df_all_1d = df_all_1d[df_all_1d.correct_source == 1]
    df_all_10d = df_all_10d[df_all_10d.correct_source == 1]

    # ----------------- datashader pngs -----------------
    # Too many points for matplotlib, so each density panel is pre-rendered.
    ymin = 0
    ymax = 1.05

    xmin1d = df1d.star_xvar_mean.min() - 0.1  # pad for plotting later
    xmax1d = df1d.star_xvar_mean.max() + 0.1
    print(xmin1d, xmax1d)
    _knn_export_density(df_all_1d, 'feature1d_mean', 'knn1d_1d_vs_f1',
                        1000, 600, (xmin1d, xmax1d), (ymin, ymax))

    xmin10d = df10d.star_xvar_mean.min() - 0.1  # pad for plotting later
    xmax10d = df10d.star_xvar_mean.max() + 0.1
    print(xmin10d, xmax10d)
    _knn_export_density(df_all_10d, 'feature10d_mean', 'knn10d_1d_vs_f1',
                        200, 120, (xmin10d, xmax10d), (ymin, ymax))

    xmin1d_probs = 0
    xmax1d_probs = 1.05
    _knn_export_density(df_all_1d, 'probs_mean', 'knn1d_probs_vs_f1',
                        300, 300, (xmin1d_probs, xmax1d_probs), (ymin, ymax))

    xmin10d_probs = 0
    xmax10d_probs = 1.05
    _knn_export_density(df_all_10d, 'probs_mean', 'knn10d_probs_vs_f1',
                        200, 200, (xmin10d_probs, xmax10d_probs), (ymin, ymax))

    # ----------------- 1-D figure -----------------
    img_1d_1d = mpimg.imread('knn1d_1d_vs_f1.png')
    img_1d_probs = mpimg.imread('knn1d_probs_vs_f1.png')

    mpl.rcParams.update({'font.size': 10})
    mew = 0.5
    elinewidth = 0.5
    fig, axs = plt.subplots(1, 2, figsize=(14.5, 4))

    # --- 1D --- feature vs F1 (y axis drawn 10 times larger) ---
    plt.sca(axs[0])
    plt.imshow(img_1d_1d, extent=[xmin1d, xmax1d, ymin * 10, ymax * 10])
    _knn_divide_ytick_labels(axs[0])
    samp = 2500  # plot every samp-th source to give a feel for the error bars
    for frame, color, label in ((df1d_g, galaxy_c, 'Galaxies'),
                                (df1d_q, quasar_c, 'Quasars'),
                                (df1d_s, star_c, 'Stars')):
        _knn_errorbars(frame[0::samp], 'feature1d_mean', 'feature1d_std',
                       color, elinewidth, mew, label=label, yscale=10)
    _knn_decorate_axes('1D feature', 'F1 score in 1 dimensions')
    plt.xlim(-7, 12.5)
    plt.legend(frameon=False, loc='lower right')
    plt.tight_layout()
    fig.tight_layout()

    # --- 1D --- probability vs F1 (x axis stretched by xf) ---
    plt.sca(axs[1])
    xf = 2
    plt.imshow(img_1d_probs,
               extent=[xmin1d_probs * xf, xmax1d_probs * xf, ymin, ymax])
    _knn_probability_xticks(axs[1])
    df1d_g2 = df1d_g[(df1d_g.f1 < 0.85) & (df1d_g.probs_mean < 0.85)][0::3000]
    _knn_errorbars(df1d_g2, 'probs_mean', 'probs_std', galaxy_c,
                   elinewidth, mew, label='Galaxies', xscale=xf)
    df1d_q2 = df1d_q[(df1d_q.f1 < 0.85) & (df1d_q.probs_mean < 0.85)][0::3000]
    _knn_errorbars(df1d_q2, 'probs_mean', 'probs_std', quasar_c,
                   elinewidth, mew, label='Quasars', xscale=xf)
    # plot more quasars at lower values in the undersampled region (unlabelled)
    df1d_q2 = df1d_q[(df1d_q.f1 < 0.85) & (df1d_q.probs_mean < 0.75)][0::800]
    _knn_errorbars(df1d_q2, 'probs_mean', 'probs_std', quasar_c,
                   elinewidth, mew, xscale=xf)
    df1d_s2 = df1d_s[(df1d_s.f1 < 0.85) & (df1d_s.probs_mean < 0.85)][0::3000]
    _knn_errorbars(df1d_s2, 'probs_mean', 'probs_std', star_c,
                   elinewidth, mew, label='Stars', xscale=xf)
    _knn_decorate_axes('Classification probability', 'F1 score in 1 dimension')
    plt.tight_layout()
    fig.savefig('knn_plot_1D' + plot_label + '.pdf')
    plt.clf()

    # ----------------- 10-D figure -----------------
    elinewidth = 0.2
    mpl.rcParams.update({'font.size': 10})  # else it is really small in the paper
    img_10d_1d = mpimg.imread('knn10d_1d_vs_f1.png')
    img_10d_probs = mpimg.imread('knn10d_probs_vs_f1.png')
    fig, axs = plt.subplots(1, 2, figsize=(14.5, 4))
    xf = 2  # make x-axis twice as long as y

    # --- 10D --- feature vs F1 (y axis drawn 10 times larger) ---
    plt.sca(axs[0])
    plt.imshow(img_10d_1d, extent=[xmin10d, xmax10d, ymin * 10, ymax * 10])
    _knn_divide_ytick_labels(axs[0])
    # only plot error bars below 0.95 because above this they are very small
    for frame, color, label in ((df10d_g, galaxy_c, 'Galaxies'),
                                (df10d_q, quasar_c, 'Quasars'),
                                (df10d_s, star_c, 'Stars')):
        _knn_errorbars(frame[frame.f1 < 0.95][0::500],
                       'feature10d_mean', 'feature10d_std',
                       color, elinewidth, mew, label=label, yscale=10)
    _knn_decorate_axes('1D feature', 'F1 score in 10 dimensions')
    plt.xlim(-7, 12.5)
    plt.tight_layout()

    # --- 10D --- probability vs F1 (x axis stretched by xf) ---
    plt.sca(axs[1])
    plt.imshow(img_10d_probs,
               extent=[xmin10d_probs * xf, xmax10d_probs * xf, ymin, ymax])
    _knn_probability_xticks(axs[1])
    # restrict to the low-F1, low-probability region so the plot is not overcrowded
    for frame, color, label in ((df10d_g, galaxy_c, 'Galaxy'),
                                (df10d_q, quasar_c, 'Quasar'),
                                (df10d_s, star_c, 'Star')):
        sample = frame[(frame.f1 < 0.85) & (frame.probs_mean < 0.85)][0::1000]
        _knn_errorbars(sample, 'probs_mean', 'probs_std',
                       color, elinewidth, mew, label=label, xscale=xf)
    _knn_decorate_axes('Classification probability', 'F1 score in 10 dimensions')
    plt.legend(frameon=False, loc='upper left')
    plt.tight_layout()
    fig.tight_layout()
    fig.savefig('knn_plot_10D' + plot_label + '.pdf')
from datashader import utils

os.chdir("//sbs2003/Daten-CME/")
t1 = time.time()


def data_pool(file):
    """Open one parquet file with ``dd.read_parquet``, report it, and return the frame."""
    df = dd.read_parquet(file)
    print(file + " loaded")
    return df


data = None

if __name__ == '__main__':
    print(datetime.datetime.now())
    t1 = time.time()

    files = glob.iglob('*.csv_2_.parquet')

    # Use the pool as a context manager so its worker processes are shut down
    # once the map has returned instead of lingering until interpreter exit.
    with Pool(os.cpu_count()) as p:
        frames = p.map(data_pool, files)
    data = dd.concat(frames)  # reset_index(drop=True))

    canvas = ds.Canvas(x_range=(-74.25, -73.7),
                       y_range=(40.5, 41),
                       plot_width=8000,
                       plot_height=8000)
    agg = canvas.points(data, 'End_Lon', 'End_Lat')
    pic = tf.set_background(tf.shade(agg, cmap=reversed(blues)),
                            color="#364564")  #364564
    utils.export_image(pic, "NYCPlot fn1", fmt=".png")
    print("time needed", time.time() - t1)
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from IPython.core.display import HTML, display

print("Begin...")

#df = pd.read_hdf('census.h5', 'census')
# Pair of (x_range, y_range) tuples, unpacked into the canvas below.
USA = ((-13884029, -7453304), (2698291, 6455972))
plot_width = int(1000)
plot_height = int(plot_width*7.0/12)

# Random stand-in data: 10000 points inside a 1000e3-wide square whose
# lower-left corner is (-10668666.5, 4577131.5).
west_values = np.random.random(10000)*1000e3 + -10668666.5
north_values = np.random.random(10000)*1000e3 + 4577131.5
df = pd.DataFrame({'meterswest': west_values, 'metersnorth': north_values})
print(df.tail())

background = "black"
# Widen the notebook container to the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

print("Computing aggregate...")
cvs = ds.Canvas(plot_width, plot_height, *USA)
agg = cvs.points(df, 'meterswest', 'metersnorth')

print("Making Image ...")
# The colormap is reversed only when the background is not black.
use_reversed = background != "black"
cmap = colormap_select(Hot, 0.2, reverse=use_reversed)
# NOTE(review): tf.interpolate is the older name for tf.shade in datashader;
# confirm the installed release still provides it.
interp = tf.interpolate(agg, cmap=cmap, how='eq_hist')
export_image(interp, "census_ds_hot_eq_hist", background=background)