Example #1
0
def timed_agg(df, filepath, plot_width=int(900), plot_height=int(900*7.0/12), cache_ranges=True):
    """Aggregate ``df`` as points on a canvas, export the shaded image, and time it.

    Only canvas construction and the ``points`` aggregation are timed;
    shading and export happen after the clock is stopped.

    Args:
        df: data frame handed to ``Canvas.points``.
        filepath: file name passed to ``export_image`` (exported under ".").
        plot_width: canvas width in pixels.
        plot_height: canvas height in pixels.
        cache_ranges: when true, store the canvas ranges in the module-level
            ``CACHED_RANGES`` so later calls reuse them.

    Returns:
        Tuple ``(img, seconds)``: the value returned by ``export_image`` and
        the elapsed wall-clock time of the aggregation.
    """
    global CACHED_RANGES

    t0 = time.time()
    canvas = ds.Canvas(
        plot_width,
        plot_height,
        x_range=CACHED_RANGES[0],
        y_range=CACHED_RANGES[1],
    )
    # `p` is a module-level object supplying the x/y column names.
    aggregate = canvas.points(df, p.x, p.y)
    elapsed = time.time() - t0

    if cache_ranges:
        CACHED_RANGES = (canvas.x_range, canvas.y_range)

    shaded = tf.shade(aggregate)
    return export_image(shaded, filepath, export_path="."), elapsed
Example #2
0
def master_tile(data, img, hex_code, z, dot_size=None):
    """
    makes the map at zoom level = z to be broken down into tiles

    The data is scaled by ``proj_factor(z, img)``, its y axis is flipped
    within a square of side ``map_size(z)``, and the point counts are shaded
    in a single colour and exported under the name 'master_tile'.

    :param data: frame with ``x`` and ``y`` columns (multiplied as a whole
        by the projection factor)
    :param img: forwarded to ``proj_factor``
    :param hex_code: the only colour in the colormap used for shading
    :param z: zoom level, forwarded to ``map_size`` and ``proj_factor``
    :param dot_size: used only as an on/off switch; any truthy value
        spreads every point by exactly 1 px, whatever its magnitude
    :return: the shaded (and optionally spread) image
    """
    side = map_size(z)

    projected = proj_factor(z, img) * data
    # Flip vertically so that y grows in the opposite direction.
    projected.y = side - projected.y

    canvas = ds.Canvas(plot_width=side,
                       plot_height=side,
                       x_range=[0, side],
                       y_range=[0, side])
    counts = canvas.points(projected, 'x', 'y')
    shaded = tf.shade(counts, cmap=[hex_code], alpha=100)

    if dot_size:
        shaded = tf.spread(shaded, px=1, shape='circle', name="spread square")

    export_image(shaded, 'master_tile', background=None)
    return shaded
def create_plot(data, out, width):
    """Creates a figure of the ZVV transit network using ZVV's color scheme.

    Each route colour is rendered as its own line layer (black fading to the
    route colour, weighted by ``times_taken``) and the layers are added
    together on a black background.

    Args:
        data: a csv file containing data usable for line plots; the columns
            shape_pt_lon, shape_pt_lat, route_color and times_taken are read
        out: the generated image is saved here; a trailing '.png' is
            stripped because export_image appends the extension itself
        width: width of the image in pixels; the height is derived from it
            so that the lon/lat extent keeps its proportions

    Returns:
        None
    """

    # route_color holds hex strings such as "000000". Left to type inference,
    # an all-digit column would be parsed as integers, dropping leading zeros
    # and making the '#' + color concatenation below fail.
    plot_data = pd.read_csv(data,
                            low_memory=False,
                            dtype={'route_color': str})

    x_range = (plot_data.shape_pt_lon.min(), plot_data.shape_pt_lon.max())
    y_range = (plot_data.shape_pt_lat.min(), plot_data.shape_pt_lat.max())

    height = int(
        round(width * (y_range[1] - y_range[0]) / (x_range[1] - x_range[0])))

    cvs = ds.Canvas(plot_width=width,
                    plot_height=height,
                    x_range=x_range,
                    y_range=y_range)

    layers = []
    for color, data_part in plot_data.groupby('route_color'):
        agg = cvs.line(data_part,
                       'shape_pt_lon',
                       'shape_pt_lat',
                       agg=ds.sum('times_taken'))
        image_part = tf.shade(agg,
                              cmap=['#000000', '#' + color],
                              how='eq_hist')
        layers.append(image_part)

    image = tf.stack(*layers, how='add')

    if out.endswith('.png'):
        out = out[:-4]
    export_image(image, filename=out, background='black')
Example #4
0
def timed_agg(df,
              filepath,
              plot_width=int(900),
              plot_height=int(900 * 7.0 / 12),
              cache_ranges=True):
    """Time a point aggregation of ``df`` and export the shaded result.

    The stopwatch covers building the canvas (with the ranges currently held
    in the module-level ``CACHED_RANGES``) and running ``Canvas.points``;
    shading and writing the image are not timed.

    Returns:
        ``(img, elapsed_seconds)`` where ``img`` is what ``export_image``
        returned for ``filepath`` (exported under ".").
    """
    global CACHED_RANGES

    started = time.time()
    canvas_kwargs = dict(x_range=CACHED_RANGES[0], y_range=CACHED_RANGES[1])
    canvas = ds.Canvas(plot_width, plot_height, **canvas_kwargs)
    # Column names come from the module-level `p` object.
    counts = canvas.points(df, p.x, p.y)
    finished = time.time()

    if cache_ranges:
        # Remember the ranges the canvas ended up with for subsequent calls.
        CACHED_RANGES = (canvas.x_range, canvas.y_range)

    exported = export_image(tf.shade(counts), filepath, export_path=".")
    return exported, finished - started
Example #5
0
        
    if black: img = tf.set_background(img, 'black')
    return img

def tests_datashader():
    """Smoke-run four datashader point aggregations over the SST observations.

    Reads three columns of a pipe-separated file and aggregates
    ``observation_value`` on a longitude/latitude grid with mean, max, min
    and count reductions. The aggregates are not returned or stored.
    """
    import datashader as ds
    import datashader.transfer_functions as tf
    import pandas as pd

    df = pd.read_csv(
        '/Users/iregon/C3S/dessaps/test_data/imma_converter/observations-sst-2014-6.psv',
        usecols=[6, 7, 14],
        sep="|",
        skiprows=0,
    )

    # NOTE(review): `cvs` is not created in this function; it must already
    # exist at module scope when this runs - confirm against the caller.
    for reduction in (ds.mean, ds.max, ds.min, ds.count):
        cvs.points(df, 'longitude', 'latitude', reduction('observation_value'))
    #tf.shade(agg.where(agg > 0), cmap=["lightblue", "darkblue"])
    #img = tf.shade(agg.where(agg > 0), cmap=['green', 'yellow', 'red'], how='linear', span=[275,305])
    
 
    
# Load longitude, latitude and observation_value (columns 6, 7, 14) from the
# pipe-separated SST observations file.
df = pd.read_csv('/Users/iregon/C3S/dessaps/test_data/imma_converter/observations-sst-2014-6.psv',usecols=[6,7,14],sep="|",skiprows=0)

# Whole-globe canvas at 10 pixels per degree.
bounds = dict(x_range = (-180, 180), y_range = (-90, 90))
plot_width = 360*10
plot_height = 180*10
canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height,**bounds)

# NOTE: despite its name, agg_mean holds the per-pixel *maximum*.
agg_mean = canvas.points(df, 'longitude', 'latitude', ds.max('observation_value'))
img = tf.shade(agg_mean, cmap=['green', 'yellow', 'red'], how='linear', span=[275,305])

# export_image appends `fmt` to `filename`, so the name is given without an
# extension; passing 'Oct2431doshade.png' produced 'Oct2431doshade.png.png'.
utils.export_image(img=img,filename='Oct2431doshade', fmt=".png", background=None)

# Histogram of the raw observation values (rebinds `img`).
points = hv.Points(df['observation_value'].values)
img = points.hist()
 '#f1f9a9',
 '#bfe5a0',
 '#74c7a5',
 '#378ebb',
 '#5e4fa2'
]
# Map each class label ("0", "1", ...) to the palette colour at that position.
color_key = dict((str(d), c) for d, c in enumerate(pal))

# Project the data to two dimensions with a fixed seed.
reducer = umap.UMAP(random_state=42)
embedding = reducer.fit_transform(data)

df = pd.DataFrame(embedding, columns=('x', 'y'))
# count_cat below needs a categorical column.
df['class'] = pd.Series(list(map(str, target)), dtype="category")

# Per-class point counts on a 400x400 grid, coloured by class.
cvs = ds.Canvas(plot_height=400, plot_width=400)
agg = cvs.points(df, 'x', 'y', ds.count_cat('class'))
img = tf.shade(agg, how='eq_hist', color_key=color_key)

utils.export_image(img, filename='fashion-mnist', background='black')

# Read the exported PNG back and show it without axis ticks.
image = plt.imread('fashion-mnist.png')
fig, ax = plt.subplots(figsize=(6, 6))
plt.imshow(image)
plt.setp(ax, xticks=[], yticks=[])
plt.title(
    "Fashion MNIST data embedded\ninto two dimensions by UMAP\nvisualised with Datashader",
    fontsize=12,
)

plt.show()
Example #7
0
# Galaxy Zoo counts catalog.
gz_catalog_loc = '{}/nsa_all_raw_gz_counts_10.0_arcsec.csv'.format(catalog_dir)
gz_catalog = pd.read_csv(gz_catalog_loc)
print('gz nsa galaxies: ' + str(len(gz_catalog)))

# NSA/DECaLS joint catalog, reduced to the columns used below.
joint_catalog_loc = '{}/nsa_v1_0_0_decals_dr5.fits'.format(catalog_dir)
joint_catalog = Table(fits.getdata(joint_catalog_loc))[
    ['nsa_id', 'ra', 'dec', 'petrotheta']
].to_pandas()
print('decals nsa galaxies: ' + str(len(joint_catalog)))

# Outer join keeps galaxies present in either catalog.
gz_and_decals = pd.merge(gz_catalog, joint_catalog, how='outer', on='nsa_id')
print('gz and decals: ' + str(len(gz_and_decals)))

old_galaxies, new_galaxies = match_galaxies_to_catalog(
    joint_catalog, gz_and_decals)

for matched in (old_galaxies, new_galaxies):
    print(len(matched))

print(new_galaxies.columns.values)
plot_catalog_overlap(old_galaxies, new_galaxies, ['old', 'new'])

# Sky positions of the new galaxies as a 400x400 density image.
canvas = ds.Canvas(plot_height=400, plot_width=400)
aggc = canvas.points(new_galaxies, 'ra', 'dec')
img = tf.shade(aggc)
export_image(img, 'new_galaxies_in_dr5')

# Distribution of petrotheta for the new galaxies.
new_galaxies['petrotheta'].hist()
plt.savefig('petrotheta_of_new.png')
plt.clf()
Example #8
0
import datashader as ds
import datashader.transfer_functions as tf
from datashader.utils import export_image
import pandas as pd

# Palette handed to tf.shade as color_key.
store_palette = [
    '#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00',
    '#ffff33', '#a65628', '#f781bf', '#999999', '#66c2a5',
    '#fc8d62', '#8da0cb', '#a6d854', '#ffd92f', '#e5c494',
    '#ffffb3', '#fb8072', '#fdb462', '#fccde5', '#d9d9d9',
    '#ccebc5', '#ffed6f',
]

# Tab-separated store table.
df = pd.read_csv("./data/store_data.csv", sep="\t")

# Mean NatlStrNumber per pixel on an 800x800 grid.
cvs = ds.Canvas(plot_height=800, plot_width=800)
agg = cvs.points(df, 'Longitude', 'Latitude', ds.mean('NatlStrNumber'))

shade_options = dict(
    cmap=['lightblue', 'darkblue'],
    color_key=store_palette,
    how='eq_hist',
    alpha=255,
    min_alpha=40,
)
img = tf.shade(agg, **shade_options)

export_image(img, "image")
Example #9
0
                         ds.max('observation_value'))
    agg_min = cvs.points(df, 'longitude', 'latitude',
                         ds.min('observation_value'))
    agg_count = cvs.points(df, 'longitude', 'latitude',
                           ds.count('observation_value'))
    #tf.shade(agg.where(agg > 0), cmap=["lightblue", "darkblue"])
    #img = tf.shade(agg.where(agg > 0), cmap=['green', 'yellow', 'red'], how='linear', span=[275,305])


# Load longitude, latitude and observation_value (columns 6, 7, 14) from the
# pipe-separated SST observations file.
df = pd.read_csv(
    '/Users/iregon/C3S/dessaps/test_data/imma_converter/observations-sst-2014-6.psv',
    usecols=[6, 7, 14],
    sep="|",
    skiprows=0)

# Whole-globe canvas at 10 pixels per degree.
bounds = dict(x_range=(-180, 180), y_range=(-90, 90))
plot_width = 360 * 10
plot_height = 180 * 10
canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height, **bounds)

# NOTE: despite its name, agg_mean holds the per-pixel *maximum*.
agg_mean = canvas.points(df, 'longitude', 'latitude',
                         ds.max('observation_value'))
img = tf.shade(agg_mean,
               cmap=['green', 'yellow', 'red'],
               how='linear',
               span=[275, 305])

# export_image appends `fmt` to `filename`, so the name is given without an
# extension; passing 'Oct2431doshade.png' produced 'Oct2431doshade.png.png'.
utils.export_image(img=img,
                   filename='Oct2431doshade',
                   fmt=".png",
                   background=None)

# Histogram of the raw observation values (rebinds `img`).
points = hv.Points(df['observation_value'].values)
img = points.hist()
Example #10
0
# basic datashader
import datashader as ds
import pandas as pd
import datashader.transfer_functions as tf
from datashader.utils import export_image

# Input table and the path the finished PNG should end up at.
data_in = 'C:\\Users\\Cob\\index\\educational\\usask\\research\\masters\\data\\lidar\\analysis\\mb_15_merged_.10m_canopy_19_149.csv'
img_out = "C:\\Users\\Cob\\index\\educational\\usask\\research\\masters\\graphics\\test_swe_19_045_vs_dce.png"

data = pd.read_csv(data_in)

# Point density of swe_19_045 against dce on a 1000x1000 grid, log-scaled.
cvs = ds.Canvas(plot_width=1000, plot_height=1000)
agg = cvs.points(data, 'swe_19_045', 'dce', agg=ds.count('dce'))
img = tf.shade(agg, cmap=['lightblue', 'darkblue'], how='log')

# export_image appends its own ".png" (default fmt), so strip the extension
# from img_out first; otherwise the file is written as "...dce.png.png".
export_name = img_out[:-len('.png')] if img_out.endswith('.png') else img_out
export_image(img, export_name)

# basic datashader
# import rastools
#
# import numpy as np
# import matplotlib
# matplotlib.use('TkAgg')
# import matplotlib.pyplot as plt
#
# # plt.scatter(data.swe_19_045, data.dnt)
Example #11
0
                     name="",
                     canvas=None,
                     cat=None,
                     margin=0.05):
        if canvas is None:
            xr = nodes.x.min() - margin, nodes.x.max() + margin
            yr = nodes.y.min() - margin, nodes.y.max() + margin
            canvas = ds.Canvas(x_range=xr, y_range=yr, **cvsopts)

        np = my_nodesplot(nodes, name + " nodes", canvas, cat)
        ep = edgesplot(edges, name + " edges", canvas)
        return tf.stack(ep, np, how="over", name=name)

    forcedirected = forceatlas2_layout(nodes, edges)
    fd = forcedirected

    fd.iat[0, 2] = (fd.x.min() + fd.x.max()) / 2  # center focus node on x
    fd.iat[0, 3] = 1  # center focus node on y

    ## save image?!

    image = tf.Image(
        my_graphplot(fd,
                     connect_edges(fd, edges),
                     "Force-directed",
                     cat="type",
                     margin=0.02))

    export_image(image, filename=file)
    os.rename(file_name, saved_file_name)
Example #12
0
import datashader as ds
from datashader.utils import export_image
import datashader.transfer_functions as tf
import pandas as pd
from colorcet import fire
from PIL import Image
from tilemap import TileMap

point_df = pd.read_parquet("point_data.gzip")

# Bounding box as [x_min, x_max, y_min, y_max]; both ends are inclusive.
bounds = [1.288423e+07, 1.291500e+07, -3.772000e+06, -3.750000e+06]
inside_x = (point_df["x"] >= bounds[0]) & (point_df["x"] <= bounds[1])
inside_y = (point_df["y"] >= bounds[2]) & (point_df["y"] <= bounds[3])
filter_point_df = point_df[inside_x & inside_y]

# Height/width ratio of the extent actually covered by the filtered points.
filter_point_desc_df = filter_point_df.describe()
y_extent = (filter_point_desc_df.loc["max", "y"]
            - filter_point_desc_df.loc["min", "y"])
x_extent = (filter_point_desc_df.loc["max", "x"]
            - filter_point_desc_df.loc["min", "x"])
aspect_ratio = y_extent / x_extent

# Density image, `dim` pixels wide, height following the aspect ratio.
dim = 2000
cvs = ds.Canvas(plot_height=int(dim * aspect_ratio), plot_width=dim)
agg = cvs.points(filter_point_df, 'x', 'y')
img = tf.shade(agg, cmap=fire)

figname = 'perth_wires'
export_image(img, figname, background="black")

# Street-map backdrop, resized to a fixed 2000x1430 and made fully opaque.
render_map = TileMap(
    extents=(115.7410072222, -32.0661730556, 116.0174188889, -31.8985416667))
map_image = render_map.render(zoom=14)
map_image_resized = map_image.resize((2000, 1430), resample=Image.LANCZOS)
map_image_resized.putalpha(255)
map_image_resized.save("perth_street_map.png")
Example #13
0
def _knn_records_to_frame(records):
    """Pivot a list of per-source dicts into a DataFrame with one column per key."""
    # Union the keys over every record; a record lacking a key contributes None.
    keys = {key for record in records for key in record}
    return pd.DataFrame(
        {key: [record.get(key) for record in records]
         for key in keys})


def _knn_class_frame(df, prefix, f1_col, label, feature):
    """Extract one class's columns from ``df`` under class-independent names."""
    part = df[[
        prefix + '_xvar_mean', prefix + '_xvar_std', prefix + '_probs_mean',
        prefix + '_probs_std', f1_col, f1_col + 'err', 'correct_source'
    ]].copy()
    part['class'] = label
    part.columns = [
        feature + '_mean', feature + '_std', 'probs_mean', 'probs_std', 'f1',
        'f1err', 'correct_source', 'class'
    ]
    return part


def _knn_split_by_class(df, feature):
    """Return the galaxy, quasar and star frames of ``df`` plus their concatenation."""
    galaxies = _knn_class_frame(df, 'galaxy', 'f1g', 'GALAXY', feature)
    quasars = _knn_class_frame(df, 'quasar', 'f1q', 'QSO', feature)
    stars = _knn_class_frame(df, 'star', 'f1s', 'STAR', feature)
    combined = pd.concat([galaxies, quasars, stars], axis=0)
    # datashader wants categorical class
    combined['class'] = combined['class'].astype('category')
    return galaxies, quasars, stars, combined


def _knn_shade_png(df, xcol, x_range, y_range, width, height, filename):
    """Render per-class counts of ``xcol`` vs ``f1`` and export them as a PNG."""
    cvs = ds.Canvas(plot_width=width,
                    plot_height=height,
                    x_range=x_range,
                    y_range=y_range,
                    x_axis_type='linear',
                    y_axis_type='linear')
    agg = cvs.points(df, xcol, 'f1', ds.count_cat('class'))
    ckey = dict(GALAXY=(101, 236, 101), QSO='hotpink', STAR='dodgerblue')
    img = tf.shade(agg, color_key=ckey, how='log')
    export_image(img, filename, fmt='.png', background='white')


def _knn_errorbars(frame, xcol, xerrcol, color, elinewidth, mew,
                   label=None, x_scale=1, y_scale=1):
    """Draw marker-less error bars for ``frame`` on the current axes."""

    def scaled(values, factor):
        # Leave the column untouched when no rescaling is requested.
        return values if factor == 1 else values * factor

    style = dict(color=color,
                 elinewidth=elinewidth,
                 markeredgewidth=mew,
                 ls='none')
    if label is not None:
        style['label'] = label
    plt.errorbar(scaled(frame[xcol], x_scale),
                 scaled(frame['f1'], y_scale),
                 xerr=scaled(frame[xerrcol], x_scale),
                 yerr=scaled(frame['f1err'], y_scale),
                 **style)


def _knn_low_score_sample(frame, f1_max, probs_max, step):
    """Take every ``step``-th row with f1 < ``f1_max`` and probs_mean < ``probs_max``."""
    return frame[(frame.f1 < f1_max) & (frame.probs_mean < probs_max)][0::step]


def _knn_rescale_yticklabels(ax):
    """Relabel the y ticks of a 10x-stretched axis and format x ticks to one decimal."""
    # account for factor of 10 increase
    ax.set_yticklabels([tick / 10 for tick in ax.get_yticks()])
    ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))


def _knn_probability_xticks(ax):
    """Put ticks on the 2x-stretched probability axis, labelled 0.0 to 1.0."""
    ax.set_xticks(np.arange(0, 2.1, step=0.2))
    ax.set_xticklabels(np.arange(0, 1.1, step=0.1))
    # Round the label text, otherwise some labels show ~8 decimal places.
    labels = [item.get_text() for item in ax.get_xticklabels()]
    ax.set_xticklabels([str(round(float(label), 2)) for label in labels])


def _knn_decorate_axes(xlabel, ylabel):
    """Apply the tick settings and axis labels shared by all four panels."""
    plt.tick_params(axis='y', which='both', right=True)
    plt.minorticks_on()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)


def plot_knn_f1scores(plot_label=''):
    """Plot per-source F1 scores computed from k-nearest-neighbour results.

    Loads the lists of per-source dictionaries stored under the names
    'knn_f1scores_1D' and 'knn_f1scores_10D' (via ``load_obj``), renders
    datashader density images of F1 score against the 1-D feature and against
    classification probability (there is too much data for matplotlib alone),
    and overlays a thinned sample of error bars on each image.

    Files written to the working directory:
        knn1d_1d_vs_f1.png, knn10d_1d_vs_f1.png, knn1d_probs_vs_f1.png,
        knn10d_probs_vs_f1.png, and the figures
        'knn_plot_1D' + plot_label + '.pdf' and
        'knn_plot_10D' + plot_label + '.pdf'.

    Args:
        plot_label: suffix appended to the two PDF file names.

    Returns:
        None

    Raises:
        SystemExit: with status 1 when the saved results cannot be loaded.
    """
    # Load output from previous run:
    print('Loading knn indices from previous run saved on disk...')
    try:
        knn_f1scores_1d = load_obj('knn_f1scores_1D')
        knn_f1scores_10d = load_obj('knn_f1scores_10D')
    except Exception:
        # Catch Exception rather than everything so Ctrl-C still interrupts,
        # and exit with a failure status instead of the success status that
        # the site-provided exit() reported.
        print(
            'Failed to load knn_f1scores_*.pkl from disk - did you run "get_knn_accuracy()" yet?'
        )
        raise SystemExit(1)

    # combine list of dicts into single dictionary / DataFrame
    df1d = _knn_records_to_frame(knn_f1scores_1d)
    df10d = _knn_records_to_frame(knn_f1scores_10d)

    df1d_g, df1d_q, df1d_s, df_all_1d = _knn_split_by_class(df1d, 'feature1d')
    df10d_g, df10d_q, df10d_s, df_all_10d = _knn_split_by_class(
        df10d, 'feature10d')

    # Did we fit the knn in 1-D or in 10-D?
    # In 1-D a few thousand nearest neighbours will likely be a healthy mix of
    # the 3 classes throughout most of the feature space, so the per-class F1
    # scores are reliable (with differing error bars). They are a round-about
    # way of getting the F1 scores shown by plot_histogram_matrix_f1: knn
    # effectively uses variable bin widths to collect enough neighbours, while
    # that histogram uses fixed bin widths and averages within each bin.
    #
    # Only the F1 score of the correct object type is kept. E.g. around a
    # galaxy in 10-D the nearest neighbours are likely all galaxies, so F1 for
    # star and quasar would be very poor or zero there for lack of true
    # positives.
    print(df_all_1d.correct_source.value_counts())
    print(df_all_10d.correct_source.value_counts())
    df_all_1d = df_all_1d[df_all_1d.correct_source == 1]
    df_all_10d = df_all_10d[df_all_10d.correct_source == 1]

    # Create datashader pngs for each plot, since we have too much data for
    # matplotlib to handle.
    ymin = 0
    ymax = 1.05
    y_range = (ymin, ymax)

    # 1D - 1dfeature vs f1 (x range padded for plotting later)
    xmin1d = df1d.star_xvar_mean.min() - 0.1
    xmax1d = df1d.star_xvar_mean.max() + 0.1
    print(xmin1d, xmax1d)
    _knn_shade_png(df_all_1d, 'feature1d_mean', (xmin1d, xmax1d), y_range,
                   1000, 600, 'knn1d_1d_vs_f1')

    # 10D - 1dfeature vs f1
    xmin10d = df10d.star_xvar_mean.min() - 0.1
    xmax10d = df10d.star_xvar_mean.max() + 0.1
    print(xmin10d, xmax10d)
    _knn_shade_png(df_all_10d, 'feature10d_mean', (xmin10d, xmax10d), y_range,
                   200, 120, 'knn10d_1d_vs_f1')

    # 1D and 10D - prob vs f1
    xmin_probs = 0
    xmax_probs = 1.05
    _knn_shade_png(df_all_1d, 'probs_mean', (xmin_probs, xmax_probs), y_range,
                   300, 300, 'knn1d_probs_vs_f1')
    _knn_shade_png(df_all_10d, 'probs_mean', (xmin_probs, xmax_probs), y_range,
                   200, 200, 'knn10d_probs_vs_f1')

    # ----------------- plotting: 1-D -----------------
    # get datashader pngs, and plot a small sample of points over the top to
    # guide the eye with error bars.
    img_1d_1d = mpimg.imread('knn1d_1d_vs_f1.png')
    img_1d_probs = mpimg.imread('knn1d_probs_vs_f1.png')
    mpl.rcParams.update({'font.size': 10})
    mew = 0.5
    elinewidth = 0.5
    xf = 2  # make x-axis twice as long as y on the probability panels

    fig, axs = plt.subplots(1, 2, figsize=(14.5, 4))

    # --- 1D --- 1d feature ---
    plt.sca(axs[0])
    # make yaxis 10 times larger, then fix the labels
    plt.imshow(img_1d_1d, extent=[xmin1d, xmax1d, ymin * 10, ymax * 10])
    _knn_rescale_yticklabels(axs[0])

    # plot sample over the top to get a feel for error bars
    samp = 2500
    for frame, color, label in ((df1d_g, galaxy_c, 'Galaxies'),
                                (df1d_q, quasar_c, 'Quasars'),
                                (df1d_s, star_c, 'Stars')):
        _knn_errorbars(frame[0::samp], 'feature1d_mean', 'feature1d_std',
                       color, elinewidth, mew, label=label, y_scale=10)

    _knn_decorate_axes('1D feature', 'F1 score in 1 dimensions')
    plt.xlim(-7, 12.5)
    plt.legend(frameon=False, loc='lower right')
    plt.tight_layout()
    fig.tight_layout()

    # --- 1D --- probs ---
    plt.sca(axs[1])
    # make xaxis larger, then fix the labels
    plt.imshow(img_1d_probs,
               extent=[xmin_probs * xf, xmax_probs * xf, ymin, ymax])
    _knn_probability_xticks(axs[1])

    # plot sample over the top to get a feel for error bars; quasars get an
    # extra, denser and unlabelled sample at lower probabilities because that
    # region is undersampled.
    for frame, probs_max, step, color, label in (
            (df1d_g, 0.85, 3000, galaxy_c, 'Galaxies'),
            (df1d_q, 0.85, 3000, quasar_c, 'Quasars'),
            (df1d_q, 0.75, 800, quasar_c, None),
            (df1d_s, 0.85, 3000, star_c, 'Stars')):
        _knn_errorbars(_knn_low_score_sample(frame, 0.85, probs_max, step),
                       'probs_mean', 'probs_std', color, elinewidth, mew,
                       label=label, x_scale=xf)

    _knn_decorate_axes('Classification probability', 'F1 score in 1 dimension')
    plt.tight_layout()

    fig.savefig('knn_plot_1D' + plot_label + '.pdf')
    plt.clf()

    # ----------------- plotting: 10-D -----------------
    elinewidth = 0.2
    mpl.rcParams.update({'font.size': 10})  # else its really small in the paper

    img_10d_1d = mpimg.imread('knn10d_1d_vs_f1.png')
    img_10d_probs = mpimg.imread('knn10d_probs_vs_f1.png')

    fig, axs = plt.subplots(1, 2, figsize=(14.5, 4))

    # --- 10D --- 1d feature ---
    plt.sca(axs[0])
    # make yaxis 10 times larger, then fix the labels
    plt.imshow(img_10d_1d, extent=[xmin10d, xmax10d, ymin * 10, ymax * 10])
    _knn_rescale_yticklabels(axs[0])

    # only plot error bars below 0.95 because above this they are v small.
    for frame, color, label in ((df10d_g, galaxy_c, 'Galaxies'),
                                (df10d_q, quasar_c, 'Quasars'),
                                (df10d_s, star_c, 'Stars')):
        _knn_errorbars(frame[frame.f1 < 0.95][0::500], 'feature10d_mean',
                       'feature10d_std', color, elinewidth, mew,
                       label=label, y_scale=10)

    _knn_decorate_axes('1D feature', 'F1 score in 10 dimensions')
    plt.xlim(-7, 12.5)
    plt.tight_layout()

    # --- 10D --- probs ---
    plt.sca(axs[1])
    # make xaxis larger, then fix the labels
    plt.imshow(img_10d_probs,
               extent=[xmin_probs * xf, xmax_probs * xf, ymin, ymax])
    _knn_probability_xticks(axs[1])

    # only plot error bars in the lower-score region; elsewhere they are
    # v small and overcrowd the plot.
    for frame, color, label in ((df10d_g, galaxy_c, 'Galaxy'),
                                (df10d_q, quasar_c, 'Quasar'),
                                (df10d_s, star_c, 'Star')):
        _knn_errorbars(_knn_low_score_sample(frame, 0.85, 0.85, 1000),
                       'probs_mean', 'probs_std', color, elinewidth, mew,
                       label=label, x_scale=xf)

    _knn_decorate_axes('Classification probability',
                       'F1 score in 10 dimensions')
    plt.legend(frameon=False, loc='upper left')
    plt.tight_layout()
    fig.tight_layout()
    fig.savefig('knn_plot_10D' + plot_label + '.pdf')
from datashader import utils

# Make the network share the working directory; the relative glob pattern
# used in the __main__ section is resolved against it.
os.chdir("//sbs2003/Daten-CME/")

# Import-time timestamp; reassigned in the __main__ section before the timed work.
t1 = time.time()


def data_pool(file):
    """Open one parquet file with ``dd.read_parquet`` and report that it loaded.

    Args:
        file: path of the parquet file; echoed in the progress message.

    Returns:
        The object returned by ``dd.read_parquet`` for ``file``.
    """
    frame = dd.read_parquet(file)
    print(file + " loaded")
    return frame


data = None

if __name__ == '__main__':
    # Load every matching parquet file in parallel, render the drop-off
    # coordinates as an 8000x8000 density image and report the elapsed time.
    print(datetime.datetime.now())
    t1 = time.time()
    files = glob.iglob('*.csv_2_.parquet')
    p = Pool(os.cpu_count())
    try:
        data = dd.concat(p.map(data_pool, files))  # reset_index(drop=True))
    finally:
        # Shut the workers down even if loading fails; the pool was
        # previously never closed, leaving its processes behind.
        p.close()
        p.join()
    canvas = ds.Canvas(x_range=(-74.25, -73.7),
                       y_range=(40.5, 41),
                       plot_width=8000,
                       plot_height=8000)
    agg = canvas.points(data, 'End_Lon', 'End_Lat')
    pic = tf.set_background(tf.shade(agg, cmap=reversed(blues)),
                            color="#364564")  #364564
    utils.export_image(pic, "NYCPlot fn1", fmt=".png")
    print("time needed", time.time() - t1)
Example #15
0
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from IPython.core.display import HTML, display

print("Begin...")
#df = pd.read_hdf('census.h5', 'census')

# (x_range, y_range) of the plotted region, plus the output size in pixels.
USA =          ((-13884029,  -7453304), (2698291, 6455972))
plot_width  = int(1000)
plot_height = int(plot_width*7.0/12)

# Synthetic stand-in for the census table: 10000 uniformly random points in
# a 1000 km square.
df = pd.DataFrame(
      {'meterswest': np.random.random(10000)*1000e3 + -10668666.5,
      'metersnorth': np.random.random(10000)*1000e3 + 4577131.5}
      )
print(df.tail())


background = "black"
display(HTML("<style>.container { width:100% !important; }</style>"))

print("Computing aggregate...")
cvs = ds.Canvas(plot_width, plot_height, *USA)
agg = cvs.points(df, 'meterswest', 'metersnorth')

print("Making Image ...")
# Reverse the colormap whenever the background is not black.
cmap = colormap_select(Hot,0.2, reverse=(background!="black"))
# tf.interpolate is the deprecated (and since removed) name of tf.shade;
# the arguments are unchanged.
interp = tf.shade(agg, cmap = cmap, how='eq_hist')
export_image(interp, "census_ds_hot_eq_hist", background=background)