def test_categorical_mean(ddf):
    """Check ``mean`` reductions grouped by category.

    The same expected values are compared against aggregates grouped by
    the ``'cat'`` column and by ``'cat_int'`` passed through
    ``ds.category_modulo`` (modulo 4, offset 10), for both the ``'f32'``
    and ``'f64'`` value columns.
    """
    expected = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])
    float_columns = ('f32', 'f64')

    # Grouping directly on the 'cat' column.
    out = xr.DataArray(
        expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in float_columns:
        reduction = ds.by('cat', ds.mean(column))
        agg = c.points(ddf, 'x', 'y', reduction)
        assert_eq_xr(agg, out)

    # Grouping on categories derived from 'cat_int' via category_modulo.
    out = xr.DataArray(
        expected,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in float_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        agg = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.mean(column)))
        assert_eq_xr(agg, out)
def test_categorical_sum(ddf):
    """Check ``sum`` reductions grouped by the ``'cat'`` column.

    Integer value columns (``'i32'``, ``'i64'``) and float value columns
    (``'f32'``, ``'f64'``) are checked against separate expected arrays;
    the float expectation differs from the integer one in its first
    element (8.0 rather than 10).
    """
    cases = (
        (
            np.array([[[10, 0, 0, 0], [0, 0, 60, 0]],
                      [[0, 35, 0, 0], [0, 0, 0, 85]]]),
            ('i32', 'i64'),
        ),
        (
            np.array([[[8.0, 0, 0, 0], [0, 0, 60.0, 0]],
                      [[0, 35.0, 0, 0], [0, 0, 0, 85.0]]]),
            ('f32', 'f64'),
        ),
    )

    for expected, columns in cases:
        out = xr.DataArray(
            expected,
            coords=(coords + [['a', 'b', 'c', 'd']]),
            dims=(dims + ['cat']),
        )
        for column in columns:
            agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
            assert_eq_xr(agg, out)
def test_categorical_var(ddf):
    """Check ``var`` reductions grouped by the ``'cat'`` column.

    Both float value columns (``'f32'`` and ``'f64'``) are compared with
    the same expected array.
    """
    expected = np.array([
        [[2.5, nan, nan, nan], [nan, nan, 2., nan]],
        [[nan, 2., nan, nan], [nan, nan, nan, 2.]],
    ])
    out = xr.DataArray(
        expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )

    for column in ('f32', 'f64'):
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.var(column)))
        # NOTE(review): the third positional argument is presumably a
        # "compare approximately" flag of assert_eq_xr -- confirm.
        assert_eq_xr(agg, out, True)
def test_categorical_var(ddf):
    """Check ``var`` reductions grouped by the ``'cat'`` column.

    The test is skipped when ``cudf`` is available and ``ddf`` is backed
    by a ``cudf.DataFrame``. Otherwise both float value columns
    (``'f32'`` and ``'f64'``) are compared with the same expected array.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        # The previous reason read "is yet supported", which states the
        # opposite of why the test is being skipped.
        pytest.skip("The 'var' reduction is not yet supported on the GPU")

    sol = np.array([
        [[2.5, nan, nan, nan], [nan, nan, 2., nan]],
        [[nan, 2., nan, nan], [nan, nan, nan, 2.]],
    ])
    out = xr.DataArray(
        sol,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )

    for col in ('f32', 'f64'):
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.var(col)))
        # NOTE(review): the third positional argument is presumably a
        # "compare approximately" flag of assert_eq_xr -- confirm.
        assert_eq_xr(agg, out, True)
def _compute_datashader_assets(data, x, aggregate_col, aggregate_fn,
                               color_palette):
    """Build the datashader aggregator and colour mapping for column ``x``.

    Parameters
    ----------
    data
        Dataframe-like object; ``data[x].dtype`` is inspected and, for
        categorical columns, ``data[x].cat.categories`` is read.
    x
        Name of the column to aggregate by.
    aggregate_col
        Column handed to the reduction named by ``aggregate_fn``.
    aggregate_fn
        Name of a reduction attribute on ``ds`` (looked up via
        ``getattr``); may be falsy for non-categorical columns.
    color_palette
        Colours used either as the colour map or, for categorical
        columns, paired one-to-one with the categories.

    Returns
    -------
    tuple
        ``(aggregator, cmap)``. ``cmap`` is ``{"cmap": color_palette}``
        for non-categorical columns and ``{"color_key": {...}}`` for
        categorical ones. ``aggregator`` is ``None`` when the column is
        not categorical and ``aggregate_fn`` is falsy.
    """
    column = data[x]

    if not isinstance(column.dtype, cudf.core.dtypes.CategoricalDtype):
        aggregator = None
        if aggregate_fn:
            aggregator = getattr(ds, aggregate_fn)(aggregate_col)
        return aggregator, {"cmap": color_palette}

    # Categorical column: per-category reductions need datashader >= 0.11.
    if ds_version >= "0.11":
        reduction = getattr(ds, aggregate_fn)(aggregate_col)
        aggregator = ds.by(x, reduction)
    else:
        print("only count_cat supported by datashader <=0.10")
        aggregator = ds.count_cat(x)

    categories = list(data[x].cat.categories)
    color_key = dict(zip(categories, color_palette))
    return aggregator, {"color_key": color_key}
def _get_points(self):
    """Return a datashaded view of ``self.embeddings``.

    When ``self.label_flag`` is truthy and ``self.classes`` is not
    ``None``, the points are split per class, shaded with a per-class
    count aggregator and overlaid with zero-size points carrying the
    class colours. Otherwise all embeddings are datashaded as a single
    ``hv.Points`` element.
    """
    embeddings = self.embeddings
    classes = self.classes
    use_labels = (self.label_flag) and (classes is not None)

    if not use_labels:
        points = datashade(hv.Points(embeddings))
    else:
        frame = pd.DataFrame(embeddings)
        frame.columns = ['ivis 1', 'ivis 2']
        frame['label'] = classes

        n_classes = len(np.unique(classes))
        # Pairs of (class index, colour) taken from the Sets1to3 palette.
        color_key = list(enumerate(Sets1to3[0:n_classes]))

        values = frame.values
        per_class = {}
        for k, colour in color_key:
            element = hv.Points(
                values[classes == k, :], ['ivis 1', 'ivis 2'], 'k',
                label=str(k))
            per_class[k] = element.opts(color=colour, size=0)

        overlay = hv.NdOverlay(per_class, kdims=['k'])
        shaded = dynspread(
            datashade(overlay, aggregator=ds.by('k', ds.count())))

        # Zero-size points at the origin, one per class colour.
        color_points = hv.NdOverlay({
            k: hv.Points([0, 0]).opts(color=colour, size=0)
            for k, colour in color_key
        })
        points = color_points * shaded

    points.opts(height=400, width=500, xaxis=None, yaxis=None)
    return points
def test_count_cat(ddf):
    """Check categorical counting via ``count_cat`` and ``ds.by``.

    The same counts are expected from ``ds.count_cat('cat')``, from
    ``ds.by`` with ``category_modulo`` on ``'cat_int'``, and (with one
    extra trailing category) from ``ds.by`` with ``category_binning`` on
    the integer and float columns.
    """
    counts = np.array([[[5, 0, 0, 0], [0, 0, 5, 0]],
                       [[0, 5, 0, 0], [0, 0, 0, 5]]])

    out = xr.DataArray(
        counts,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    agg = c.points(ddf, 'x', 'y', ds.count_cat('cat'))
    assert_eq_xr(agg, out)

    # Categorizing by (cat_int-10)%4 ought to give the same result.
    out = xr.DataArray(
        counts,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    modulo_cat = ds.category_modulo('cat_int', modulo=4, offset=10)
    agg = c.points(ddf, 'x', 'y', ds.by(modulo_cat, ds.count()))
    assert_eq_xr(agg, out)

    # The binning checks live here because they expect the same result
    # with only slight tweaks. An extra category is appended; it counts
    # nans and out-of-bounds values.
    counts = np.append(counts, [[[0], [0]], [[0], [0]]], axis=2)

    def check_binned(columns):
        # Bin each column over [0, 20] into 4 bins and compare the counts.
        for column in columns:
            expected = xr.DataArray(
                counts,
                coords=(coords + [range(5)]),
                dims=(dims + [column]),
            )
            binned = ds.category_binning(column, 0, 20, 4)
            result = c.points(ddf, 'x', 'y', ds.by(binned, ds.count()))
            assert_eq_xr(result, expected)

    # Integer arange columns: same result as for count_cat.
    check_binned(('i32', 'i64'))

    # Float arange columns: element 2 has a nan, so the first bin is one
    # short and the nan bin gains one.
    counts[0, 0, 0] = 4
    counts[0, 0, 4] = 1
    check_binned(('f32', 'f64'))
def test_categorical_std(ddf):
    """Check ``std`` reductions grouped by category.

    The test is skipped when ``cudf`` is available and ``ddf`` is backed
    by a ``cudf.DataFrame``. Otherwise the expected values (square roots
    of the per-category variances) are compared against aggregates
    grouped by ``'cat'``, by ``'cat_int'`` through ``category_modulo``,
    and by binned float columns through ``category_binning``.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        # The previous reason read "is yet supported", which states the
        # opposite of why the test is being skipped.
        pytest.skip("The 'std' reduction is not yet supported on the GPU")

    float_cols = ('f32', 'f64')
    sol = np.sqrt(np.array([
        [[2.5, nan, nan, nan], [nan, nan, 2., nan]],
        [[nan, 2., nan, nan], [nan, nan, nan, 2.]],
    ]))

    # Grouping directly on the 'cat' column.
    out = xr.DataArray(
        sol,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for col in float_cols:
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.std(col)))
        # NOTE(review): the third positional argument is presumably a
        # "compare approximately" flag of assert_eq_xr -- confirm.
        assert_eq_xr(agg, out, True)

    # Grouping on categories derived from 'cat_int' via category_modulo.
    out = xr.DataArray(
        sol,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for col in float_cols:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        agg = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.std(col)))
        assert_eq_xr(agg, out)

    # Add an extra category (this will count nans and out of bounds).
    sol = np.append(sol, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for col in float_cols:
        out = xr.DataArray(
            sol,
            coords=(coords + [range(5)]),
            dims=(dims + [col]),
        )
        binning = ds.category_binning(col, 0, 20, 4)
        agg = c.points(ddf, 'x', 'y', ds.by(binning, ds.std(col)))
        assert_eq_xr(agg, out)
def test_categorical_mean(ddf):
    """Check ``mean`` reductions grouped by category.

    The expected values are compared against aggregates grouped by
    ``'cat'``, by ``'cat_int'`` through ``category_modulo``, and (with
    one extra all-nan trailing category) by the float columns binned
    through ``category_binning``.
    """
    float_columns = ('f32', 'f64')
    expected = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])

    # Grouping directly on the 'cat' column.
    out = xr.DataArray(
        expected,
        coords=(coords + [['a', 'b', 'c', 'd']]),
        dims=(dims + ['cat']),
    )
    for column in float_columns:
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.mean(column)))
        assert_eq_xr(agg, out)

    # Grouping on categories derived from 'cat_int' via category_modulo.
    out = xr.DataArray(
        expected,
        coords=(coords + [range(4)]),
        dims=(dims + ['cat_int']),
    )
    for column in float_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        agg = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.mean(column)))
        assert_eq_xr(agg, out)

    # Add an extra category (this will count nans and out of bounds).
    expected = np.append(
        expected, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for column in float_columns:
        out = xr.DataArray(
            expected,
            coords=(coords + [range(5)]),
            dims=(dims + [column]),
        )
        binning = ds.category_binning(column, 0, 20, 4)
        agg = c.points(ddf, 'x', 'y', ds.by(binning, ds.mean(column)))
        assert_eq_xr(agg, out)
def test_categorical_mean_binning(ddf):
    """Check ``mean`` reductions grouped by binned float columns.

    The test is skipped when ``cudf`` is available and ``ddf`` is backed
    by a ``cudf.DataFrame``. Otherwise each of the ``'f32'`` and
    ``'f64'`` columns is binned with ``ds.category_binning(col, 0, 20, 4)``
    and the per-bin mean of that same column is compared with the
    expected array.
    """
    if cudf and isinstance(ddf._meta, cudf.DataFrame):
        # The previous reason read "is yet supported", which states the
        # opposite of why the test is being skipped.
        pytest.skip(
            "The categorical binning of 'mean' reduction is not yet "
            "supported on the GPU"
        )

    sol = np.array([
        [[2, nan, nan, nan], [nan, nan, 12, nan]],
        [[nan, 7, nan, nan], [nan, nan, nan, 17]],
    ])

    # Add an extra category (this will count nans and out of bounds).
    sol = np.append(sol, [[[nan], [nan]], [[nan], [nan]]], axis=2)

    for col in ('f32', 'f64'):
        out = xr.DataArray(
            sol,
            coords=(coords + [range(5)]),
            dims=(dims + [col]),
        )
        binning = ds.category_binning(col, 0, 20, 4)
        agg = c.points(ddf, 'x', 'y', ds.by(binning, ds.mean(col)))
        assert_eq_xr(agg, out)
def test_categorical_sum(ddf):
    """Check ``sum`` reductions grouped by category.

    Integer value columns are checked grouped by ``'cat'`` and by
    ``'cat_int'`` through ``category_modulo``; float value columns are
    checked grouped by ``'cat'`` against a separate expected array whose
    first element is 8.0 rather than 10.
    """
    int_columns = ('i32', 'i64')
    float_columns = ('f32', 'f64')
    cat_labels = ['a', 'b', 'c', 'd']

    int_expected = np.array([
        [[10, nan, nan, nan], [nan, nan, 60, nan]],
        [[nan, 35, nan, nan], [nan, nan, nan, 85]],
    ])

    # Integer columns grouped directly on 'cat'.
    out = xr.DataArray(
        int_expected, coords=(coords + [cat_labels]), dims=(dims + ['cat']))
    for column in int_columns:
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(agg, out)

    # Integer columns grouped on categories derived from 'cat_int'.
    out = xr.DataArray(
        int_expected, coords=(coords + [range(4)]), dims=(dims + ['cat_int']))
    for column in int_columns:
        categorizer = ds.category_modulo('cat_int', modulo=4, offset=10)
        agg = c.points(ddf, 'x', 'y', ds.by(categorizer, ds.sum(column)))
        assert_eq_xr(agg, out)

    float_expected = np.array([
        [[8.0, nan, nan, nan], [nan, nan, 60.0, nan]],
        [[nan, 35.0, nan, nan], [nan, nan, nan, 85.0]],
    ])

    # Float columns grouped directly on 'cat'.
    out = xr.DataArray(
        float_expected, coords=(coords + [cat_labels]), dims=(dims + ['cat']))
    for column in float_columns:
        agg = c.points(ddf, 'x', 'y', ds.by('cat', ds.sum(column)))
        assert_eq_xr(agg, out)
def _clipped_axis_range(values):
    """Return ``(low, high)`` of *values* clipped to mean +/- 4 std devs."""
    highest = np.max(values)
    lowest = np.min(values)
    spread = np.std(values)
    centre = np.mean(values)
    high = np.min([centre + 4 * spread, highest])
    low = np.max([centre - 4 * spread, lowest])
    return low, high


def create_plot(
    data,
    x,
    y,
    plot_type="scatter",
    selected=None,
    show_selected=True,
    slow_render=False,
    legend=True,
    colours=True,
    smaller_axes_limits=False,
    bounds=None,
    legend_position=None,
):
    """Build a (by default datashaded) HoloViews plot of ``data[x]`` vs ``data[y]``.

    Parameters
    ----------
    data : dataframe
        Source data; must expose ``columns`` containing ``x`` and ``y``.
    x, y : str
        Names of the columns plotted on the horizontal / vertical axis.
    plot_type : {"scatter", "line"}
        ``"scatter"`` builds ``hv.Points``; ``"line"`` builds ``hv.Path``.
    selected : object, optional
        Object whose ``data`` attribute maps column names to values
        (presumably a Bokeh ``ColumnDataSource`` -- confirm). It is only
        drawn when it holds exactly one row, and when ``bounds`` is
        given, only if that row lies inside the bounds.
    show_selected : bool
        Whether ``selected`` is processed and overlaid at all.
    slow_render : bool
        If true, the raw element is used in place of the datashaded plot.
    legend : bool
        Overlay the zero-size legend points (only when ``colours`` is true).
    colours : bool
        Colour by ``config.settings["label_col"]`` using the
        ``config.settings["label_colours"]`` colour key.
    smaller_axes_limits : bool
        Clip the axis ranges to the mean +/- 4 standard deviations of the
        data, widened to include the selected point if there is one.
    bounds : sequence, optional
        ``(min_x, max_y, max_x, min_y)``; rows outside are dropped.
    legend_position : optional
        Forwarded to ``plot.opts(legend_position=...)`` when not ``None``.

    Returns
    -------
    HoloViews object
        The assembled plot.

    Raises
    ------
    AssertionError
        If ``x`` or ``y`` is not a column of ``data``.
    ValueError
        If ``plot_type`` is neither ``"scatter"`` nor ``"line"``.
    """
    assert x in list(
        data.columns), f"Column {x} is not a column in your dataframe."
    assert y in list(
        data.columns), f"Column {y} is not a column in your dataframe."

    if bounds is not None:
        data = data[data[x] >= bounds[0]]
        data = data[data[y] <= bounds[1]]
        data = data[data[x] <= bounds[2]]
        data = data[data[y] >= bounds[3]]

    if plot_type == "scatter":
        p = hv.Points(data, [x, y]).opts(active_tools=["pan", "wheel_zoom"])
    elif plot_type == "line":
        p = hv.Path(data, [x, y]).opts(active_tools=["pan", "wheel_zoom"])
    else:
        # Previously an unknown value left `p` unbound and surfaced later
        # as a confusing NameError/UnboundLocalError.
        raise ValueError(
            f"Unsupported plot_type {plot_type!r}; "
            "expected 'scatter' or 'line'."
        )

    draw_selected = show_selected and (selected is not None)

    if draw_selected:
        cols = list(data.columns)

        if len(selected.data[cols[0]]) == 1:
            selected = pd.DataFrame(selected.data, columns=cols, index=[0])
            if bounds is not None:
                out_of_bounds = (
                    (selected[x][0] < bounds[0])
                    or (selected[y][0] > bounds[1])
                    or (selected[x][0] > bounds[2])
                    or (selected[y][0] < bounds[3])
                )
                if out_of_bounds:
                    selected = pd.DataFrame(columns=cols)
        else:
            # Anything other than exactly one selected row is not drawn.
            selected = pd.DataFrame(columns=cols)

        selected_plot = hv.Scatter(selected, x, y).opts(
            fill_color="black",
            marker="circle",
            size=10,
            active_tools=["pan", "wheel_zoom"],
        )

    if colours:
        color_key = config.settings["label_colours"]

        # Zero-size points at the origin, one per label, used for the legend.
        color_points = hv.NdOverlay({
            config.settings["labels_to_strings"][f"{n}"]:
            hv.Points(
                [0, 0],
                label=config.settings["labels_to_strings"][f"{n}"],
            ).opts(style=dict(color=color_key[n], size=0))
            for n in color_key
        })

    if smaller_axes_limits:
        min_x, max_x = _clipped_axis_range(data[x])
        min_y, max_y = _clipped_axis_range(data[y])

        # Make sure a selected point is never clipped out of view.
        if draw_selected and selected.shape[0] > 0:
            max_x = np.max([max_x, np.max(selected[x])])
            min_x = np.min([min_x, np.min(selected[x])])
            max_y = np.max([max_y, np.max(selected[y])])
            min_y = np.min([min_y, np.min(selected[y])])

    shade_kwargs = {}
    if colours:
        shade_kwargs = dict(
            color_key=color_key,
            aggregator=ds.by(config.settings["label_col"], ds.count()),
        )

    if smaller_axes_limits:
        opts_kwargs = dict(
            xlim=(min_x, max_x), ylim=(min_y, max_y), responsive=True)
    else:
        opts_kwargs = dict(responsive=True)

    plot = dynspread(
        datashade(p, **shade_kwargs).opts(**opts_kwargs),
        threshold=0.75,
        how="saturate",
    )

    # Only the uncoloured, range-limited variant also pins the dimension
    # ranges through redim.
    if smaller_axes_limits and not colours:
        plot = plot.redim.range(xdim=(min_x, max_x), ydim=(min_y, max_y))

    if slow_render:
        plot = p

    if draw_selected:
        plot = plot * selected_plot

    if legend and colours:
        plot = plot * color_points

    if legend_position is not None:
        plot = plot.opts(legend_position=legend_position)

    return plot
def plot(self, x_var=None, y_var=None):
    """Build the datashaded scatter plot for the chosen axes.

    All rows of ``self.df`` are drawn in a uniform dark grey, the rows of
    ``self.sample_region`` are drawn on top using the configured label
    colours, and the single selected row from ``self.src`` (if exactly
    one is selected) is overlaid as a black circle. Axis limits are
    clipped to the mean +/- 4 standard deviations of ``self.df`` and then
    widened to include the selected row.

    Parameters
    ----------
    x_var : str, optional
        Column for the horizontal axis; defaults to ``self.X_variable``.
    y_var : str, optional
        Column for the vertical axis; defaults to ``self.Y_variable``.

    Returns
    -------
    plot : Holoviews Object
        Overlay of all points, the sample region, the selected point and
        the legend points.
    """
    x_var = self.X_variable if x_var is None else x_var
    y_var = self.Y_variable if y_var is None else y_var
    axes = [x_var, y_var]
    tools = dict(active_tools=["pan", "wheel_zoom"])

    p = hv.Points(self.df, axes).opts(**tools)
    sample_region = hv.Points(self.sample_region, axes).opts(**tools)

    # Only a single selected row is drawn; otherwise use an empty frame.
    cols = list(self.df.columns)
    if len(self.src.data[cols[0]]) == 1:
        selected = pd.DataFrame(self.src.data, columns=cols, index=[0])
    else:
        selected = pd.DataFrame(columns=cols)

    selected_plot = hv.Scatter(selected, x_var, y_var).opts(
        fill_color="black",
        marker="circle",
        size=10,
        active_tools=["pan", "wheel_zoom"],
    )

    color_key = config.settings["label_colours"]

    # Zero-size points at the origin, one per label, used for the legend.
    legend_entries = {}
    for n in color_key:
        label = config.settings["labels_to_strings"][f"{n}"]
        legend_entries[label] = hv.Points(
            [0, 0],
            label=config.settings["labels_to_strings"][f"{n}"],
        ).opts(style=dict(color=color_key[n], size=0))
    color_points = hv.NdOverlay(legend_entries)

    def clipped_range(values):
        # Data extent clipped to mean +/- 4 standard deviations.
        upper = np.max(values)
        lower = np.min(values)
        sd = np.std(values)
        mu = np.mean(values)
        return np.max([mu - 4 * sd, lower]), np.min([mu + 4 * sd, upper])

    min_x, max_x = clipped_range(self.df[x_var])
    min_y, max_y = clipped_range(self.df[y_var])

    # Never clip the selected point out of view.
    if selected.shape[0] > 0:
        max_x = np.max([max_x, np.max(selected[x_var])])
        min_x = np.min([min_x, np.min(selected[x_var])])
        max_y = np.max([max_y, np.max(selected[y_var])])
        min_y = np.min([min_y, np.min(selected[y_var])])

    x_limits = (min_x, max_x)
    y_limits = (min_y, max_y)

    # Same keys as the label colour key, but every label in dark grey.
    new_key = dict.fromkeys(list(color_key.keys()), "#333333")

    background = datashade(
        p,
        color_key=new_key,
        aggregator=ds.by(config.settings["label_col"], ds.count()),
    ).opts(
        xlim=x_limits,
        ylim=y_limits,
        responsive=True,
        alpha=0.5,
        shared_axes=False,
    )
    all_points = dynspread(background, threshold=0.3, how="over")

    foreground = datashade(
        sample_region,
        color_key=color_key,
        aggregator=ds.by(config.settings["label_col"], ds.count()),
        min_alpha=70,
        alpha=100,
    ).opts(
        xlim=x_limits,
        ylim=y_limits,
        responsive=True,
        shared_axes=False,
    )
    sample_region_plot = dynspread(foreground, threshold=0.7, how="saturate")

    layers = all_points * sample_region_plot * selected_plot * color_points
    return layers.opts(shared_axes=False)
df_mle = _create_df_mles_conf_ints() colors = ['crimson', 'orange', 'greenyellow', 'forestgreen', 'blue'] # generate the base Points figure points = hv.Points( data=df_reps_mles, kdims=['alpha', 'beta (1/s)'], vdims='concentration', ).groupby('concentration').overlay() # use datashader so we're not plotting tons of points plot = hv.operation.datashader.dynspread( hv.operation.datashader.datashade( points, aggregator=datashader.by('concentration', datashader.count()), color_key=colors, )) # make segments to show range of D D_segments = hv.NdOverlay({ concentration: hv.Segments(( df_mle.loc[(df_mle['concentration'] == concentration) & (df_mle['parameter'] == 'alpha'), 'conf_start'], df_mle.loc[(df_mle['concentration'] == concentration) & (df_mle['parameter'] == 'beta (1/s)'), 'mle'], df_mle.loc[(df_mle['concentration'] == concentration) & (df_mle['parameter'] == 'alpha'), 'conf_end'], df_mle.loc[(df_mle['concentration'] == concentration) & (df_mle['parameter'] == 'beta (1/s)'), 'mle'], ), ).opts(color=color, line_width=2)