def scatter_dist_by_mappings(dataset, x_kdims, y_kdims, mappings, selection_dim="Gene",
                             datashade_=False, dynspread_=False, ):
    """Scatter one point group per mapping entry, with KDE plots adjoined.

    For every ``name -> values`` entry in ``mappings`` the dataset is
    subset with ``dataset.sel({selection_dim: values})``, the two requested
    variables are converted to a dataframe and wrapped in ``hv.Points``.
    A ``univariate_kde`` (1000 samples) is computed per group over each of
    the two dimensions, and the grouped overlays are adjoined to the scatter
    with ``<<``.

    Args:
        dataset: Object supporting ``.sel({dim: values})`` and
            ``[[x, y]].to_dataframe()`` (presumably an ``xarray.Dataset`` --
            confirm against callers).
        x_kdims: Name of the variable used as the first key dimension.
        y_kdims: Name of the variable used as the second key dimension.
        mappings: Dict mapping a group name to the ``selection_dim`` values
            that make up that group.
        selection_dim: Dimension along which each group is selected.
        datashade_: When true, pass the points overlay through ``datashade``.
        dynspread_: When true, additionally pass the datashaded overlay
            through ``dynspread``. If given without ``datashade_`` a warning
            is issued and datashading is enabled, matching
            ``hv_scatter_dist``.

    Returns:
        The (optionally datashaded) points overlay with the two KDE overlays
        adjoined via ``<<``.
    """
    # Imported locally so this block does not rely on a module-level import.
    import warnings

    # Previously a lone ``dynspread_=True`` was silently ignored; behave like
    # ``hv_scatter_dist`` instead: warn and enable datashading.
    if dynspread_ and not datashade_:
        warnings.warn("Dynspread can only be used with datashade, setting both to true.")
        datashade_ = True

    points = {}
    for name, genes in mappings.items():
        subset = dataset.sel({selection_dim: genes})
        frame = subset[[x_kdims, y_kdims]].to_dataframe()
        points[name] = hv.Points(frame, kdims=[x_kdims, y_kdims])

    # NOTE: the group labelled "dist_x" is computed over ``y_kdims`` and
    # "dist_y" over ``x_kdims``; this pairing is kept exactly as it was.
    dist_x = {
        name: univariate_kde(hv.Distribution(pts, kdims=[y_kdims], group="dist_x"), n_samples=1000)
        for name, pts in points.items()
    }
    dist_y = {
        name: univariate_kde(hv.Distribution(pts, kdims=[x_kdims], group="dist_y"), n_samples=1000)
        for name, pts in points.items()
    }

    points_overlay = hv.NdOverlay(points)
    if datashade_:
        points_overlay = datashade(points_overlay)
        if dynspread_:
            points_overlay = dynspread(points_overlay)

    return points_overlay << hv.NdOverlay(dist_x) << hv.NdOverlay(dist_y)
def test_violin_simple(self):
    """Render a default Violin with bokeh and check ranges, box stats and the KDE outline."""
    samples = np.random.rand(100)
    violin = Violin(samples).opts(plot=dict(violin_width=0.7))

    # Percentiles at 0/25/50/75/100; only the three quartiles feed the checks.
    _, q1, q2, q3, _ = [np.percentile(samples, q=q) for q in range(0, 125, 25)]
    iqr = q3 - q1
    # Whisker ends: 1.5 * IQR beyond the quartiles, clipped to the data extent.
    upper = min(q3 + 1.5 * iqr, np.nanmax(samples))
    lower = max(q1 - 1.5 * iqr, np.nanmin(samples))
    range_lo, range_hi = violin.range(0)

    kde = univariate_kde(violin, cut=5)
    positions = kde.dimension_values(0)
    density = kde.dimension_values(1)
    # Scale the density so its peak equals 0.7 / 2 (half the violin_width set above).
    half_widths = (density / density.max()) * (0.7 / 2.)
    # Outline: negated widths walked forwards, then positive widths walked backwards.
    outline_xs = [('', -width) for width in half_widths]
    outline_xs += [('', width) for width in half_widths[::-1]]
    outline_ys = np.concatenate([positions, positions[::-1]])

    plot = bokeh_renderer.get_plot(violin)
    handles = plot.handles
    self.assertEqual(handles['x_range'].factors, [''])
    y_range = handles['y_range']
    self.assertEqual(y_range.start, range_lo)
    self.assertEqual(y_range.end, range_hi)
    self.assertIn('scatter_1_glyph_renderer', handles)
    self.assertIn('vbar_1_glyph_renderer', handles)

    segment_data = handles['segment_1_source'].data
    self.assertEqual(segment_data['x'], [('', 0)])
    self.assertEqual(segment_data['y0'], [lower])
    self.assertEqual(segment_data['y1'], [upper])

    scatter_data = handles['scatter_1_source'].data
    self.assertEqual(scatter_data['x'], [('', 0)])
    self.assertEqual(scatter_data['y'], [q2])

    patch_data = handles['patches_1_source'].data
    self.assertEqual(patch_data['xs'], [outline_xs])
    self.assertEqual(patch_data['ys'], [outline_ys])
def test_violin_simple(self):
    """Check the bokeh plot of a plain Violin: axis ranges, whiskers, median and patch outline."""
    data = np.random.rand(100)
    violin = Violin(data).opts(plot=dict(violin_width=0.7))

    # The 0th and 100th percentiles are computed but not needed by any assertion.
    _, first_q, median, third_q, _ = [
        np.percentile(data, q=q) for q in range(0, 125, 25)
    ]
    spread = third_q - first_q
    # Expected whisker limits, clipped so they never leave the observed data range.
    whisker_top = min(third_q + 1.5 * spread, np.nanmax(data))
    whisker_bottom = max(first_q - 1.5 * spread, np.nanmin(data))
    y_start, y_end = violin.range(0)

    kde = univariate_kde(violin, cut=5)
    grid = kde.dimension_values(0)
    density = kde.dimension_values(1)
    # Peak-normalise, then scale to 0.7 / 2 (half of the violin_width option above).
    widths = (density / density.max()) * (0.7 / 2.)
    # Mirror the widths: negated in forward order, then positive in reverse order.
    expected_patch_xs = [('', -w) for w in widths] + [('', w) for w in widths[::-1]]
    expected_patch_ys = np.concatenate([grid, grid[::-1]])

    plot = bokeh_renderer.get_plot(violin)
    handles = plot.handles
    self.assertEqual(handles['x_range'].factors, [''])
    self.assertEqual(handles['y_range'].start, y_start)
    self.assertEqual(handles['y_range'].end, y_end)
    for renderer_key in ('scatter_1_glyph_renderer', 'vbar_1_glyph_renderer'):
        self.assertIn(renderer_key, handles)

    seg = handles['segment_1_source'].data
    self.assertEqual(seg['x'], [('', 0)])
    self.assertEqual(seg['y0'], [whisker_bottom])
    self.assertEqual(seg['y1'], [whisker_top])

    dot = handles['scatter_1_source'].data
    self.assertEqual(dot['x'], [('', 0)])
    self.assertEqual(dot['y'], [median])

    patch = handles['patches_1_source'].data
    self.assertEqual(patch['xs'], [expected_patch_xs])
    self.assertEqual(patch['ys'], [expected_patch_ys])
def test_violin_inner_stick(self):
    """With inner='stick', segment x values are the samples snapped to the nearest KDE position."""
    samples = np.random.rand(100)
    violin = Violin(samples).opts(plot=dict(inner='stick'))
    kde = univariate_kde(violin, cut=5)
    grid = kde.dimension_values(0)

    plot = bokeh_renderer.get_plot(violin)
    self.assertIn('segment_1_glyph_renderer', plot.handles)

    # For each sample pick the KDE grid position closest to it.
    snapped = [grid[np.abs(grid - sample).argmin()] for sample in samples]
    expected = np.array(snapped)
    self.assertEqual(plot.handles['segment_1_source'].data['x'], expected)
def test_violin_inner_stick(self):
    """Check the 'stick' inner style: one segment per sample, placed at the closest KDE position."""
    data = np.random.rand(100)
    violin = Violin(data).opts(plot=dict(inner='stick'))
    kde_positions = univariate_kde(violin, cut=5).dimension_values(0)

    plot = bokeh_renderer.get_plot(violin)
    self.assertIn('segment_1_glyph_renderer', plot.handles)

    # Nearest-neighbour lookup of every sample on the KDE positions.
    nearest = []
    for value in data:
        closest_index = np.argmin(np.abs(kde_positions - value))
        nearest.append(kde_positions[closest_index])
    segment_x = plot.handles['segment_1_source'].data['x']
    self.assertEqual(segment_x, np.array(nearest))
def hv_scatter_dist(dataset, x_kdims, y_kdims, datashade_=False, dynspread_=False):
    """Scatter two dataset variables against each other, with KDE plots adjoined.

    Args:
        dataset: Object supporting ``[[x, y]].to_dataframe()`` (presumably an
            ``xarray.Dataset`` -- confirm against callers).
        x_kdims: Name of the variable used as the first key dimension.
        y_kdims: Name of the variable used as the second key dimension.
        datashade_: When true, pass the points through ``datashade``.
        dynspread_: When true, also pass the datashaded points through
            ``dynspread``. Requesting it without ``datashade_`` emits a
            warning and turns datashading on.

    Returns:
        The (optionally datashaded) points with the two ``univariate_kde``
        results adjoined via ``<<``.
    """
    dynspread_without_datashade = dynspread_ and not datashade_
    if dynspread_without_datashade:
        warnings.warn("Dynspread can only be used with datashade, setting both to true.")
        datashade_ = True

    frame = dataset[[x_kdims, y_kdims]].to_dataframe()
    scatter = hv.Points(frame, kdims=[x_kdims, y_kdims])

    def _kde_over(dim, group):
        # KDE of the raw (un-shaded) points along one dimension, 1000 samples.
        distribution = hv.Distribution(scatter, kdims=[dim], group=group)
        return univariate_kde(distribution, n_samples=1000)

    # NOTE: group "dist_x" is computed over y_kdims and "dist_y" over x_kdims.
    dist_x = _kde_over(y_kdims, "dist_x")
    dist_y = _kde_over(x_kdims, "dist_y")

    main = scatter
    if datashade_:
        main = datashade(main)
        # After the guard above, dynspread_ implies datashade_.
        if dynspread_:
            main = dynspread(main)

    return main << dist_x << dist_y
def test_violin_inner_quartiles(self):
    """With inner='quartiles', segment x values are the quartiles snapped to the nearest KDE position."""
    samples = np.random.rand(100)
    violin = Violin(samples).opts(plot=dict(inner='quartiles'))
    kde = univariate_kde(violin, cut=5)
    grid = kde.dimension_values(0)

    plot = bokeh_renderer.get_plot(violin)
    self.assertIn('segment_1_glyph_renderer', plot.handles)
    segment_source = plot.handles['segment_1_source']

    # 25th, 50th and 75th percentiles of the samples.
    quartiles = [np.percentile(samples, q=q) for q in range(25, 100, 25)]
    # Snap each quartile to the closest KDE grid position.
    snapped = [grid[np.abs(grid - quartile).argmin()] for quartile in quartiles]
    self.assertEqual(segment_source.data['x'], np.array(snapped))
def test_violin_inner_quartiles(self):
    """Check the 'quartiles' inner style: three segments at the KDE positions nearest Q1, Q2, Q3."""
    data = np.random.rand(100)
    violin = Violin(data).opts(plot=dict(inner='quartiles'))
    kde_positions = univariate_kde(violin, cut=5).dimension_values(0)

    plot = bokeh_renderer.get_plot(violin)
    self.assertIn('segment_1_glyph_renderer', plot.handles)
    seg_source = plot.handles['segment_1_source']

    expected = []
    for q in (25, 50, 75):
        quartile = np.percentile(data, q=q)
        # Index of the KDE position closest to this quartile.
        closest_index = np.argmin(np.abs(kde_positions - quartile))
        expected.append(kde_positions[closest_index])
    self.assertEqual(seg_source.data['x'], np.array(expected))
def test_univariate_kde_nans(self):
    """univariate_kde over ``self.dist_nans`` must give an all-zero density on the 5-sample grid."""
    # self.dist_nans is a fixture defined outside this block (presumably NaN-only data).
    expected = Area(
        (np.arange(5), [0] * 5),
        'Value',
        ('Value_density', 'Value Density'),
    )
    result = univariate_kde(self.dist_nans, n_samples=5, bin_range=(0, 4))
    self.assertEqual(result, expected)
def test_univariate_kde_flat_distribution(self):
    """A Distribution whose samples are all identical must yield an empty Area."""
    flat = Distribution([1, 1, 1])
    expected = Area([], 'Value', ('Value_density', 'Value Density'))
    result = univariate_kde(flat, n_samples=5, bin_range=(0, 4))
    self.assertEqual(result, expected)
def test_univariate_kde(self):
    """univariate_kde over ``self.dist`` must reproduce the reference density on a 5-sample grid."""
    # Reference density values for the self.dist fixture (defined outside this block).
    reference_density = [0.17594505, 0.23548218, 0.23548218, 0.17594505, 0.0740306]
    expected = Area(
        (np.arange(5), reference_density),
        'Value',
        ('Value_density', 'Value Density'),
    )
    result = univariate_kde(self.dist, n_samples=5, bin_range=(0, 4))
    self.assertEqual(result, expected)
def test_univariate_kde_nans(self):
    """univariate_kde over ``self.dist_nans`` must give an all-zero density labelled 'Density'."""
    # self.dist_nans is a fixture defined outside this block (presumably NaN-only data).
    grid = np.arange(5)
    zeros = [0] * 5
    expected = Area((grid, zeros), 'Value', ('Value_density', 'Density'))
    result = univariate_kde(self.dist_nans, n_samples=5, bin_range=(0, 4))
    self.assertEqual(result, expected)
def test_univariate_kde_flat_distribution(self):
    """Identical samples must produce an empty Area whose value dimension is labelled 'Density'."""
    expected = Area([], 'Value', ('Value_density', 'Density'))
    result = univariate_kde(
        Distribution([1, 1, 1]),
        n_samples=5,
        bin_range=(0, 4),
    )
    self.assertEqual(result, expected)
def test_univariate_kde(self):
    """univariate_kde over ``self.dist`` must match the reference density, labelled 'Density'."""
    grid = np.arange(5)
    # Reference density values for the self.dist fixture (defined outside this block).
    reference_density = [0.17594505, 0.23548218, 0.23548218, 0.17594505, 0.0740306]
    expected = Area((grid, reference_density), 'Value', ('Value_density', 'Density'))
    result = univariate_kde(self.dist, n_samples=5, bin_range=(0, 4))
    self.assertEqual(result, expected)