class jitter:
    """Plot ``mpg`` per model year twice: once plain, once with x-jitter.

    Adapted from https://bokeh.pydata.org/en/latest/docs/gallery/jitter.html

    The plotting code lives directly in the class body, so it runs once,
    when the ``class`` statement itself is executed.  ``df``, ``figure``,
    ``Jitter``, ``column``, ``output_file`` and ``show`` must already be
    available in the enclosing module.
    """

    # Colors are reused cyclically when there are more years than entries.
    colors = [
        "red",
        "olive",
        "darkred",
        "goldenrod",
        "skyblue",
        "orange",
        "salmon",
    ]

    p1 = figure(
        plot_width=600,
        plot_height=300,
        title="Years vs mpg without jittering",
    )
    p2 = figure(
        plot_width=600,
        plot_height=300,
        title="Years vs mpg with jittering",
    )

    for i, year in enumerate(df['yr'].unique()):
        # All mpg values recorded for this model year.
        y = df.loc[df['yr'] == year, 'mpg']
        color = colors[i % len(colors)]

        # Same data on both figures; only p2 spreads the points along x.
        p1.circle(x=year, y=y, color=color)
        p2.circle(
            x={'value': year, 'transform': Jitter(width=1)},
            y=y,
            color=color,
        )

    output_file("jitter.html")
    show(column(p1, p2))
import numpy as np

from bokeh.models import Jitter
from bokeh.plotting import figure, output_file, show

# Two columns of 2500 points each, drawn at x=1 and x=2 and spread
# horizontally by a Jitter transform so overlapping points become visible.
p = figure(
    plot_width=500,
    plot_height=400,
    x_range=(0, 3),
    y_range=(0, 10),
    title="Demonstration of Jitter transform",
)

# Keep the draw order (uniform first, then normal) so that a seeded
# generator reproduces the same data as before.
y1 = np.random.random(2500) * 10
y2 = np.random.normal(size=2500) * 2 + 5

for position, values, shade in ((1, y1, "navy"), (2, y2, "firebrick")):
    p.circle(
        x={'value': position, 'transform': Jitter(width=0.4)},
        y=values,
        color=shade,
        alpha=0.3,
    )

output_file("jitter.html")
show(p)
def plot_clusters(vectors,
                  clusters,
                  texts,
                  labels,
                  algorithm_name='Clusterisation',
                  plot_path='',
                  plot_name='clustering.html',
                  plot_size=1000,
                  plot_title="Embeddings clusters"):
    """
    Plots clusters of embeddings as a jittered scatter and opens the result.

    Parameters
    ----------
    vectors : array-like
        Embeddings; indexed as ``vectors[:, 0]`` and ``vectors[:, 1]``, so it
        must support NumPy-style 2-D indexing and have at least two columns
    clusters : list
        Per-point cluster values shown in the hover tooltip
    texts : list
        A list of texts corresponding to the embeddings (hover tooltip)
    labels : list
        Per-point integer labels; each one is used directly as an index into
        the color palette
    algorithm_name : string, optional
        A name of clustering algorithm, used as the figure title
    plot_path : string, optional
        A path prefix of the output plot; concatenated with ``plot_name``
        as-is, so include a trailing separator when passing a directory
    plot_name : string, optional
        A file name of output plot
    plot_size : int, optional
        Width and height of output plot
    plot_title : string, optional
        A title of the output HTML document
    """
    unique_labels = set(labels)

    # Many distinct labels need the large continuous palette.
    pal = Turbo256 if len(unique_labels) > 100 else Category20[20]

    if len(unique_labels) <= len(pal):
        palette = pal
    else:
        # More labels than base colors: resample across the whole base
        # palette.  The builtin ``int`` replaces the removed ``np.int`` alias,
        # and the upper bound follows the palette actually chosen instead of
        # a hard-coded 19.
        picks = np.linspace(0, len(pal) - 1, num=len(unique_labels), dtype=int)
        palette = [pal[each] for each in picks]

    # assign colors from palette
    colors = [palette[cl] for cl in labels]

    source = ColumnDataSource(data=dict(x=vectors[:, 0],
                                        y=vectors[:, 1],
                                        colors=colors,
                                        texts=texts,
                                        clusters=clusters))

    TOOLTIPS = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
        ("cluster", "@clusters"),
        ("text", "@texts"),
    ]

    p = figure(output_backend="webgl",
               title=algorithm_name,
               plot_width=plot_size,
               plot_height=plot_size,
               tooltips=TOOLTIPS)

    # Jitter both axes so coincident embeddings do not hide each other.
    p.scatter(x={'field': 'x', 'transform': Jitter(width=0.4)},
              y={'field': 'y', 'transform': Jitter(width=0.4)},
              color='colors',
              alpha=0.5,
              source=source)

    output_file(plot_path + plot_name, title=plot_title)
    show(p)
import numpy as np from bokeh.models import Button, Column, ColumnDataSource, CustomJS, Jitter, LabelSet from bokeh.plotting import figure, output_file, show N = 1000 source = ColumnDataSource(data=dict( x=np.ones(N), xn=2*np.ones(N), xu=3*np.ones(N), y=np.random.random(N)*10 )) normal = Jitter(width=0.2, distribution="normal") uniform = Jitter(width=0.2, distribution="uniform") p = figure(x_range=(0, 4), y_range=(0,10), toolbar_location=None, x_axis_location="above") p.circle(x='x', y='y', color='firebrick', source=source, size=5, alpha=0.5) p.circle(x='xn', y='y', color='olive', source=source, size=5, alpha=0.5) p.circle(x='xu', y='y', color='navy', source=source, size=5, alpha=0.5) label_data = ColumnDataSource(data=dict( x=[1,2,3], y=[0, 0, 0], t=['Original', 'Normal', 'Uniform'] )) label_set = LabelSet(x='x', y='y', text='t', y_offset=-4, source=label_data, render_mode='css', text_baseline="top", text_align='center') p.add_layout(label_set) callback = CustomJS(args=dict(source=source, normal=normal, uniform=uniform), code=""" const data = source.data; for (let i = 0; i < data.y.length; i++) { data.xn[i] = normal.compute(data.x[i] + 1); }
from bokeh.layouts import column
from bokeh.models import Jitter
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.autompg import autompg as df

# Colors are reused cyclically when there are more years than entries.
colors = [
    "red",
    "olive",
    "darkred",
    "goldenrod",
    "skyblue",
    "orange",
    "salmon",
]

p1 = figure(
    plot_width=600,
    plot_height=300,
    title="Years vs mpg without jittering",
)
p2 = figure(
    plot_width=600,
    plot_height=300,
    title="Years vs mpg with jittering",
)

for i, year in enumerate(df['yr'].unique()):
    # All mpg values recorded for this model year.
    y = df.loc[df['yr'] == year, 'mpg']
    color = colors[i % len(colors)]

    # Same data on both figures; only p2 spreads the points along x.
    p1.circle(x=year, y=y, color=color)
    p2.circle(
        x={'value': year, 'transform': Jitter(width=1)},
        y=y,
        color=color,
    )

output_file("jitter.html")
show(column(p1, p2))
(df_6['Dept'] == 'Engineering') | (df_6['Dept'] == 'Animal Control')] df_6.reset_index(drop=True, inplace=True) colors = brewer['Set1'][7] p6 = figure(y_range=(0, 6200), tools=["hover", 'box_zoom', 'reset', 'save'], tooltips="@Topic; @Count calls") for i, d in enumerate(list(df_6['Dept'].unique())): y = df_6[df_6['Dept'] == d][['Count', 'Topic']] color = colors[i % len(colors)] p6.circle(x={ 'value': i, 'transform': Jitter(width=0.4) }, y='Count', source=y, color=color, size=10, alpha=0.75) sw_trash = df_6[df_6['Topic'] == "Miscellaneous Trash Information"] p6.diamond(x={ 'value': 1, 'transform': Jitter(width=0.4) }, y=5999, source=sw_trash, size=24,
def test_Jitter() -> None:
    """Check the property defaults of a ``Jitter`` built with no arguments."""
    transform = Jitter()

    # Scalar defaults, compared in the same order as they are declared here.
    assert transform.mean == 0
    assert transform.width == 1
    assert transform.distribution == "uniform"

    # No range is attached unless the caller supplies one.
    assert transform.range is None
def modify_doc(doc):
    """Build the crossfilter application and attach it to ``doc``.

    The config file (``c``) and data file (``d``) are taken from the session
    request arguments, falling back to ``defaultConfig.toml`` and the config's
    ``defaultDataPath``.  The resulting layout holds the control widgets, a
    crossfilter scatter plot, a tile map and a table of the current selection.

    Parameters
    ----------
    doc : bokeh document
        Document to populate; its ``session_context`` supplies the request
        arguments, and its root, title and theme are set here.

    Raises
    ------
    ValueError
        If the ``c`` or ``d`` request argument contains characters other than
        alphanumerics, ``.``, ``-`` and ``_``.
    FileNotFoundError
        If the requested config/data file does not exist, or if the config's
        ``defaultDataPath`` is missing or does not point to a file.
    """
    import math  # local import: only needed for the axis label rotation

    SIZES = list(range(6, 22, 3))

    # define available palettes
    palettes = {
        k: v
        for k, v in cc.palette.items()
        if ("_" not in k and k not in ["bkr", "coolwarm", "bjy", "bky", "gwv"])
    }

    ###########
    # helpers #
    ###########

    def all_numeric(values):
        """Return True when every (string) value consists of digits only."""
        return all(val.isnumeric() for val in values)

    def discrete_codes(column):
        """Return (per-row integer code, number of distinct values) for a column.

        Distinct values are numbered in numeric order when they are all
        numeric strings, otherwise in order of first appearance.
        """
        values = column[pd.notnull(column)].unique()
        if all_numeric(values):
            values = sorted(values, key=float)
        lookup = {val: idx for idx, val in enumerate(values)}
        return [lookup[val] for val in column.values], len(values)

    def discrete_range(column):
        """Return the sorted categorical axis range for a discrete column."""
        values = column[pd.notnull(column)].unique()
        if all_numeric(values):
            return sorted(values, key=float)
        return sorted(values)

    #################
    # data handling #
    #################

    def get_data(path, force_discrete_colorable):
        """Read data from csv and transform map coordinates."""
        data = pd.read_csv(path)

        # data from columns in force_discrete_colorable will be treated as discrete even if numeric
        for col in data.columns:
            if col in force_discrete_colorable:
                data[col] = data[col].apply(str)
        data = data.applymap(lambda x: "NaN" if pd.isnull(x) else x)

        # transform coords to map projection
        wgs84 = pyproj.Proj(init="epsg:4326")
        web_mer = pyproj.Proj(init="epsg:3857")

        # Missing values were just replaced by the string "NaN", so a plain
        # notnull() test would always pass.  Coerce to numbers to find the
        # rows that really carry coordinates, and use ONE mask for both the
        # projected values and the assignment targets.
        lon = pd.to_numeric(data["lon"], errors="coerce")
        lat = pd.to_numeric(data["lat"], errors="coerce")
        has_coords = lon.notnull() & lat.notnull()

        data["easting"] = float("nan")
        data["northing"] = float("nan")
        if has_coords.any():
            projected = [
                pyproj.transform(wgs84, web_mer, lon_val, lat_val)
                for lon_val, lat_val in zip(lon[has_coords], lat[has_coords])
            ]
            eastings, northings = zip(*projected)
            data.loc[has_coords, "easting"] = list(eastings)
            data.loc[has_coords, "northing"] = list(northings)

        # show unknown locations on map in antarctic
        default_wgs84 = config.get('default_coords') or {'lon': 0, 'lat': -80}
        default_easting, default_northing = pyproj.transform(
            wgs84, web_mer, default_wgs84["lon"], default_wgs84["lat"])
        data["easting"] = data["easting"].fillna(default_easting)
        data["northing"] = data["northing"].fillna(default_northing)

        return data

    def update_df(_df, _size, _color, _palette, _continuous,
                  _discrete_sizeable, _discrete_colorable):
        """Update the size and color columns of the given df based on widget
        selections and column classifications."""
        _df["size"] = 9
        if _size != 'None' and _size in _discrete_sizeable:
            groups, _ = discrete_codes(_df[_size])
            _df["size"] = [SIZES[xx] for xx in groups]
        elif _size != 'None' and _size in _continuous:
            # qcut fails when quantile edges are not unique; fall back to
            # equal-width bins in that case.
            try:
                groups = pd.qcut(_df[_size].values, len(SIZES))
            except ValueError:
                groups = pd.cut(_df[_size].values, len(SIZES))
            _df["size"] = [SIZES[xx] for xx in groups.codes]

        _df["color"] = "#31AADE"
        if _color != 'None' and _color in _discrete_colorable:
            groups, n_values = discrete_codes(_df[_color])
            colors = linear_palette(palettes[_palette], n_values)
            _df["color"] = [colors[xx] for xx in groups]
        elif _color != 'None' and _color in _continuous:
            colors = palettes[_palette]
            groups = pd.cut(_df[_color].values, len(colors))
            _df["color"] = [colors[xx] for xx in groups.codes]

    def create_source(_df, _size, _color, _palette, _continuous,
                      _discrete_sizeable, _discrete_colorable):
        """Update df and return new ColumnDataSource."""
        update_df(_df, _size, _color, _palette, _continuous,
                  _discrete_sizeable, _discrete_colorable)

        # "ns"/"es" are the (possibly jittered) coordinates drawn on the map;
        # "northing"/"easting" keep the untouched originals.
        _df["ns"] = _df["northing"]
        _df["es"] = _df["easting"]

        # create a ColumnDataSource from the data set
        return ColumnDataSource(_df)

    def update_source(_source, _df, _size, _color, _palette, _continuous,
                      _discrete_sizeable, _discrete_colorable):
        """Update df and propagate the new size/color columns to the source."""
        update_df(_df, _size, _color, _palette, _continuous,
                  _discrete_sizeable, _discrete_colorable)
        _source.data.update({"size": _df["size"], "color": _df["color"]})

    #######################
    # Data Visualizations #
    #######################

    def create_crossfilter(_df, _source, _discrete, _x, _y):
        """Return a crossfilter plot linked to ColumnDataSource '_source'."""
        kw = dict()
        if _x in _discrete:
            kw["x_range"] = discrete_range(_df[_x])
        if _y in _discrete:
            kw["y_range"] = discrete_range(_df[_y])

        x_title = _x.title()
        y_title = _y.title()

        p = figure(
            plot_height=700,
            plot_width=700,  # responsive=True,
            tools="wheel_zoom, pan, save, reset, box_select, tap",
            active_drag="box_select",
            active_scroll="wheel_zoom",
            title="%s vs %s" % (y_title, x_title),
            **kw,
        )

        if _x in _discrete:
            # math.pi replaces the removed pandas alias ``pd.np.pi``.
            p.xaxis.major_label_orientation = math.pi / 4

        # plot data on crossfilter
        p.circle(
            x=_x,
            y=_y,
            color="color",
            size="size",
            source=_source,
            line_color="white",
            alpha=0.6,
            # set visual properties for selected glyphs
            selection_fill_color="color",
            selection_fill_alpha=0.6,
            selection_line_color="white",
            selection_line_alpha=0.6,
            # set visual properties for non-selected glyphs
            nonselection_fill_color="white",
            nonselection_fill_alpha=0.1,
            nonselection_line_color="color",
            nonselection_line_alpha=0.6,
        )

        return p

    def create_map(_source):
        """Return map linked to ColumnDataSource '_source'."""
        stamen = copy.copy(STAMEN_TERRAIN)

        # create map
        bound = 20000000  # meters
        m = figure(
            plot_height=700,
            plot_width=700,  # responsive=True,
            tools="wheel_zoom, pan, reset, box_select, tap",
            active_drag="box_select",
            active_scroll="wheel_zoom",
            x_range=(-bound, bound),
            y_range=(-bound, bound))
        m.axis.visible = False
        m.add_tile(stamen)

        # plot data on world map
        m.circle(
            x="es",
            y="ns",
            color="color",
            size="size",
            source=_source,
            line_color="white",
            alpha=0.6,
            # set visual properties for selected glyphs
            selection_fill_color="color",
            selection_fill_alpha=0.6,
            selection_line_color="white",
            selection_line_alpha=0.6,
            # set visual properties for non-selected glyphs
            nonselection_fill_color="black",
            nonselection_fill_alpha=0.01,
            nonselection_line_color="color",
            nonselection_line_alpha=0.6,
        )

        return m

    def create_table(_columns, _source):
        """Return table linked to ColumnDataSource '_source'."""
        table_cols = [TableColumn(field=col, title=col) for col in _columns]
        return DataTable(
            source=_source,
            columns=table_cols,
            width=1600,
            height=250,
            fit_columns=False,
        )

    #############
    # callbacks #
    #############

    # noinspection PyUnusedLocal
    def axis_change(attr, old, new):
        """Replace the crossfilter plot after an axis selection changed."""
        page_layout.children[0].children[1] = create_crossfilter(
            df, source, discrete, x.value, y.value)

    # noinspection PyUnusedLocal
    def style_change(attr, old, new):
        """Update ColumnDataSource 'source' after size/color/palette changed."""
        update_source(source, df, size.value, color.value, palette.value,
                      continuous, discrete_sizeable, discrete_colorable)

    # noinspection PyUnusedLocal
    def selection_change(attr, old, new):
        """Update ColumnDataSource 'table_source' with selection found in 'source'."""
        selected = source.selected['1d']['indices']
        table_source.data = table_source.from_df(df.iloc[selected, :])

    ########
    # Main #
    ########

    # get user config and data paths from session arguments
    args = doc.session_context.request.arguments

    def resolve_request_file(param, kind, folder):
        """Validate request argument ``param`` and return ``folder`` + name.

        Only alphanumerics and ``.-_`` are accepted, which keeps path
        separators out of the user-supplied file name.
        """
        requested = tornado.escape.url_unescape(args.get(param)[0])

        # check that file name is valid
        clean_name = "".join(
            c for c in requested if c.isalnum() or (c in ".-_"))
        if clean_name != requested:
            message = "Invalid character(s) in {} parameter: {}".format(
                kind, requested)
            log.info(message)
            raise ValueError(message)

        # check that file exists
        candidate = folder + requested
        if not os.path.isfile(candidate):
            message = "No such {} file found: {}".format(kind, requested)
            log.info(message)
            raise FileNotFoundError(message)

        return candidate

    # validate config parameter
    if 'c' in args:
        configPath = resolve_request_file('c', "config", "config/")
    else:
        configPath = "defaultConfig.toml"

    # load config file
    with open(configPath) as toml_data:
        config = pytoml.load(toml_data)

    # validate data parameter
    if 'd' in args:
        dataPath = resolve_request_file('d', "data", "data/")
    else:
        dataPath = config.get("defaultDataPath")
        # A missing key yields None, which os.path.isfile() rejects with a
        # TypeError; report it as the same "not a file" error instead.
        if not dataPath or not os.path.isfile(dataPath):
            message = 'defaultDataPath "{}" from config file "{}" does not point to a file'.format(
                dataPath, configPath)
            raise FileNotFoundError(message)

    df = get_data(dataPath, config.get("force_discrete_colorable", []))

    # categorize columns
    columns = [c for c in df.columns if c not in {"easting", "northing"}]
    discrete = [c for c in columns if df[c].dtype == object]
    continuous = [c for c in columns if c not in discrete]
    discrete_sizeable = [
        c for c in discrete if len(df[c].unique()) <= len(SIZES)
    ]
    discrete_colorable = [
        c for c in discrete
        if (len(df[c].unique()) <= config.get("max_discrete_colors", 256)) or (
            (c in config.get("force_discrete_colorable", [])) and
            (len(df[c].unique()) < 256))
    ]

    # create widgets
    x = Select(title='X-Axis',
               value=(config.get("default_xAxis")
                      if config.get("default_xAxis") in columns else columns[1]),
               options=columns)
    x.on_change('value', axis_change)

    y = Select(title='Y-Axis',
               value=(config.get("default_yAxis")
                      if config.get("default_yAxis") in columns else columns[2]),
               options=columns)
    y.on_change('value', axis_change)

    sizeOptions = ['None'] + discrete_sizeable + continuous
    size = Select(title='Size',
                  value=config.get("default_sizeBy", "None"),
                  options=sizeOptions)
    size.on_change('value', style_change)

    colorOptions = ['None'] + discrete_colorable + continuous
    color = Select(title='Color',
                   value=config.get("default_colorBy", "None"),
                   options=colorOptions)
    color.on_change('value', style_change)

    palleteOptions = list(palettes)
    palette = Select(title='Palette',
                     value=config.get("default_palette", "inferno"),
                     options=palleteOptions)
    palette.on_change('value', style_change)

    ######################
    # initialize sources #
    ######################

    source = create_source(df, size.value, color.value, palette.value,
                           continuous, discrete_sizeable, discrete_colorable)
    source.on_change('selected', selection_change)
    table_source = ColumnDataSource(df)

    ########################
    # javascript callbacks #
    ########################

    # Serializes the table source to CSV in the browser; the column list is
    # injected as a JSON array through the %s placeholder.
    download_callback = CustomJS(args=dict(table_source=table_source),
                                 code=r"""
            var data = table_source.data;
            var columns = %s;
            var n = columns.length;
            var m = data[columns[0]].length;

            var csvLines = [];

            var currRow = [];
            for (j=0; j<n; j++) {
                currRow.push("\"" + columns[j].toString() + "\"");
            }
            csvLines.push(currRow.join(","));

            for (i=0; i < m; i++) {
                var currRow = [];
                for (j=0; j<n; j++) {
                    if (typeof(data[columns[j]][i]) == 'string') {
                        currRow.push("\"" + data[columns[j]][i].toString() + "\"");
                    } else {
                        currRow.push(data[columns[j]][i].toString());
                    }
                }
                csvLines.push(currRow.join(","));
            }

            var filetext = csvLines.join("\n");
            var filename = 'data_result.csv';
            var blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' });

            //addresses IE
            if (navigator.msSaveBlob) {
                navigator.msSaveBlob(blob, filename);
            } else {
                var link = document.createElement("a");
                link = document.createElement('a');
                link.href = URL.createObjectURL(blob);
                link.download = filename;
                link.target = "_blank";
                link.style.visibility = 'hidden';
                link.dispatchEvent(new MouseEvent('click'));
            }
        """ % json.dumps(columns))

    # Rewrites the map coordinates ("es"/"ns") from the originals, either
    # copied verbatim (slider at 0) or passed through the Jitter transform.
    jitter_callback = CustomJS(args=dict(source=source, map_jitter=Jitter()),
                               code=r"""
            var data = source.data;
            if (slider.value == 0) {
                for (var i = 0; i < data['easting'].length; i++) {
                    data['es'][i] = data['easting'][i];
                }
                for (var i = 0; i < data['northing'].length; i++) {
                    data['ns'][i] = data['northing'][i];
                }
            } else {
                map_jitter.distribution = dist.value
                map_jitter.width = slider.value * 1000
                for (var i = 0; i < data['easting'].length; i++) {
                    data['es'][i] = map_jitter.compute(data['easting'][i]);
                }
                for (var i = 0; i < data['northing'].length; i++) {
                    data['ns'][i] = map_jitter.compute(data['northing'][i]);
                }
            }
            source.trigger('change');
        """)

    download_button = Button(label="Download Selected",
                             button_type="success",
                             callback=download_callback)

    jitter_selector = Select(title="Map Jitter Distribution:",
                             value="uniform",
                             options=["uniform", "normal"],
                             callback=jitter_callback)

    jitter_slider = Slider(start=0,
                           end=1000,
                           value=0,
                           step=10,
                           title="Map Jitter Width (Km):",
                           callback=jitter_callback)

    # The callback reads both widgets, which only exist after it was created.
    jitter_callback.args["dist"] = jitter_selector
    jitter_callback.args["slider"] = jitter_slider

    # initialize plots
    crossfilter = create_crossfilter(df, source, discrete, x.value, y.value)
    mapPlot = create_map(source)

    # create layout
    controls = widgetbox([
        x, y, color, palette, size, jitter_selector, jitter_slider,
        download_button
    ], width=200)
    table = widgetbox(create_table(columns, table_source))
    page_layout = layout([[controls, crossfilter, mapPlot], [row(table)]])

    # add layout to document
    doc.add_root(page_layout)
    doc.title = "Crossfilter"

    # safe_load: the theme is a constant literal, and yaml.load() without an
    # explicit Loader is rejected by current PyYAML releases.
    doc.theme = Theme(json=yaml.safe_load("""
        attrs:
            Figure:
                background_fill_color: '#2F2F2F'
                border_fill_color: '#2F2F2F'
                outline_line_color: '#444444'
            Axis:
                axis_line_color: "white"
                axis_label_text_color: "white"
                major_label_text_color: "white"
                major_tick_line_color: "white"
                minor_tick_line_color: "white"
                minor_tick_line_color: "white"
            Grid:
                grid_line_dash: [6, 4]
                grid_line_alpha: .3
            Title:
                text_color: "white"
        """))