def render_correlation_impact(
    itmdt: Intermediate, plot_width: int, plot_height: int, palette: Sequence[str]
) -> Dict[str, Any]:
    """
    Build one correlation heatmap per entry of ``itmdt["data"]``.

    Parameters
    ----------
    itmdt
        Intermediate result; the keys "axis_range", "data", "insights" and
        "tabledata" are read here.
    plot_width
        Width given to every figure, also used as its frame width.
    plot_height
        Height given to every figure.
    palette
        Colors handed to ``create_color_mapper``.

    Returns
    -------
    Dict[str, Any]
        The figures ("layout"), their titles ("meta"), the insights and table
        data passed through from ``itmdt``, and the container width
        (``plot_width + 150``).
    """
    hover_fields = [("x", "@x"), ("y", "@y"), ("correlation", "@correlation{1.11}")]
    factors = itmdt["axis_range"]
    panels: List[Panel] = []

    for method, corr in itmdt["data"].items():
        # work on a copy and stringify both axes, in case of numerical column names
        corr = corr.copy()
        for axis in ("x", "y"):
            corr[axis] = corr[axis].apply(str)

        # a fresh mapper / color bar pair is created for every figure
        mapper, color_bar = create_color_mapper(palette)

        heatmap = Figure(
            x_range=FactorRange(*factors),
            y_range=FactorRange(*reversed(factors)),
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location="below",
            tools="hover",
            toolbar_location=None,
            tooltips=hover_fields,
            background_fill_color="#fafafa",
            title=" ",
        )
        tweak_figure(heatmap)

        # one unit square per (x, y) pair, colored by its correlation value
        cell_color = {"field": "correlation", "transform": mapper}
        heatmap.rect(
            x="x",
            y="y",
            width=1,
            height=1,
            source=corr,
            fill_color=cell_color,
            line_color=None,
        )
        heatmap.frame_width = plot_width
        heatmap.add_layout(color_bar, "left")

        panels.append(Panel(child=heatmap, title=method))

    figures = [panel.child for panel in panels]
    titles = [panel.title for panel in panels]
    return {
        "insights": itmdt["insights"],
        "tabledata": itmdt["tabledata"],
        "layout": figures,
        "meta": titles,
        "container_width": plot_width + 150,
    }
def render_dendrogram(dend: Dict[str, Any], plot_width: int, plot_height: int) -> Figure:
    """
    Render a missing dendrogram.

    Parameters
    ----------
    dend
        Dendrogram description. The keys "icoord" (x coordinates), "dcoord"
        (y coordinates) and "ivl" (leaf labels) are read; each coordinate
        entry is expected to hold the points of one link, as produced by
        scipy's dendrogram.
    plot_width
        Width of the plot. Replaced by ``28 * number of labels`` when there
        are more than 20 labels.
    plot_height
        Height of the plot.

    Returns
    -------
    Figure
        The dendrogram with a hover tool on the horizontal segments.
    """
    # list of lists of dcoords and icoords from scipy.dendrogram
    xs, ys, cols = dend["icoord"], dend["dcoord"], dend["ivl"]

    # if the number of columns is greater than 20, make the plot wider
    if len(cols) > 20:
        plot_width = 28 * len(cols)

    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
        tools="",
        title=" ",
    )

    # round x coordinates to integers and y coordinates to 2 decimals,
    # then plot the dendrogram
    xs = [[round(coord) for coord in coords] for coords in xs]
    ys = [[round(coord, 2) for coord in coords] for coords in ys]
    fig.multi_line(xs=xs, ys=ys, line_color="#8073ac")

    # extract the horizontal lines (the two middle points of every link)
    # for the hover tooltip
    h_lns_x = [coords[1:3] for coords in xs]
    h_lns_y = [coords[1:3] for coords in ys]
    # the y value of a horizontal segment is the value shown in the tooltip
    null_mismatch_vals = [coord[0] for coord in h_lns_y]

    source = ColumnDataSource(dict(x=h_lns_x, y=h_lns_y, n=null_mismatch_vals))
    h_lns = fig.multi_line(xs="x", ys="y", source=source, line_color="#8073ac")
    hover_pts = HoverTool(
        renderers=[h_lns],
        tooltips=[("Average distance", "@n{0.1f}")],
        line_policy="interp",
    )
    fig.add_tools(hover_pts)

    # in case of numerical column names: the shortening below needs ``len``
    # and slicing, which only work on strings
    labels = [str(col) for col in cols]
    # shorten column labels if necessary, and override coordinates with column names
    labels = [f"{label[:16]}..." if len(label) > 18 else label for label in labels]
    # one tick per label at 5, 15, 25, ...
    axis_coords = list(range(5, 10 * len(labels) + 1, 10))
    axis_overrides = dict(zip(axis_coords, labels))
    fig.xaxis.ticker = axis_coords
    fig.xaxis.major_label_overrides = axis_overrides
    fig.xaxis.major_label_orientation = np.pi / 3

    fig.yaxis.axis_label = "Average Distance Between Clusters"
    fig.grid.visible = False
    fig.frame_width = plot_width
    return fig
def render_bar_chart(
    data: Tuple[np.ndarray, np.ndarray, np.ndarray],
    yscale: str,
    plot_width: int,
    plot_height: int,
) -> Figure:
    """
    Render a stacked bar chart for the missing and present values.

    Parameters
    ----------
    data
        A tuple ``(pres_cnts, null_cnts, cols)``: the present counts, the
        missing counts, and the labels used as the bar categories.
    yscale
        Passed to the figure as ``y_axis_type``.
    plot_width
        Width of the plot. Replaced by ``28 * number of bars`` when there are
        more than 20 bars.
    plot_height
        Height of the plot.

    Returns
    -------
    Figure
        The bar chart with one hover tool per stacked series, showing the
        count and the percentage of the hovered bar.
    """
    pres_cnts, null_cnts, cols = data
    df = pd.DataFrame({"Present": pres_cnts, "Missing": null_cnts}, index=cols)

    if len(df) > 20:
        plot_width = 28 * len(df)

    # Upper bound of the y axis: present + missing of the first bar.
    # ``.iloc`` is used because the Series are indexed by ``cols``; a plain
    # ``[0]`` would be a label lookup there, not "the first row".
    first_bar_total = df["Present"].iloc[0] + df["Missing"].iloc[0]

    fig = Figure(
        x_range=list(df.index),
        y_range=[0, first_bar_total],
        plot_width=plot_width,
        plot_height=plot_height,
        y_axis_type=yscale,
        toolbar_location=None,
        tools=[],
        title=" ",
    )

    rend = fig.vbar_stack(
        stackers=df.columns,
        x="index",
        width=0.9,
        color=[CATEGORY20[0], CATEGORY20[2]],
        source=df,
        legend_label=list(df.columns),
    )

    # hover tool with count and percent; the JS sums every non-index column
    # of the hovered bar and returns the hovered series' share of that sum
    formatter = CustomJSHover(
        args=dict(source=ColumnDataSource(df)),
        code="""
        const columns = Object.keys(source.data)
        const cur_bar = special_vars.data_x - 0.5
        var ttl_bar = 0
        for (let i = 0; i < columns.length; i++) {
            if (columns[i] != 'index'){
                ttl_bar = ttl_bar + source.data[columns[i]][cur_bar]
            }
        }
        const cur_val = source.data[special_vars.name][cur_bar]
        return (cur_val/ttl_bar * 100).toFixed(2)+'%';
    """,
    )

    # one hover tool per stacked series, bound to that series' renderer only
    for val, renderer in zip(df.columns, rend):
        field = "@{%s}" % renderer.name
        hover = HoverTool(
            tooltips=[
                ("Column", "@index"),
                (f"{val} count", "@$name"),
                (f"{val} percent", field + "{custom}"),
            ],
            formatters={field: formatter},
            renderers=[renderer],
        )
        fig.add_tools(hover)

    fig.yaxis.axis_label = "Row Count"
    tweak_figure(fig)
    relocate_legend(fig, "left")
    fig.frame_width = plot_width
    return fig
def render_heatmaps(df: Optional[pd.DataFrame], plot_width: int, plot_height: int) -> Figure:
    """
    Render the missing heatmap as a single figure.

    Parameters
    ----------
    df
        Square frame of pairwise values, or ``None``. When it is ``None`` or
        has no cells, a blank heatmap is rendered instead.
    plot_width
        Width of the plot, also used as its frame width.
    plot_height
        Height of the plot.

    Returns
    -------
    Figure
        The heatmap (or the blank placeholder) with the color bar on the left.
    """
    tooltips = [("x", "@x"), ("y", "@y"), ("correlation", "@correlation{1.11}")]
    mapper, color_bar = create_color_mapper_heatmap(RDBU)

    def empty_figure() -> Figure:
        # If no data to render in the heatmap, i.e. no missing values
        # we render a blank heatmap
        fig = Figure(
            x_range=[],
            y_range=[],
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location="below",
            tools="hover",
            toolbar_location=None,
            background_fill_color="#fafafa",
        )
        # Add at least one renderer to fig, otherwise bokeh
        # gives us error -1000 (MISSING_RENDERERS): Plot has no renderers
        fig.rect(x=0, y=0, width=0, height=0)
        return fig

    if df is not None:
        # Keep only one triangle of the matrix (the mask is applied before the
        # transpose). The builtin ``bool`` is used as dtype: the ``np.bool``
        # alias is deprecated and missing from newer NumPy releases.
        triangle_mask = np.triu(np.ones(df.shape)).astype(bool)
        df = df.where(triangle_mask).T

    if df is None or df.size == 0:
        fig = empty_figure()
    else:
        # the ranges must be taken before the frame is reshaped to long form
        x_range = FactorRange(*df.columns)
        y_range = FactorRange(*reversed(df.columns))

        # long form: one row per (x, y) pair with its value
        df = df.unstack().reset_index(name="correlation")
        df = df.rename(columns={"level_0": "x", "level_1": "y"})
        # drop the diagonal and the masked-out cells
        df = df[df["x"] != df["y"]]
        df = drop_null(df)
        # in case of numerical column names
        df["x"] = df["x"].apply(str)
        df["y"] = df["y"].apply(str)

        fig = Figure(
            x_range=x_range,
            y_range=y_range,
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location="below",
            tools="hover",
            toolbar_location=None,
            tooltips=tooltips,
            background_fill_color="#fafafa",
            title=" ",
        )
        fig.rect(
            x="x",
            y="y",
            width=1,
            height=1,
            source=df,
            fill_color={"field": "correlation", "transform": mapper},
            line_color=None,
        )

    tweak_figure(fig)
    fig.grid.grid_line_color = None
    fig.axis.axis_line_color = None
    fig.add_layout(color_bar, "left")
    fig.frame_width = plot_width
    return fig