예제 #1
0
파일: render.py 프로젝트: pplonski/dataprep
def render_correlation_impact(itmdt: Intermediate, plot_width: int,
                              plot_height: int,
                              palette: Sequence[str]) -> Dict[str, Any]:
    """
    Build one correlation heatmap per entry of ``itmdt["data"]``.

    Parameters
    ----------
    itmdt
        Intermediate result; the "axis_range", "data", "insights" and
        "tabledata" entries are read.
    plot_width
        Width passed to every figure (also used as its frame width).
    plot_height
        Height passed to every figure.
    palette
        Colors handed to ``create_color_mapper``.

    Returns
    -------
    Dict[str, Any]
        The insights and table data passed through unchanged, the list of
        figures ("layout"), their titles ("meta") and a container width of
        ``plot_width + 150``.
    """
    factors = itmdt["axis_range"]
    hover_fields = [("x", "@x"), ("y", "@y"),
                    ("correlation", "@correlation{1.11}")]
    tabs: List[Panel] = []

    for method, corr in itmdt["data"].items():
        # work on a copy and stringify both axes, in case of numerical
        # column names
        cells = corr.copy()
        for axis in ("x", "y"):
            cells[axis] = cells[axis].apply(str)

        mapper, color_bar = create_color_mapper(palette)
        heatmap = Figure(
            x_range=FactorRange(*factors),
            # reversed factors on the y axis
            y_range=FactorRange(*reversed(factors)),
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location="below",
            tools="hover",
            toolbar_location=None,
            tooltips=hover_fields,
            background_fill_color="#fafafa",
            title=" ",
        )
        tweak_figure(heatmap)

        # one unit square per (x, y) pair, colored by its correlation value
        heatmap.rect(
            x="x",
            y="y",
            width=1,
            height=1,
            source=cells,
            fill_color=dict(field="correlation", transform=mapper),
            line_color=None,
        )
        heatmap.frame_width = plot_width
        heatmap.add_layout(color_bar, "left")
        tabs.append(Panel(child=heatmap, title=method))

    rendered: Dict[str, Any] = {}
    rendered["insights"] = itmdt["insights"]
    rendered["tabledata"] = itmdt["tabledata"]
    rendered["layout"] = [tab.child for tab in tabs]
    rendered["meta"] = [tab.title for tab in tabs]
    rendered["container_width"] = plot_width + 150
    return rendered
예제 #2
0
def render_dendrogram(dend: Dict[str, Any], plot_width: int,
                      plot_height: int) -> Figure:
    """
    Render a missing dendrogram.

    Parameters
    ----------
    dend
        Dendrogram description; the "icoord", "dcoord" and "ivl" entries are
        read (x coordinates, y coordinates and leaf labels of the links).
    plot_width
        Requested width; replaced by ``28 * number_of_labels`` when there
        are more than 20 labels.
    plot_height
        Height of the figure.

    Returns
    -------
    Figure
        The dendrogram with a hover tool on the horizontal segments and the
        leaf labels on the x axis.
    """
    # list of lists of dcoords and icoords from scipy.dendrogram
    xs, ys, cols = dend["icoord"], dend["dcoord"], dend["ivl"]

    # if the number of columns is greater than 20, make the plot wider
    if len(cols) > 20:
        plot_width = 28 * len(cols)

    fig = Figure(
        plot_width=plot_width,
        plot_height=plot_height,
        toolbar_location=None,
        tools="",
        title=" ",
    )

    # snap the x coordinates to integers and keep two decimals for the
    # y coordinates, then plot the dendrogram
    xs = [[round(coord) for coord in coords] for coords in xs]
    ys = [[round(coord, 2) for coord in coords] for coords in ys]
    fig.multi_line(xs=xs, ys=ys, line_color="#8073ac")

    # extract the horizontal lines (the two middle points of every link)
    # for the hover tooltip
    h_lns_x = [coords[1:3] for coords in xs]
    h_lns_y = [coords[1:3] for coords in ys]
    # the tooltip value is the y coordinate of each horizontal segment
    null_mismatch_vals = [coord[0] for coord in h_lns_y]
    source = ColumnDataSource(dict(x=h_lns_x, y=h_lns_y, n=null_mismatch_vals))
    h_lns = fig.multi_line(xs="x", ys="y", source=source, line_color="#8073ac")
    hover_pts = HoverTool(
        renderers=[h_lns],
        tooltips=[("Average distance", "@n{0.1f}")],
        line_policy="interp",
    )
    fig.add_tools(hover_pts)

    # in case of numerical column names: len() and slicing below need strings
    labels = [str(col) for col in cols]
    # shorten column labels if necessary, and override coordinates with column names
    labels = [
        f"{label[:16]}..." if len(label) > 18 else label for label in labels
    ]
    # one tick per label at x = 5, 15, 25, ...
    # NOTE(review): assumes the leaves sit at those x positions — confirm
    # against the producer of ``dend``
    axis_coords = list(range(5, 10 * len(labels) + 1, 10))
    axis_overrides = dict(zip(axis_coords, labels))
    fig.xaxis.ticker = axis_coords
    fig.xaxis.major_label_overrides = axis_overrides
    fig.xaxis.major_label_orientation = np.pi / 3
    fig.yaxis.axis_label = "Average Distance Between Clusters"
    fig.grid.visible = False
    fig.frame_width = plot_width
    return fig
예제 #3
0
def render_bar_chart(
    data: Tuple[np.ndarray, np.ndarray, np.ndarray],
    yscale: str,
    plot_width: int,
    plot_height: int,
) -> Figure:
    """
    Render a bar chart for the missing and present values

    Parameters
    ----------
    data
        Tuple of (present counts, missing counts, column names), unpacked
        in that order.
    yscale
        Passed to the figure as ``y_axis_type``.
    plot_width
        Requested width; replaced by ``28 * number_of_columns`` when there
        are more than 20 columns.
    plot_height
        Height of the figure.

    Returns
    -------
    Figure
        Stacked "Present"/"Missing" bars, one per column, with a hover tool
        per stack showing the count and its share of the bar.
    """
    pres_cnts, null_cnts, cols = data
    # in case of numerical column names: the categorical x range below
    # needs string labels
    labels = [str(col) for col in cols]
    df = pd.DataFrame({"Present": pres_cnts, "Missing": null_cnts}, index=labels)

    if len(df) > 20:
        plot_width = 28 * len(df)

    # Height of the first bar, read by position. Plain ``series[0]`` is a
    # label lookup on this label-indexed frame and only falls back to the
    # position through a deprecated pandas code path.
    first_bar_total = df["Present"].iloc[0] + df["Missing"].iloc[0]

    fig = Figure(
        x_range=list(df.index),
        y_range=[0, first_bar_total],
        plot_width=plot_width,
        plot_height=plot_height,
        y_axis_type=yscale,
        toolbar_location=None,
        tools=[],
        title=" ",
    )

    rend = fig.vbar_stack(
        stackers=df.columns,
        x="index",
        width=0.9,
        color=[CATEGORY20[0], CATEGORY20[2]],
        source=df,
        legend_label=list(df.columns),
    )

    # hover tool with count and percent; the JS snippet sums every
    # non-index column of the hovered bar and reports the hovered stack's
    # share of that sum
    formatter = CustomJSHover(
        args=dict(source=ColumnDataSource(df)),
        code="""
        const columns = Object.keys(source.data)
        const cur_bar = special_vars.data_x - 0.5
        var ttl_bar = 0
        for (let i = 0; i < columns.length; i++) {
            if (columns[i] != 'index'){
                ttl_bar = ttl_bar + source.data[columns[i]][cur_bar]
            }
        }
        const cur_val = source.data[special_vars.name][cur_bar]
        return (cur_val/ttl_bar * 100).toFixed(2)+'%';
    """,
    )
    # one hover tool per stack so each tooltip is tied to its own renderer
    for val, renderer in zip(df.columns, rend):
        hover = HoverTool(
            tooltips=[
                ("Column", "@index"),
                (f"{val} count", "@$name"),
                (f"{val} percent", "@{%s}{custom}" % renderer.name),
            ],
            formatters={"@{%s}" % renderer.name: formatter},
            renderers=[renderer],
        )
        fig.add_tools(hover)

    fig.yaxis.axis_label = "Row Count"
    tweak_figure(fig)
    relocate_legend(fig, "left")
    fig.frame_width = plot_width

    return fig
예제 #4
0
def render_heatmaps(df: Optional[pd.DataFrame], plot_width: int, plot_height: int) -> Figure:
    """
    Render the missing-value heatmap as a single figure.

    Parameters
    ----------
    df
        Square matrix of pairwise values to plot, or None. When it is None
        or empty, a blank heatmap is returned instead.
    plot_width
        Width of the figure (also used as its frame width).
    plot_height
        Height of the figure.

    Returns
    -------
    Figure
        The heatmap (or the blank placeholder) with the color bar attached
        on the left.
    """
    tooltips = [("x", "@x"), ("y", "@y"), ("correlation", "@correlation{1.11}")]
    mapper, color_bar = create_color_mapper_heatmap(RDBU)

    def empty_figure() -> Figure:
        # If no data to render in the heatmap, i.e. no missing values
        # we render a blank heatmap
        fig = Figure(
            x_range=[],
            y_range=[],
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location="below",
            tools="hover",
            toolbar_location=None,
            background_fill_color="#fafafa",
        )

        # Add at least one renderer to fig, otherwise bokeh
        # gives us error -1000 (MISSING_RENDERERS): Plot has no renderers
        fig.rect(x=0, y=0, width=0, height=0)
        return fig

    if df is not None:
        # Keep only the upper triangle, then transpose. The builtin ``bool``
        # is used because the ``np.bool`` alias was removed in NumPy 1.24.
        df = df.where(np.triu(np.ones(df.shape)).astype(bool)).T

    if df is None or df.size == 0:
        fig = empty_figure()
    else:
        # in case of numerical column names: the factors must match the
        # stringified "x"/"y" values plotted below
        factors = [str(col) for col in df.columns]
        x_range = FactorRange(*factors)
        y_range = FactorRange(*reversed(factors))

        # long format: one row per (x, y) pair with its value
        df = df.unstack().reset_index(name="correlation")
        df = df.rename(columns={"level_0": "x", "level_1": "y"})
        # the diagonal (a column paired with itself) is not plotted
        df = df[df["x"] != df["y"]]
        df = drop_null(df)

        # in case of numerical column names
        df["x"] = df["x"].apply(str)
        df["y"] = df["y"].apply(str)

        fig = Figure(
            x_range=x_range,
            y_range=y_range,
            plot_width=plot_width,
            plot_height=plot_height,
            x_axis_location="below",
            tools="hover",
            toolbar_location=None,
            tooltips=tooltips,
            background_fill_color="#fafafa",
            title=" ",
        )

        fig.rect(
            x="x",
            y="y",
            width=1,
            height=1,
            source=df,
            fill_color={"field": "correlation", "transform": mapper},
            line_color=None,
        )

    tweak_figure(fig)
    fig.grid.grid_line_color = None
    fig.axis.axis_line_color = None
    fig.add_layout(color_bar, "left")
    fig.frame_width = plot_width
    return fig