Beispiel #1
0
def get_savefile_path_and_location_heading(
    df: pd.DataFrame,
    *,
    stage: Union[DiseaseStage, Literal[Select.ALL]],
    count: Counting,
    x_axis: Columns.XAxis,
) -> Tuple[Path, Union[str, None]]:
    """Given arguments used to create a plot, return the save path and the location
    heading for that plot

    The kind of data in `df` is detected from its `Columns.COUNTRY` and
    `Columns.IS_STATE` columns, checked in this order: world data, Chinese provinces,
    states, countries (with or without China).

    :param df: The dataframe to be plotted; must contain at least one row
    :type df: pd.DataFrame
    :param x_axis: The x axis column to be plotted against
    :type x_axis: Columns.XAxis
    :param stage: The disease stage(s) to be plotted
    :type stage: Union[DiseaseStage, Literal[Select.ALL]]
    :param count: The count type to be plotted
    :type count: Counting
    :raises ValueError: Certain known dataframes are explicitly handled; if an empty
    dataframe, or one containing data that we don't know how to handle, is passed,
    raise a ValueError
    :return: A (Path, str) tuple containing the relative save path and location
    heading, respectively; the location heading is None for world data
    :rtype: Tuple[Path, Optional[str]]
    """

    DiseaseStage.verify(stage, allow_select=True)
    Counting.verify(count)
    Columns.XAxis.verify(x_axis)

    # The detection below peeks at the first row; without this guard an empty frame
    # would surface as an IndexError from .iloc[0] rather than the documented
    # ValueError
    if df.empty:
        raise ValueError("DataFrame contents not understood (DataFrame is empty)")

    if Locations.WORLD in df[Columns.COUNTRY].values:
        savefile_basename = "World"
        location_heading = None
    elif df[Columns.COUNTRY].iloc[0] == Locations.CHINA:
        savefile_basename = "China_provinces"
        location_heading = "Province"
    elif df[Columns.IS_STATE].iloc[0]:
        savefile_basename = "States"
        location_heading = "State"
    elif not df[Columns.IS_STATE].any():
        if (df[Columns.COUNTRY] == Locations.CHINA).any():
            savefile_basename = "Countries_w_china"
        else:
            savefile_basename = "Countries_wo_china"

        location_heading = "Country"
    else:
        raise ValueError("DataFrame contents not understood")

    stage_name = "All" if stage is Select.ALL else stage.pprint()

    # Layout: <count>/<x axis>/Stage_<stage>/<basename, lowercased>.png
    savefile_path = (Path() / count.pprint() / x_axis.pprint() /
                     f"Stage_{stage_name}" /
                     Path(savefile_basename.lower()).with_suffix(".png"))
    return savefile_path, location_heading
Beispiel #2
0
def _add_doubling_time_lines(
    fig: plt.Figure,
    ax: plt.Axes,
    *,
    stage: DiseaseStage,
    count: Counting,
    x_axis: Columns.XAxis,
) -> None:
    """Add doubling time lines to the given plot

    On a log-scale graph, doubling time lines originate from a point near the
    lower-left and show how fast the number of cases (per capita) would grow if it
    doubled every n days.

    Each line starts at x = 0 and y = the threshold for the given stage and count, is
    labeled with its doubling time ("3 days", "1 week", ...) and is clipped to the
    plotting area. As a side effect the axes' x limits are pinned to their current
    values and the y upper limit is raised to 1.1 in axes coordinates.

    This function does nothing (beyond validating its arguments) unless `x_axis` is
    `Columns.XAxis.DAYS_SINCE_OUTBREAK`.

    :param fig: The figure containing the plots; its canvas renderer is used to
    measure the annotation text
    :type fig: plt.Figure
    :param ax: The axes object we wish to annotate
    :type ax: plt.Axes
    :param x_axis: The column to be used for the x-axis of the graph. We only add
    doubling time lines for graphs plotted against days since outbreak (and not actual
    days, as doubling time lines don't make sense then because there is no common
    origin to speak of)
    :type x_axis: Columns.XAxis
    :param stage: The disease stage we are plotting
    :type stage: DiseaseStage
    :param count: The count method used
    :type count: Counting
    """

    DiseaseStage.verify(stage)
    Counting.verify(count)
    Columns.XAxis.verify(x_axis)

    # For ease of computation, lines and labels are drawn in the axes coordinate
    # system
    # Variable names beginning with "ac" refer to axis coords and "dc" to data coords
    # {ac,dc}_{x,y}_{min,max} refer to the coordinates of the doubling-time lines
    # {ac,dc}_{x,y}_{lower,upper}_lim refer to the bounds of the plotting area
    if x_axis is Columns.XAxis.DAYS_SINCE_OUTBREAK:
        # Create transformation from data coords to axes coords
        # This composes two transforms, data -> fig and (axes -> fig)^(-1)
        dc_to_ac = ax.transData + ax.transAxes.inverted()

        dc_x_lower_lim, dc_x_upper_lim = ax.get_xlim()
        dc_y_lower_lim, dc_y_upper_lim = ax.get_ylim()

        # Adding stuff causes the axis to resize itself, and we have to stop it
        # from doing so (by setting it back to its original size)
        ax.set_xlim(dc_x_lower_lim, dc_x_upper_lim)

        # Also need to add back margin: the new upper y limit is the data value that
        # currently sits at y = 1.1 in axes coords
        dc_y_upper_lim = dc_to_ac.inverted().transform((0, 1.1))[1]
        ax.set_ylim(dc_y_lower_lim, dc_y_upper_lim)

        # Getting min x,y bounds of lines is easy: every line starts on day 0 at the
        # threshold value for this stage/count
        dc_x_min = 0
        dc_y_min = CaseInfo.get_info_item_for(InfoField.THRESHOLD,
                                              stage=stage,
                                              count=count)

        ac_x_min, ac_y_min = dc_to_ac.transform((dc_x_min, dc_y_min))

        # Getting max x,y bounds is trickier due to needing to use the maximum
        # extent of the graph area
        # Top right corner of the graph in axes coords. No inset is applied here; the
        # gap that keeps the texts' boxes from clipping the axes comes from PADDING
        # below
        ac_x_upper_lim = ac_y_upper_lim = 1

        doubling_times = [1, 2, 3, 4, 7, 14]  # days (x-axis units)
        for dt in doubling_times:
            # Simple math: assuming dc_y_max := dc_y_upper_lim, then if
            # dc_y_max = dc_y_min * 2**((dc_x_max-dc_x_min)/dt),
            # then...
            dc_x_max = dc_x_min + dt * np.log2(dc_y_upper_lim / dc_y_min)
            ac_x_max, ac_y_max = dc_to_ac.transform((dc_x_max, dc_y_upper_lim))

            # We try to use ac_y_max=1 by default, and if that leads to too long a line
            # (sticking out through the right side of the graph) then we use ac_x_max=1
            # instead and compute ac_y_max accordingly
            # `edge` records which side of the plotting area the line ends on; it
            # decides how the label is positioned further down
            if ac_x_max > ac_x_upper_lim:
                dc_y_max = dc_y_min * 2**((dc_x_upper_lim - dc_x_min) / dt)
                ac_x_max, ac_y_max = dc_to_ac.transform(
                    (dc_x_upper_lim, dc_y_max))
                edge = EdgeGuide.RIGHT
            else:
                edge = EdgeGuide.TOP

            # Plot the lines themselves
            ax.plot(
                [ac_x_min, ac_x_max],
                [ac_y_min, ac_y_max],
                transform=ax.transAxes,
                color="0.0",
                alpha=0.7,
                dashes=(1, 2),
                linewidth=1,
            )

            # Annotate lines with associated doubling times

            # Get text to annotate with: whole multiples of 7 days are labeled in
            # weeks, everything else in days (pluralized as needed)
            n_weeks, weekday = divmod(dt, 7)
            if weekday == 0:
                annot_text_str = f"{n_weeks} week"
                if n_weeks != 1:
                    annot_text_str += "s"
            else:
                annot_text_str = f"{dt} day"
                if dt != 1:
                    annot_text_str += "s"

            # Semi-transparent white background box with no border and no padding
            text_props = {
                "bbox": {
                    "fc": "1.0",
                    "pad": 0,
                    # "edgecolor": "1.0",
                    "alpha": 0.7,
                    "lw": 0,
                }
            }

            # Plot in a temporary location just to get the text box size; we'll move and
            # rotate later
            # text_props is passed positionally, i.e. as the argument that follows the
            # text string in Axes.text's signature
            plotted_text = ax.text(0,
                                   0,
                                   annot_text_str,
                                   text_props,
                                   transform=ax.transAxes)

            # The label is rotated to the slope of the line as measured in axes coords
            ac_line_slope = (ac_y_max - ac_y_min) / (ac_x_max - ac_x_min)
            ac_text_angle_rad = np.arctan(ac_line_slope)
            sin_ac_angle = np.sin(ac_text_angle_rad)
            cos_ac_angle = np.cos(ac_text_angle_rad)

            # Get the unrotated text box bounds (window extent converted to axes coords)
            ac_text_box = plotted_text.get_window_extent(
                fig.canvas.get_renderer()).transformed(ax.transAxes.inverted())
            ac_text_width = ac_text_box.x1 - ac_text_box.x0
            ac_text_height = ac_text_box.y1 - ac_text_box.y0

            # Compute the width and height of the upright rectangle bounding the rotated
            # text box in axis coordinates
            # Simple geometry (a decent high school math problem)
            # We cheat a bit; to create some padding between the rotated text box and
            # the axes, we can add the padding directly to the width and height of the
            # upright rectangle bounding the rotated text box
            # This works because the origin of the rotated text box is in the lower left
            # corner of the upright bounding rectangle, so anything added to these
            # dimensions gets added to the top and right, pushing it away from the axes
            # and producing the padding we want
            # If we wanted to do this the "right" way we'd *redo* the calculations above
            # but with ac_x_upper_lim = ac_y_upper_lim = 1 - padding
            PADDING = 0.005
            ac_rot_text_width = ((ac_text_width * cos_ac_angle) +
                                 (ac_text_height * sin_ac_angle) + PADDING)
            ac_rot_text_height = ((ac_text_width * sin_ac_angle) +
                                  (ac_text_height * cos_ac_angle) + PADDING)

            # Perpendicular distance from text to corresponding line
            AC_DIST_FROM_LINE = 0.005
            # Get text box origin relative to line upper endpoint
            EdgeGuide.verify(edge)
            if edge is EdgeGuide.RIGHT:
                # Account for bit of overhang; when slanted, top left corner of the
                # text box extends left of the bottom left corner, which is its origin
                # Subtracting that bit of overhang (height * sin(theta)) gets us the
                # x-origin
                # This only applies to the x coord; the bottom left corner of the text
                # box is also the bottom of the rotated rectangle
                ac_text_origin_x = ac_x_max - (ac_rot_text_width -
                                               ac_text_height * sin_ac_angle)
                ac_text_origin_y = (
                    ac_y_min + (ac_text_origin_x - ac_x_min) * ac_line_slope +
                    AC_DIST_FROM_LINE / cos_ac_angle)

            # If text box is in very top right of graph, it may use only the right
            # edge of the graph as a guide and hence clip through the top; if that
            # happens, it's effectively the same situation as using the top edge from
            # the start
            # NOTE: ac_text_origin_y is only assigned in the RIGHT branch above, so the
            # TOP test has to come first in the `or` to avoid reading an unset name
            if (edge is EdgeGuide.TOP  # Must go first to short-circuit
                    or ac_text_origin_y + ac_rot_text_height > ac_y_upper_lim):
                ac_text_origin_y = ac_y_upper_lim - ac_rot_text_height
                ac_text_origin_x = (
                    ac_x_min - AC_DIST_FROM_LINE / sin_ac_angle +
                    (ac_text_origin_y - ac_y_min) / ac_line_slope)

            # set_x and set_y work in axis coordinates (the text was created with
            # transform=ax.transAxes)
            plotted_text.set_x(ac_text_origin_x)
            plotted_text.set_y(ac_text_origin_y)
            plotted_text.set_horizontalalignment("left")
            plotted_text.set_verticalalignment("bottom")
            plotted_text.set_rotation(ac_text_angle_rad * 180 /
                                      np.pi)  # takes degrees
            plotted_text.set_rotation_mode("anchor")
Beispiel #3
0
def plot(
    df: pd.DataFrame,
    *,
    start_date=None,
    stage: Union[DiseaseStage, Literal[Select.ALL]] = Select.ALL,
    count: Counting,
    x_axis: Columns.XAxis,
    df_with_china: pd.DataFrame = None,
    style=None,
) -> List[Tuple[plt.Figure, plt.Axes]]:
    """Entry point for plotting

    Validates the arguments, trims `df` to the rows relevant for the chosen x axis,
    works out the save path, location heading and color mapping, and then delegates
    the actual drawing to _plot_helper.

    :param df: The data to plot
    :type df: pd.DataFrame
    :param x_axis: The x axis column to plot against. For the date axis, rows before
    `start_date` and empty leading dates are dropped; for the days-since-outbreak
    axis, only rows with a value of at least -1 are kept
    :type x_axis: Columns.XAxis
    :param start_date: A date with which to filter the data; only data on or after
    this date will be plotted. Only used when plotting against the date axis.
    Defaults to None (all data will be plotted)
    :type start_date: anything accepted by pd.Timestamp, optional
    :param stage: The disease stage to plot, defaults to Select.ALL
    :type stage: Union[DiseaseStage, Literal[Select.ALL]], optional
    :param count: The count type to plot
    :type count: Counting
    :param df_with_china: A dataframe to serve as a reference point for creating the
    color mapping; this keeps colors in sync whether or not China is included in the
    plot. Defaults to None, in which case `df` is used to create the color mapping.
    :type df_with_china: pd.DataFrame, optional
    :param style: The matplotlib style to plot with, defaults to None (default style)
    :type style: str, optional
    :return: A list of plotted objects (Figure, Axes)
    :rtype: List[Tuple[plt.Figure, plt.Axes]]
    """

    DiseaseStage.verify(stage, allow_select=True)
    Counting.verify(count)
    Columns.XAxis.verify(x_axis)

    # Trim the data according to the axis it will be plotted against
    if x_axis is Columns.XAxis.DAYS_SINCE_OUTBREAK:
        reached_outbreak = df[Columns.DAYS_SINCE_OUTBREAK] >= -1
        df = df[reached_outbreak]
    elif x_axis is Columns.XAxis.DATE:
        if start_date is not None:
            on_or_after_start = df[Columns.DATE] >= pd.Timestamp(start_date)
            df = df[on_or_after_start]

        df = remove_empty_leading_dates(df, count)
    else:
        x_axis.raise_for_unhandled_case()

    savefile_path, location_heading = get_savefile_path_and_location_heading(
        df, x_axis=x_axis, stage=stage, count=count)

    # Colors come from the reference frame when one is given, so that a location keeps
    # its color whether or not China is part of this particular plot
    palette_source = df if df_with_china is None else df_with_china
    color_mapping = get_color_palette_assignments(palette_source)

    return _plot_helper(
        df,
        x_axis=x_axis,
        stage=stage,
        count=count,
        style=style,
        color_mapping=color_mapping,
        savefile_path=savefile_path,
        location_heading=location_heading,
    )
Beispiel #4
0
def _plot_helper(
    df: pd.DataFrame,
    *,
    stage: Union[DiseaseStage, Literal[Select.ALL]],
    count: Counting,
    x_axis: Columns.XAxis,
    style: Optional[str] = None,
    color_mapping: LocationColorMapping = None,
    plot_size: Tuple[float, float] = None,
    savefile_path: Path,
    location_heading: str = None,
) -> List[Tuple[plt.Figure, plt.Axes]]:
    """Do all the logic required to turn the arguments given into the correct plot

    Draws one line per location and case type, configures the axes, legend and
    doubling time lines, saves the figure under `Paths.FIGURES / savefile_path` and
    prints the saved path.

    :param df: The dataframe whose data will be plotted
    :type df: pd.DataFrame
    :param x_axis: The x axis column the data will be plotted against
    :type x_axis: Columns.XAxis
    :param stage: The disease stage (one half of the y-value) that will be plotted;
    :type stage: Union[DiseaseStage, Literal[Select.ALL]]
    :param count: The counting method (the other half of the y-value) that will be
    plotted
    :type count: Counting
    :param savefile_path: Where to save the resulting figure, relative to
    `Paths.FIGURES`
    :type savefile_path: Path
    :param style: The matplotlib plotting style to use, defaults to None
    :type style: Optional[str], optional
    :param color_mapping: The seaborn color mapping to use, defaults to None, in which
    case seaborn picks the colors and locations are ordered as in the current case
    data
    :type color_mapping: LocationColorMapping, optional
    :param plot_size: The (width inches, height inches) plot size, defaults to None,
    which means 8 x 8 inches
    :type plot_size: Tuple[float, float], optional
    :param location_heading: What locations are to be called ("Country", "State",
    etc.); the default of None is equivalent to "Location"
    :type location_heading: str, optional
    :return: A list of figures and axes created
    :rtype: List[Tuple[plt.Figure, plt.Axes]]
    """

    DiseaseStage.verify(stage, allow_select=True)
    Counting.verify(count)
    Columns.XAxis.verify(x_axis)

    SORTED_POSITION = "Sorted_Position_"

    figs_and_axes = []

    if plot_size is None:
        # 8 x 8 is the size figures were always drawn at, so default output is
        # unchanged now that plot_size is actually honored
        plot_size = (8, 8)

    fig, ax = plt.subplots(figsize=plot_size, dpi=200, facecolor="white")
    fig: plt.Figure
    ax: plt.Axes

    current_case_counts = get_current_case_data(
        df,
        stage=Select.DEFAULT if stage is Select.ALL else stage,
        count=count,
        x_axis=x_axis,
    )

    df = df[df[Columns.CASE_TYPE].isin(
        CaseInfo.get_info_items_for(InfoField.CASE_TYPE,
                                    stage=stage,
                                    count=count).values)]

    # Order in which locations should appear in the legend
    legend_order = current_case_counts[Columns.LOCATION_NAME].tolist()

    if color_mapping is None:
        # No mapping supplied: keep the legend ordering and let seaborn choose colors
        hue_order = list(dict.fromkeys(legend_order))
        palette = None
    else:
        # Filter and sort color mapping correctly so that colors 1. are assigned to
        # the same locations across graphs (for continuity) and 2. are placed
        # correctly in the legend (for correctness)
        # Copy after filtering so the caller's mapping is never modified
        color_mapping = color_mapping[color_mapping[Columns.LOCATION_NAME].isin(
            legend_order)].copy()
        color_mapping[SORTED_POSITION] = color_mapping[
            Columns.LOCATION_NAME].map(legend_order.index)
        color_mapping = color_mapping.sort_values(SORTED_POSITION)
        hue_order = color_mapping[Columns.LOCATION_NAME].tolist()
        palette = color_mapping[COLOR].tolist()

    config_df = CaseInfo.get_info_items_for(InfoField.CASE_TYPE,
                                            InfoField.DASH_STYLE,
                                            stage=stage,
                                            count=count)

    with plt.style.context(style or "default"):
        # Draw on our axes explicitly rather than relying on pyplot's current axes
        sns.lineplot(
            data=df,
            x=x_axis.column(),
            y=Columns.CASE_COUNT,
            hue=Columns.LOCATION_NAME,
            hue_order=hue_order,
            style=Columns.CASE_TYPE,
            style_order=config_df[InfoField.CASE_TYPE].tolist(),
            dashes=config_df[InfoField.DASH_STYLE].tolist(),
            palette=palette,
            ax=ax,
        )

        # Labels, thresholds and doubling time lines need one concrete stage; when
        # plotting all stages, confirmed cases stand in for it
        default_stage = stage
        if stage is Select.ALL:
            default_stage = DiseaseStage.CONFIRMED

        # Configure axes and ticks
        # X axis (and y axis bottom limit, which is kind of x-axis related)
        if x_axis is Columns.XAxis.DATE:
            # NOTE: "%-d" (day without zero padding) is a platform-specific strftime
            # extension
            ax.xaxis.set_major_formatter(DateFormatter(r"%b %-d"))
            ax.xaxis.set_minor_locator(DayLocator())
            for tick in ax.get_xticklabels():
                tick.set_rotation(80)

        elif x_axis is Columns.XAxis.DAYS_SINCE_OUTBREAK:
            ax.xaxis.set_major_locator(MultipleLocator(5))
            ax.xaxis.set_minor_locator(MultipleLocator(1))

            _threshold, _axis_name = CaseInfo.get_info_items_for(
                InfoField.THRESHOLD,
                InfoField.CASE_TYPE,
                stage=default_stage,
                count=count,
                squeeze_rows=True,
            ).values
            ax.set_xlabel(f"Days Since Reaching {_threshold:.3g} {_axis_name}")

            if stage is not Select.ALL:  # i.e. if only one DiseaseStage plotted
                ax.set_ylim(bottom=_threshold / 4)

        else:
            x_axis.raise_for_unhandled_case()

        # Y axis
        ax.set_ylabel(
            CaseInfo.get_info_item_for(InfoField.CASE_TYPE,
                                       stage=default_stage,
                                       count=count))
        # NOTE(review): basey/nonposy are the older Matplotlib keyword spellings;
        # newer releases call them base/nonpositive -- confirm against the pinned
        # Matplotlib version before upgrading
        if count is Counting.TOTAL_CASES:
            ax.set_yscale("log", basey=2, nonposy="mask")
            ax.yaxis.set_major_locator(LogLocator(base=2, numticks=1000))
            ax.yaxis.set_major_formatter(StrMethodFormatter("{x:,.0f}"))
            ax.yaxis.set_minor_locator(
                # 5 ticks is one full "cycle": n, 1.25n, 1.5n, 1.75n, 2n
                # Hence 5-2 minor ticks between each pair of majors (omit endpoints)
                LogLocator(base=2,
                           subs=np.linspace(0.5, 1, 5)[1:-1],
                           numticks=1000))
            ax.yaxis.set_minor_formatter(NullFormatter())
        elif count is Counting.PER_CAPITA:
            ax.set_yscale("log", basey=10, nonposy="mask")
            # No need to set minor ticks; 8 is the default number, which makes one cycle
            # n, 2n, 3n, ..., 8n, 9n, 10n
        else:
            count.raise_for_unhandled_case()

        # Configure plot design
        now_str = datetime.now(
            timezone.utc).strftime(r"%b %-d, %Y at %H:%M UTC")
        ax.set_title(f"Last updated {now_str}", loc="right", fontsize="small")

        for line in ax.lines:
            line.set_linewidth(2)
        ax.grid(True, which="minor", axis="both", color="0.9")
        ax.grid(True, which="major", axis="both", color="0.75")

        _format_legend(
            ax=ax,
            x_axis=x_axis,
            count=count,
            location_heading=location_heading,
            current_case_counts=current_case_counts,
        )

        # Add doubling time lines
        _add_doubling_time_lines(fig,
                                 ax,
                                 x_axis=x_axis,
                                 stage=default_stage,
                                 count=count)

        # Save
        savefile_path = Paths.FIGURES / savefile_path
        savefile_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(savefile_path, bbox_inches="tight", dpi=300)
        print(f"Saved '{savefile_path.relative_to(Paths.ROOT)}'")

        figs_and_axes.append((fig, ax))

    return figs_and_axes
Beispiel #5
0
def plot_usa_daybyday_case_diffs(
    states_df: pd.DataFrame,
    *,
    geo_df: geopandas.GeoDataFrame = None,
    stage: Union[DiseaseStage, Literal[Select.ALL]],
    count: Union[Counting, Literal[Select.ALL]],
    dates: List[pd.Timestamp] = None,
) -> pd.DataFrame:
    """Plot choropleths of day-over-day case changes, one image per date

    The data is restricted to the state codes in `USA_STATE_CODES` minus Alaska and
    Hawaii, missing (state, date, stage, count type) combinations are filled with 0,
    and the difference from the previous date is computed per state, stage and count
    type. For every date that has a difference, a figure with one map per
    (stage, count type) pair is saved to `DOD_DIFF_DIR` as `dod_diff_YYYYMMDD.png`;
    the image for the latest date is additionally copied to
    `GEO_FIG_DIR / "dod_diff_poster.png"`.

    :param states_df: Case data by state; must contain the two-letter state code,
    date, stage, count type and case count columns
    :type states_df: pd.DataFrame
    :param geo_df: State geometries with the state code in a "STUSPS" column, defaults
    to None, in which case `get_geo_df()` is used
    :type geo_df: geopandas.GeoDataFrame, optional
    :param stage: The disease stage to plot; Select.ALL plots every stage, one row of
    subplots per stage
    :type stage: Union[DiseaseStage, Literal[Select.ALL]]
    :param count: The count type to plot; Select.ALL plots every count type, one
    column of subplots per count type
    :type count: Union[Counting, Literal[Select.ALL]]
    :param dates: The dates to consider, defaults to None (all dates in `states_df`).
    The earliest date has no previous date to diff against, so it is not plotted
    :type dates: List[pd.Timestamp], optional
    :return: The dataframe of per-state day-over-day differences that was plotted
    :rtype: pd.DataFrame
    """

    Counting.verify(count, allow_select=True)
    DiseaseStage.verify(stage, allow_select=True)

    if geo_df is None:
        geo_df = get_geo_df()

    DIFF_COL = "Diff_"
    ASPECT_RATIO = 1 / 20
    PAD_FRAC = 0.5
    N_CBAR_BUCKETS = 6  # only used when bucketing colormap into discrete regions
    N_BUCKETS_BTWN_MAJOR_TICKS = 1
    N_MINOR_TICKS_BTWN_MAJOR_TICKS = 8  # major_1, minor_1, ..., minor_n, major_2
    N_CBAR_MAJOR_TICKS = N_CBAR_BUCKETS // N_BUCKETS_BTWN_MAJOR_TICKS + 1
    CMAP = cmocean.cm.matter
    # For a discrete colormap, use this instead:
    # CMAP = ListedColormap(cmocean.cm.matter(np.linspace(0, 1, N_CBAR_BUCKETS)))
    DPI = 300
    NOW_STR = datetime.now(timezone.utc).strftime(r"%b %-d, %Y at %H:%M UTC")
    STAGE_TITLES = {
        DiseaseStage.CONFIRMED: "Cases",
        DiseaseStage.DEATH: "Deaths",
    }

    ID_COLS = [
        Columns.TWO_LETTER_STATE_CODE,
        Columns.DATE,
        Columns.STAGE,
        Columns.COUNT_TYPE,
    ]

    save_fig_kwargs = {
        "dpi": "figure",
        "bbox_inches": "tight",
        "facecolor": "w"
    }

    count_list: List[Counting] = (list(Counting)
                                  if count is Select.ALL else [count])
    stage_list: List[DiseaseStage] = (list(DiseaseStage)
                                      if stage is Select.ALL else [stage])
    n_rows = len(stage_list)
    n_cols = len(count_list)

    if dates is None:
        dates = states_df[Columns.DATE].unique()

    dates = sorted(pd.Timestamp(date) for date in dates)

    # Get day-by-day case diffs per location, date, stage, count-type
    case_diffs_df = states_df[
        (states_df[Columns.TWO_LETTER_STATE_CODE].isin(USA_STATE_CODES))
        &
        (~states_df[Columns.TWO_LETTER_STATE_CODE].isin(["AK", "HI"]))].copy()

    # Make sure data exists for every date for every state so that the entire country is
    # plotted each day; fill missing data with 0 (missing really *is* as good as 0)
    state_date_stage_combos = pd.MultiIndex.from_product(
        [
            case_diffs_df[Columns.TWO_LETTER_STATE_CODE].unique(),
            dates,
            [s.name for s in DiseaseStage],
            [c.name for c in Counting],
        ],
        names=ID_COLS,
    )

    case_diffs_df = (state_date_stage_combos.to_frame(index=False).merge(
        case_diffs_df,
        how="left",
        on=ID_COLS,
    ).sort_values(ID_COLS))

    case_diffs_df[Columns.CASE_COUNT] = case_diffs_df[
        Columns.CASE_COUNT].fillna(0)

    case_diffs_df[DIFF_COL] = case_diffs_df.groupby(
        [Columns.TWO_LETTER_STATE_CODE, Columns.STAGE,
         Columns.COUNT_TYPE])[Columns.CASE_COUNT].diff()

    # The first date of every group has nothing to diff against; drop it
    case_diffs_df = case_diffs_df[case_diffs_df[DIFF_COL].notna()]

    dates = case_diffs_df[Columns.DATE].unique()

    # Lower/upper ends of the (log) color scales
    vmins = {
        Counting.TOTAL_CASES:
        1,
        Counting.PER_CAPITA:
        case_diffs_df.loc[case_diffs_df[DIFF_COL] > 0, DIFF_COL].min(),
    }
    vmaxs = case_diffs_df.groupby([Columns.STAGE,
                                   Columns.COUNT_TYPE])[DIFF_COL].max()

    # Don't put too much stock in these, we tweak them later to make sure they're even
    fig_width_px = n_cols * 1800
    fig_height_px = n_rows * 1000 + 200

    max_date = max(dates)

    fig: plt.Figure = plt.figure(facecolor="white", dpi=DPI)

    # The single figure is reused for every date; make sure pyplot releases it when we
    # are done, even if a frame fails
    try:
        # The order doesn't matter, but doing later dates first lets us see interesting
        # output in Finder earlier, which is good for debugging
        for raw_date in reversed(dates):
            date: pd.Timestamp = pd.Timestamp(raw_date)
            # Data is associated with the right endpoint of the data collection
            # period, e.g., data collected *on* March 20 is labeled March 21 -- this is
            # done so that data collected today (on the day the code is run) has a
            # meaningful date associated with it (today's current time)
            # Anyway, here we undo that and display data on the date it was collected
            # in order to show a meaningful title on the graph
            if date == date.normalize():
                collection_date = date - pd.Timedelta(days=1)
            else:
                collection_date = date.normalize()

            fig.suptitle(collection_date.strftime(r"%b %-d, %Y"))

            # Separate loop names so the `stage`/`count` arguments aren't clobbered
            for subplot_index, (this_stage, this_count) in enumerate(
                    itertools.product(stage_list, count_list), start=1):
                ax: plt.Axes = fig.add_subplot(n_rows, n_cols, subplot_index)

                # Add timestamp to top right axis; subplots are numbered row by row
                # starting at 1, so the top right one is number n_cols
                if subplot_index == n_cols:
                    ax.text(
                        1.25,  # Coords are arbitrary magic numbers
                        1.23,
                        f"Last updated {NOW_STR}",
                        horizontalalignment="right",
                        fontsize="small",
                        transform=ax.transAxes,
                    )

                # Filter to just this axes: this stage, this count-type, this date
                stage_date_df = case_diffs_df[
                    (case_diffs_df[Columns.STAGE] == this_stage.name)
                    & (case_diffs_df[Columns.COUNT_TYPE] == this_count.name)
                    & (case_diffs_df[Columns.DATE] == date)]

                # Should have length 49 (50 + DC - AK - HI)
                stage_geo_df: geopandas.GeoDataFrame = geo_df.merge(
                    stage_date_df,
                    how="inner",
                    left_on="STUSPS",
                    right_on=Columns.TWO_LETTER_STATE_CODE,
                )
                assert len(stage_geo_df) == 49

                vmin = vmins[this_count]
                vmax = vmaxs.loc[(this_stage.name, this_count.name)]

                # Create log-scaled color mapping
                # https://stackoverflow.com/a/43807666
                norm = LogNorm(vmin, vmax)
                scm = plt.cm.ScalarMappable(norm=norm, cmap=CMAP)

                # Actually plot the data. Omit legend, since we'll want to customize
                # it and it's easier to create a new one than customize the existing
                # one.
                stage_geo_df.plot(
                    column=DIFF_COL,
                    ax=ax,
                    legend=False,
                    vmin=vmin,
                    vmax=vmax,
                    cmap=CMAP,
                    norm=norm,
                )

                # Plot state boundaries
                stage_geo_df.boundary.plot(ax=ax,
                                           linewidth=0.06,
                                           edgecolor="k")

                # Add colorbar axes to right side of graph
                # https://stackoverflow.com/a/33505522
                divider = make_axes_locatable(ax)
                width = axes_size.AxesY(ax, aspect=ASPECT_RATIO)
                pad = axes_size.Fraction(PAD_FRAC, width)
                cax = divider.append_axes("right", size=width, pad=pad)

                # Add colorbar itself
                cbar = fig.colorbar(scm, cax=cax)

                # Add evenly spaced ticks and their labels
                # First major, then minor
                # Adapted from https://stackoverflow.com/a/50314773
                bucket_size = (vmax / vmin)**(1 / N_CBAR_BUCKETS)
                tick_dist = bucket_size**N_BUCKETS_BTWN_MAJOR_TICKS

                # Simple log scale math
                major_tick_locs = (
                    vmin * (tick_dist**np.arange(0, N_CBAR_MAJOR_TICKS))
                    # * (bucket_size ** 0.5) # Use this if centering ticks on buckets
                )

                cbar.set_ticks(major_tick_locs)

                # Get minor locs by linearly interpolating between major ticks
                minor_tick_locs = []
                for this_major_tick, next_major_tick in zip(
                        major_tick_locs[:-1], major_tick_locs[1:]):
                    # Get minor ticks as numbers in range
                    # [this_major_tick, next_major_tick] and exclude the major ticks
                    # themselves (once we've used them to compute the minor tick locs)
                    minor_tick_locs.extend(
                        np.linspace(
                            this_major_tick,
                            next_major_tick,
                            N_MINOR_TICKS_BTWN_MAJOR_TICKS + 2,
                        )[1:-1])

                cbar.ax.yaxis.set_ticks(minor_tick_locs, minor=True)
                cbar.ax.yaxis.set_minor_formatter(NullFormatter())

                # Add major tick labels
                if this_count is Counting.PER_CAPITA:
                    fmt_func = "{:.2e}".format
                else:
                    fmt_func = functools.partial(format_float,
                                                 max_digits=5,
                                                 decimal_penalty=2)

                cbar.set_ticklabels(
                    [fmt_func(x) if x != 0 else "0" for x in major_tick_locs])

                # Set axes titles
                ax_title_components: List[str] = [
                    "New Daily", STAGE_TITLES[this_stage]
                ]
                if this_count is Counting.PER_CAPITA:
                    ax_title_components.append("Per Capita")

                ax.set_title(" ".join(ax_title_components))

                # Remove axis ticks (I think they're lat/long but we don't need them)
                for axis in [ax.xaxis, ax.yaxis]:
                    axis.set_major_locator(NullLocator())
                    axis.set_minor_locator(NullLocator())

            # Save figure, and then deal with matplotlib weirdness that doesn't exactly
            # respect the dimensions we set due to bbox_inches='tight'
            save_path: Path = (DOD_DIFF_DIR /
                               f"dod_diff_{date.strftime(r'%Y%m%d')}.png")
            fig.set_size_inches(fig_width_px / DPI, fig_height_px / DPI)
            fig.savefig(save_path, **save_fig_kwargs)

            # x264 video encoder requires frames have even width and height
            resize_to_even_dims(save_path)

            # Save poster (preview frame for video on web)
            if date == max_date:
                (GEO_FIG_DIR / "dod_diff_poster.png").write_bytes(
                    save_path.read_bytes())

            # Wipe the figure so the next date starts from a blank canvas
            fig.clf()

            print(f"Saved '{save_path}'")
    finally:
        plt.close(fig)

    return case_diffs_df
def __make_daybyday_interactive_timeline(
    df: pd.DataFrame,
    *,
    geo_df: geopandas.GeoDataFrame,
    value_col: str,
    transform_df_func: Union[Callable[[pd.DataFrame], pd.DataFrame], None] = None,
    stage: Union[DiseaseStage, Literal[Select.ALL]] = Select.ALL,
    count: Union[Counting, Literal[Select.ALL]] = Select.ALL,
    out_file_basename: str,
    subplot_title_prefix: str,
    plot_aspect_ratio: Union[float, None] = None,
    cmap=None,
    n_cbar_buckets: Union[int, None] = None,
    n_buckets_btwn_major_ticks: Union[int, None] = None,
    n_minor_ticks_btwn_major_ticks: Union[int, None] = None,
    per_capita_denominator: Union[int, None] = None,
    x_range: Tuple[float, float],
    y_range: Tuple[float, float],
    min_visible_y_range: float,
    should_make_video: bool,
) -> InfoForAutoload:
    """Create the bokeh interactive timeline plot(s)

    This function takes the given DataFrame, which must contain COVID data for locations
    on different dates, and a GeoDataFrame, which contains the long/lat coords for those
    locations, and creates an interactive choropleth of the COVID data over time.

    Side effects: the autoload JS and the HTML tag are written (as UTF-8) to
    ``<out_file_basename>_autoload.js`` and ``<out_file_basename>_div_tag.html`` under
    ``Paths.DOCS``. If `should_make_video` is set, one PNG still per date (plus a poster
    image) is written under ``PNG_SAVE_ROOT_DIR`` and `make_video` is invoked on them.

    :param df: The COVID data DataFrame
    :type df: pd.DataFrame
    :param geo_df: The geometry GeoDataFrame for the locations in `df`
    :type geo_df: geopandas.GeoDataFrame
    :param value_col: The column of `df` containing the values to plot in the
    choropleth; should be something like "Case_Counts" or "Case_Diff_From_Prev_Day"
    :type value_col: str
    :param stage: The DiseaseStage to plot, defaults to Select.ALL. If ALL, then all
    stages are plotted and are stacked vertically.
    :type stage: Union[DiseaseStage, Literal[Select.ALL]], optional
    :param count: The Counting to plot, defaults to Select.ALL. If ALL, then all
    count types are plotted and are stacked horizontally.
    :type count: Union[Counting, Literal[Select.ALL]], optional
    :param out_file_basename: The basename of the file to save the interactive plots to
    (there are two components, the JS script and the HTML <div>)
    :type out_file_basename: str
    :param subplot_title_prefix: What the first part of the subplot title should be;
    probably a function of `value_col` (if value_col is "Case_Counts" then this param
    might be "Cases" or "# of Cases"). If None, no prefix is added to the title.
    :type subplot_title_prefix: str
    :param x_range: The range of the x-axis as (min, max). May be None only if
    `plot_aspect_ratio` is given, in which case the x-axis range is left untouched.
    :type x_range: Tuple[float, float]
    :param y_range: The range of the y-axis as (min, max). May be None only if
    `plot_aspect_ratio` is given, in which case the y-axis range is left untouched.
    :type y_range: Tuple[float, float]
    :param min_visible_y_range: The minimum height (in axis units) of the y-axis; it
    will not be possible to zoom in farther than this on the choropleth.
    :type min_visible_y_range: float
    :param should_make_video: Optionally run through the timeline day by day, capture
    a screenshot for each day, and then stitch the screenshots into a video. The video
    shows the same info as the interactive plots, but not interactively. This easily
    takes 20x as long as just making the graphs themselves, so use with caution.
    :type should_make_video: bool
    :param transform_df_func: This function expects data in a certain format, and does
    a bunch of preprocessing (expected to be common) before plotting. This gives you a
    chance to do any customization on the postprocessed df before it's plotted. Defaults
    to None, in which case no additional transformation is performed.
    :type transform_df_func: Callable[[pd.DataFrame], pd.DataFrame], optional
    :param plot_aspect_ratio: The aspect ratio of the plot as width/height; if set, the
    aspect ratio will be fixed to this. Defaults to None, in which case the aspect ratio
    is determined from the x_range and y_range arguments
    :type plot_aspect_ratio: float, optional
    :param cmap: The colormap to use as either a matplotlib-compatible colormap or a
    list of hex strings (e.g., ["#ae8f1c", ...]). Defaults to None in which case a
    reasonable default is used.
    :type cmap: Matplotlib-compatible colormap or List[str], optional
    :param n_cbar_buckets: How many colorbar buckets to use. Has little effect if the
    colormap is continuous, but always works in conjunction with
    n_buckets_btwn_major_ticks to determine the number of major ticks. Defaults to 6.
    :type n_cbar_buckets: int, optional
    :param n_buckets_btwn_major_ticks: How many buckets are to lie between colorbar
    major ticks, determining how many major ticks are drawn. Defaults to 1.
    :type n_buckets_btwn_major_ticks: int, optional
    :param n_minor_ticks_btwn_major_ticks: How many minor ticks to draw between colorbar
    major ticks. Defaults to 8 (which means each pair of major ticks has 10 ticks
    total).
    :type n_minor_ticks_btwn_major_ticks: int, optional
    :param per_capita_denominator: When describing per-capita numbers, what to use as
    the denominator (e.g., cases per 100,000 people). If None, it is automatically
    computed from the data (the largest power of 10 needed by any stage's per-capita
    maximum) and shared by all per-capita plots.
    :type per_capita_denominator: int, optional
    :raises ValueError: If `plot_aspect_ratio` is None and `x_range` or `y_range` is
    None, or if a stage other than CONFIRMED or DEATH is to be plotted
    :return: The two pieces of info required to make a Bokeh autoloading HTML+JS plot,
    as a (JS code, HTML tag) pair: the JS that will load the plot, and the HTML tag to
    be inserted somewhere in the HTML body.
    :rtype: InfoForAutoload
    """

    Counting.verify(count, allow_select=True)
    DiseaseStage.verify(stage, allow_select=True)

    # The date as a string, so that bokeh can use it as a column name
    STRING_DATE_COL = "String_Date_"
    # A column whose sole purpose is to be a (the same) date associated with each
    # location
    FAKE_DATE_COL = "Fake_Date_"
    # The column we'll actually use for the colors; it's computed from value_col
    COLOR_COL = "Color_"

    # Under no circumstances may you change this date format
    # It's not just a pretty date representation; it actually has to match up with the
    # date strings computed in JS
    DATE_FMT = r"%Y-%m-%d"

    ID_COLS = [
        REGION_NAME_COL,
        Columns.DATE,
        Columns.STAGE,
        Columns.COUNT_TYPE,
    ]

    if cmap is None:
        cmap = cmocean.cm.matter

    if n_cbar_buckets is None:
        n_cbar_buckets = 6

    if n_buckets_btwn_major_ticks is None:
        n_buckets_btwn_major_ticks = 1

    if n_minor_ticks_btwn_major_ticks is None:
        n_minor_ticks_btwn_major_ticks = 8

    n_cbar_major_ticks = n_cbar_buckets // n_buckets_btwn_major_ticks + 1

    try:
        color_list = [
            # Convert matplotlib colormap to bokeh (list of hex strings)
            # https://stackoverflow.com/a/49934218
            RGB(*rgb).to_hex()
            for rgb in (255 * cmap(range(256))).astype("int")
        ]
    except TypeError:
        # `cmap` could not be called like a matplotlib colormap, so it is used as-is
        # (expected to already be a list of colors)
        color_list = cmap

    color_list: List[BokehColor]

    if stage is Select.ALL:
        stage_list = list(DiseaseStage)
    else:
        stage_list = [stage]

    if count is Select.ALL:
        count_list = list(Counting)
    else:
        count_list = [count]

    stage_list: List[DiseaseStage]
    count_list: List[Counting]

    # One subplot per (stage, count) combination
    stage_count_list: List[Tuple[DiseaseStage, Counting]] = list(
        itertools.product(stage_list, count_list))

    # Work on a copy so the caller's DataFrame is not mutated
    df = df.copy()

    # Unadjust dates (see SaveFormats._adjust_dates): rows stamped exactly at midnight
    # are moved back one day, all other rows are truncated to midnight of their day
    normalized_dates = df[Columns.DATE].dt.normalize()
    is_at_midnight = df[Columns.DATE] == normalized_dates
    df.loc[is_at_midnight, Columns.DATE] -= pd.Timedelta(days=1)
    df.loc[~is_at_midnight, Columns.DATE] = normalized_dates[~is_at_midnight]

    min_date, max_date = df[Columns.DATE].agg(["min", "max"])
    dates: List[pd.Timestamp] = pd.date_range(start=min_date,
                                              end=max_date,
                                              freq="D")
    max_date_str = max_date.strftime(DATE_FMT)

    # Get day-by-day case diffs per location, date, stage, count-type

    # Make sure data exists for every date for every state so that the entire country is
    # plotted each day; fill missing data with 0 (missing really *is* as good as 0)
    # enums will be replaced by their name (kind of important)
    id_cols_product: pd.MultiIndex = pd.MultiIndex.from_product(
        [
            df[REGION_NAME_COL].unique(),
            dates,
            [s.name for s in DiseaseStage],
            [c.name for c in Counting],
        ],
        names=ID_COLS,
    )

    df = (id_cols_product.to_frame(index=False).merge(
        df,
        how="left",
        on=ID_COLS,
    ).sort_values(ID_COLS))

    df[STRING_DATE_COL] = df[Columns.DATE].dt.strftime(DATE_FMT)
    df[Columns.CASE_COUNT] = df[Columns.CASE_COUNT].fillna(0)

    if transform_df_func is not None:
        df = transform_df_func(df)

    # Keep only locations that have geometry, and only the columns needed from here on
    df = geo_df.merge(df, how="inner", on=REGION_NAME_COL)[[
        REGION_NAME_COL,
        Columns.DATE,
        STRING_DATE_COL,
        Columns.STAGE,
        Columns.COUNT_TYPE,
        value_col,
    ]]

    # The dates actually present after transformation/merging (used for video stills)
    dates: List[pd.Timestamp] = [
        pd.Timestamp(d) for d in df[Columns.DATE].unique()
    ]

    # Color scale bounds per (stage, count type); only positive values are considered
    # because the color scale is logarithmic
    values_mins_maxs = (df[df[value_col] > 0].groupby(
        [Columns.STAGE, Columns.COUNT_TYPE])[value_col].agg(["min", "max"]))

    vmins: pd.Series = values_mins_maxs["min"]
    vmaxs: pd.Series = values_mins_maxs["max"]

    # Power of 10 that scales each group's max value into [1, 10)
    pow10s_series: pd.Series = vmaxs.map(
        lambda x: int(10**(-np.floor(np.log10(x)))))

    vmins: dict = vmins.to_dict()
    vmaxs: dict = vmaxs.to_dict()

    # All per-capita plots share one denominator: the caller's, or else the largest
    # power of 10 computed above for any per-capita group. (Computed once up front;
    # overwriting entries with this maximum below cannot change the maximum.)
    if per_capita_denominator is None:
        _max_pow10 = pow10s_series.loc[(slice(None),
                                        Counting.PER_CAPITA.name)].max()
    else:
        _max_pow10 = per_capita_denominator

    for _stage in DiseaseStage:
        _value_key = (_stage.name, Counting.PER_CAPITA.name)
        vmins[_value_key] *= _max_pow10
        vmaxs[_value_key] *= _max_pow10
        pow10s_series[_value_key] = _max_pow10

    # Scale the per-capita values themselves by the same denominator
    percap_pow10s: pd.Series = df.apply(
        lambda row: pow10s_series[
            (row[Columns.STAGE], row[Columns.COUNT_TYPE])],
        axis=1,
    )

    _per_cap_rows = df[Columns.COUNT_TYPE] == Counting.PER_CAPITA.name
    df.loc[_per_cap_rows, value_col] *= percap_pow10s.loc[_per_cap_rows]

    # Ideally we wouldn't have to pivot, and we could do a JIT join of state longs/lats
    # after filtering the data. Unfortunately this is not possible, and a long data
    # format leads to duplication of the very large long/lat lists; pivoting is how we
    # avoid that. (This seems to be one downside of bokeh when compared to plotly)
    df = (df.pivot_table(
        index=[REGION_NAME_COL, Columns.STAGE, Columns.COUNT_TYPE],
        columns=STRING_DATE_COL,
        values=value_col,
        aggfunc="first",
    ).reset_index().merge(
        geo_df[[REGION_NAME_COL, LONG_COL, LAT_COL]],
        how="inner",
        on=REGION_NAME_COL,
    ))

    # All three columns are just initial values; they'll change with the date slider
    df[value_col] = df[max_date_str]
    df[FAKE_DATE_COL] = max_date_str
    df[COLOR_COL] = np.where(df[value_col] > 0, df[value_col], "NaN")

    # Technically takes a df but we don't need the index
    bokeh_data_source = ColumnDataSource(
        {k: v.tolist()
         for k, v in df.to_dict(orient="series").items()})

    # One pair of filters per subplot, selecting that subplot's stage and count type
    filters = [[
        GroupFilter(column_name=Columns.STAGE, group=stage.name),
        GroupFilter(column_name=Columns.COUNT_TYPE, group=count.name),
    ] for stage, count in stage_count_list]

    figures = []

    for subplot_index, (stage, count) in enumerate(stage_count_list):
        view = CDSView(source=bokeh_data_source,
                       filters=filters[subplot_index])

        vmin = vmins[(stage.name, count.name)]
        vmax = vmaxs[(stage.name, count.name)]

        # Compute and set axes titles
        if stage is DiseaseStage.CONFIRMED:
            fig_stage_name = "Cases"
        elif stage is DiseaseStage.DEATH:
            fig_stage_name = "Deaths"
        else:
            raise ValueError

        fig_title_components: List[str] = []
        if subplot_title_prefix is not None:
            fig_title_components.append(subplot_title_prefix)

        fig_title_components.append(fig_stage_name)

        if count is Counting.PER_CAPITA:
            _per_cap_denom = pow10s_series[(stage.name, count.name)]
            fig_title_components.append(f"Per {_per_cap_denom:,d} people")
            formatter = PrintfTickFormatter(format=r"%2.3f")
            label_standoff = 12
            tooltip_fmt = "{0.000}"
        else:
            formatter = NumeralTickFormatter(format="0.0a")
            label_standoff = 10
            tooltip_fmt = "{0}"

        color_mapper = LogColorMapper(
            color_list,
            low=vmin,
            high=vmax,
            nan_color="#f2f2f2",
        )

        fig_title = " ".join(fig_title_components)

        if plot_aspect_ratio is None:
            if x_range is None or y_range is None:
                raise ValueError("Must provide both `x_range` and `y_range`" +
                                 " when `plot_aspect_ratio` is None")
            plot_aspect_ratio = (x_range[1] - x_range[0]) / (y_range[1] -
                                                             y_range[0])

        # Create figure object
        p = bplotting.figure(
            title=fig_title,
            title_location="above",
            tools=[
                HoverTool(
                    tooltips=[
                        ("Date", f"@{{{FAKE_DATE_COL}}}"),
                        ("State", f"@{{{REGION_NAME_COL}}}"),
                        ("Count", f"@{{{value_col}}}{tooltip_fmt}"),
                    ],
                    toggleable=False,
                ),
                PanTool(),
                BoxZoomTool(match_aspect=True),
                ZoomInTool(),
                ZoomOutTool(),
                ResetTool(),
            ],
            active_drag=None,
            aspect_ratio=plot_aspect_ratio,
            output_backend="webgl",
            lod_factor=4,
            lod_interval=400,
            lod_threshold=1000,
            lod_timeout=300,
        )

        p.xgrid.grid_line_color = None
        p.ygrid.grid_line_color = None
        # Finally, add the actual choropleth data we care about
        p.patches(
            LONG_COL,
            LAT_COL,
            source=bokeh_data_source,
            view=view,
            fill_color={
                "field": COLOR_COL,
                "transform": color_mapper
            },
            line_color="black",
            line_width=0.25,
            fill_alpha=1,
        )

        # Add evenly spaced ticks and their labels to the colorbar
        # First major, then minor
        # Adapted from https://stackoverflow.com/a/50314773
        bucket_size = (vmax / vmin)**(1 / n_cbar_buckets)
        tick_dist = bucket_size**n_buckets_btwn_major_ticks

        # Simple log scale math
        major_tick_locs = (
            vmin * (tick_dist**np.arange(0, n_cbar_major_ticks))
            # * (bucket_size ** 0.5) # Use this if centering ticks on buckets
        )
        # Get minor locs by linearly interpolating between major ticks
        minor_tick_locs = []
        for major_tick_index, this_major_tick in enumerate(
                major_tick_locs[:-1]):
            next_major_tick = major_tick_locs[major_tick_index + 1]

            # Get minor ticks as numbers in range [this_major_tick, next_major_tick]
            # and exclude the major ticks themselves (once we've used them to
            # compute the minor tick locs)
            minor_tick_locs.extend(
                np.linspace(
                    this_major_tick,
                    next_major_tick,
                    n_minor_ticks_btwn_major_ticks + 2,
                )[1:-1])

        color_bar = ColorBar(
            color_mapper=color_mapper,
            ticker=FixedTicker(ticks=major_tick_locs,
                               minor_ticks=minor_tick_locs),
            formatter=formatter,
            label_standoff=label_standoff,
            major_tick_out=0,
            major_tick_in=13,
            major_tick_line_color="white",
            major_tick_line_width=1,
            minor_tick_out=0,
            minor_tick_in=5,
            minor_tick_line_color="white",
            minor_tick_line_width=1,
            location=(0, 0),
            border_line_color=None,
            bar_line_color=None,
            orientation="vertical",
        )

        p.add_layout(color_bar, "right")
        p.hover.point_policy = "follow_mouse"

        # Bokeh axes (and most other things) are splattable
        p.axis.visible = False

        figures.append(p)

    # Make all figs pan and zoom together by setting their axes equal to each other
    # Also fix the plots' aspect ratios
    figs_iter = iter(np.ravel(figures))
    anchor_fig = next(figs_iter)

    if x_range is not None and y_range is not None:
        data_aspect_ratio = (x_range[1] - x_range[0]) / (y_range[1] -
                                                         y_range[0])
    else:
        data_aspect_ratio = plot_aspect_ratio

    if x_range is not None:
        anchor_fig.x_range = Range1d(
            *x_range,
            bounds="auto",
            min_interval=min_visible_y_range * data_aspect_ratio,
        )

    if y_range is not None:
        anchor_fig.y_range = Range1d(*y_range,
                                     bounds="auto",
                                     min_interval=min_visible_y_range)

    for fig in figs_iter:
        fig.x_range = anchor_fig.x_range
        fig.y_range = anchor_fig.y_range

    # 2x2 grid (for now)
    gp = gridplot(
        figures,
        ncols=len(count_list),
        sizing_mode="scale_both",
        toolbar_location="above",
    )
    plot_layout = [gp]

    # Ok, pause
    # Now we're going into a whole other thing: we're doing all the JS logic behind a
    # date slider that changes which date is shown on the graphs. The structure of the
    # data is one column per date, one row per location, and a few extra columns to
    # store the data the graph will use. When we adjust the date of the slider, we copy
    # the relevant column of the df into the columns the graphs are looking at.
    # That's the easy part; the hard part is handling the "play button" functionality,
    # whereby the user can click one button and the date slider will periodically
    # advance itself. That requires a fair bit of logic to schedule and cancel the
    # timers and make it all feel right.

    # Create unique ID for the JS playback info object for this plot (since it'll be on
    # the webpage with other plots, and their playback info isn't shared)
    _THIS_PLOT_ID = uuid.uuid4().hex

    # JS source fragments: quoted keys of the playback info object...
    __TIMER = "'timer'"
    __IS_ACTIVE = "'isActive'"
    __SELECTED_INDEX = "'selectedIndex'"
    __BASE_INTERVAL_MS = "'BASE_INTERVAL'"  # Time (in MS) btwn frames when speed==1
    __TIMER_START_DATE = "'startDate'"
    __TIMER_ELAPSED_TIME_MS = "'elapsedTimeMS'"
    __TIMER_ELAPSED_TIME_PROPORTION = "'elapsedTimeProportion'"
    __SPEEDS_KEY = "'SPEEDS'"
    __PLAYBACK_INFO = f"window._playbackInfo_{_THIS_PLOT_ID}"

    # ...and JS expressions that access each entry of that object
    _PBI_TIMER = f"{__PLAYBACK_INFO}[{__TIMER}]"
    _PBI_IS_ACTIVE = f"{__PLAYBACK_INFO}[{__IS_ACTIVE}]"
    _PBI_SELECTED_INDEX = f"{__PLAYBACK_INFO}[{__SELECTED_INDEX}]"
    _PBI_TIMER_START_DATE = f"{__PLAYBACK_INFO}[{__TIMER_START_DATE}]"
    _PBI_TIMER_ELAPSED_TIME_MS = f"{__PLAYBACK_INFO}[{__TIMER_ELAPSED_TIME_MS}]"
    _PBI_TIMER_ELAPSED_TIME_PROPORTION = (
        f"{__PLAYBACK_INFO}[{__TIMER_ELAPSED_TIME_PROPORTION}]")
    _PBI_BASE_INTERVAL = f"{__PLAYBACK_INFO}[{__BASE_INTERVAL_MS}]"
    _PBI_SPEEDS = f"{__PLAYBACK_INFO}[{__SPEEDS_KEY}]"
    _PBI_CURR_INTERVAL_MS = (
        f"{_PBI_BASE_INTERVAL} / {_PBI_SPEEDS}[{_PBI_SELECTED_INDEX}]")

    _SPEED_OPTIONS = [0.25, 0.5, 1.0, 2.0]
    _DEFAULT_SPEED = 1.0
    _DEFAULT_SELECTED_INDEX = _SPEED_OPTIONS.index(_DEFAULT_SPEED)

    # JS: lazily create this plot's playback info object on `window`
    _SETUP_WINDOW_PLAYBACK_INFO = f"""
        if (typeof({__PLAYBACK_INFO}) === 'undefined') {{
            {__PLAYBACK_INFO} = {{
                {__TIMER}: null,
                {__IS_ACTIVE}: false,
                {__SELECTED_INDEX}: {_DEFAULT_SELECTED_INDEX},
                {__TIMER_START_DATE}: null,
                {__TIMER_ELAPSED_TIME_MS}: 0,
                {__TIMER_ELAPSED_TIME_PROPORTION}: 0,
                {__BASE_INTERVAL_MS}: 1000,
                {__SPEEDS_KEY}: {_SPEED_OPTIONS}
            }};
        }}

    """

    # JS: define updateDate(), which advances the slider by one day (86400000 ms) and
    # stops playback once the last date is reached
    _DEFFUN_INCR_DATE = f"""
        // See this link for why this works (it's an undocumented feature?)
        // https://discourse.bokeh.org/t/5254
        // Tl;dr we need this to automatically update the hover as the play button plays
        // Without this, the hover tooltip only updates when we jiggle the mouse
        // slightly

        let prev_val = null;
        source.inspect.connect(v => prev_val = v);

        function updateDate() {{
            {_PBI_TIMER_START_DATE} = new Date();
            {_PBI_TIMER_ELAPSED_TIME_MS} = 0
            if (dateSlider.value < maxDate) {{
                dateSlider.value += 86400000;
            }}

            if (dateSlider.value >= maxDate) {{
                console.log(dateSlider.value, maxDate)
                console.log('reached end')
                clearInterval({_PBI_TIMER});
                {_PBI_IS_ACTIVE} = false;
                playPauseButton.active = false;
                playPauseButton.change.emit();
                playPauseButton.label = 'Restart';
            }}

            dateSlider.change.emit();

            // This is pt. 2 of the prev_val/inspect stuff above
            if (prev_val !== null) {{
                source.inspect.emit(prev_val);
            }}
        }}
    """

    # JS: (re)start playback; the first tick waits only for whatever part of the
    # current interval had not yet elapsed when playback was last stopped
    _DO_START_TIMER = f"""
        function startLoopTimer() {{
            updateDate();
            if ({_PBI_IS_ACTIVE}) {{
                {_PBI_TIMER} = setInterval(updateDate, {_PBI_CURR_INTERVAL_MS})
            }}

        }}

        {_PBI_TIMER_START_DATE} = new Date();

        // Should never be <0 or >1 but I am being very defensive here
        const proportionRemaining = 1 - (
            {_PBI_TIMER_ELAPSED_TIME_PROPORTION} <= 0
            ? 0
            : {_PBI_TIMER_ELAPSED_TIME_PROPORTION} >= 1
            ? 1
            : {_PBI_TIMER_ELAPSED_TIME_PROPORTION}
        );
        const remainingTimeMS = (
            {_PBI_CURR_INTERVAL_MS} * proportionRemaining
        );
        const initialInterval = (
            {_PBI_TIMER_ELAPSED_TIME_MS} === 0
            ? 0
            : remainingTimeMS
        );

        {_PBI_TIMER} = setTimeout(
            startLoopTimer,
            initialInterval
        );
    """

    # JS: stop playback, recording how far into the current interval we were
    _DO_STOP_TIMER = f"""
        const now = new Date();
        {_PBI_TIMER_ELAPSED_TIME_MS} += (
            now.getTime() - {_PBI_TIMER_START_DATE}.getTime()
        );
        {_PBI_TIMER_ELAPSED_TIME_PROPORTION} = (
            {_PBI_TIMER_ELAPSED_TIME_MS} / {_PBI_CURR_INTERVAL_MS}
        );
        clearInterval({_PBI_TIMER});
    """

    update_on_date_change_callback = CustomJS(
        args={"source": bokeh_data_source},
        code=f"""

        {_SETUP_WINDOW_PLAYBACK_INFO}

        const sliderValue = cb_obj.value;
        const sliderDate = new Date(sliderValue)
        // Ugh, actually requiring the date to be YYYY-MM-DD (matching DATE_FMT)
        const dateStr = sliderDate.toISOString().split('T')[0]

        const data = source.data;

        {_PBI_TIMER_ELAPSED_TIME_MS} = 0

        if (typeof(data[dateStr]) !== 'undefined') {{
            data['{value_col}'] = data[dateStr]

            const valueCol = data['{value_col}'];
            const colorCol = data['{COLOR_COL}'];
            const fakeDateCol = data['{FAKE_DATE_COL}']

            for (var i = 0; i < data['{value_col}'].length; i++) {{
                const value = valueCol[i]
                if (value == 0) {{
                    colorCol[i] = 'NaN';
                }} else {{
                    colorCol[i] = value;
                }}

                fakeDateCol[i] = dateStr;
            }}

            source.change.emit();

        }}

        """,
    )

    # Taking day-over-day diffs means the min slider day is one more than the min data
    # date (might be off by 1 if not using day over diffs but in practice not an issue)
    min_slider_date = min_date + pd.Timedelta(days=1)
    date_slider = DateSlider(
        start=min_slider_date,
        end=max_date,
        value=max_date,
        step=1,
        sizing_mode="stretch_width",
        width_policy="fit",
    )
    date_slider.js_on_change("value", update_on_date_change_callback)

    play_pause_button = Toggle(
        label="Start playing",
        button_type="success",
        active=False,
        sizing_mode="stretch_width",
    )

    animate_playback_callback = CustomJS(
        args={
            "source": bokeh_data_source,
            "dateSlider": date_slider,
            "playPauseButton": play_pause_button,
            "maxDate": max_date,
            "minDate": min_slider_date,
        },
        code=f"""

        {_SETUP_WINDOW_PLAYBACK_INFO}
        {_DEFFUN_INCR_DATE}

        if (dateSlider.value >= maxDate) {{
            if (playPauseButton.active) {{
                dateSlider.value = minDate;
                dateSlider.change.emit();

                // Hack to get timer to wait after date slider wraps; any positive
                // number works but the smaller the better
                {_PBI_TIMER_ELAPSED_TIME_MS} = 1;
            }}
        }}

        const active = cb_obj.active;
        {_PBI_IS_ACTIVE} = active;

        if (active) {{
            playPauseButton.label = 'Playing – Click/tap to pause'
            {_DO_START_TIMER}
        }} else {{
            playPauseButton.label = 'Paused – Click/tap to play'
            {_DO_STOP_TIMER}
        }}

        """,
    )

    play_pause_button.js_on_click(animate_playback_callback)

    change_playback_speed_callback = CustomJS(
        args={
            "source": bokeh_data_source,
            "dateSlider": date_slider,
            "playPauseButton": play_pause_button,
            "maxDate": max_date,
        },
        code=f"""

        {_SETUP_WINDOW_PLAYBACK_INFO}
        {_DEFFUN_INCR_DATE}

        // Must stop timer before handling changing the speed, as stopping the timer
        // saves values based on the current (unchaged) speed selection
        if ({_PBI_TIMER} !== null) {{
            {_DO_STOP_TIMER}
        }}

        const selectedIndex = cb_obj.active;
        {_PBI_SELECTED_INDEX} = selectedIndex;

        if ({_PBI_IS_ACTIVE}) {{
            {_DO_START_TIMER}
        }} else {{
            {_PBI_TIMER_ELAPSED_TIME_MS} = 0
        }}

        console.log({__PLAYBACK_INFO})

    """,
    )

    playback_speed_radio = RadioButtonGroup(
        labels=[f"{speed:.2g}x speed" for speed in _SPEED_OPTIONS],
        active=_DEFAULT_SELECTED_INDEX,
        sizing_mode="stretch_width",
    )
    playback_speed_radio.js_on_click(change_playback_speed_callback)

    plot_layout.append(
        layout_column(
            [
                date_slider,
                layout_row(
                    [play_pause_button, playback_speed_radio],
                    height_policy="min",
                ),
            ],
            width_policy="fit",
            height_policy="min",
        ))
    plot_layout = layout_column(plot_layout, sizing_mode="scale_both")

    # Create the autoloading bokeh plot info (HTML + JS)
    js_path = str(Path(out_file_basename + "_autoload").with_suffix(".js"))
    tag_html_path = str(
        Path(out_file_basename + "_div_tag").with_suffix(".html"))

    js_code, tag_code = autoload_static(plot_layout, CDN, js_path)
    # Append the tag's own id to the script URL as a query parameter
    tag_uuid = re.search(r'id="([^"]+)"', tag_code).group(1)
    tag_code = re.sub(r'src="([^"]+)"', f'src="\\1?uuid={tag_uuid}"', tag_code)

    # Write with an explicit encoding: the generated code carries non-ASCII text (the
    # button labels), so the platform's default encoding must not be relied upon
    with open(Paths.DOCS / js_path, "w", encoding="utf-8") as f_js, open(
            Paths.DOCS / tag_html_path, "w", encoding="utf-8") as f_html:
        f_js.write(js_code)
        f_html.write(tag_code)

    # Create the video by creating stills of the graphs for each date and then stitching
    # the images into a video
    if should_make_video:
        save_dir: Path = PNG_SAVE_ROOT_DIR / out_file_basename
        save_dir.mkdir(parents=True, exist_ok=True)

        STILL_WIDTH = 1500
        STILL_HEIGHT = int(np.ceil(STILL_WIDTH / plot_aspect_ratio) *
                           1.05)  # Unclear why *1.05 is necessary
        gp.height = STILL_HEIGHT
        gp.width = STILL_WIDTH
        gp.sizing_mode = "fixed"
        orig_title = anchor_fig.title.text

        # The still for the last date doubles as the video's poster image; find that
        # date once rather than on every iteration
        last_date = max(dates, default=None)

        for date in dates:
            date_str = date.strftime(DATE_FMT)
            anchor_fig.title = Title(text=f"{orig_title} {date_str}")

            for p in figures:
                p.title = Title(text=p.title.text, text_font_size="20px")

            # Just a reimplementation of the JS code in the date slider's callback
            data = bokeh_data_source.data
            data[value_col] = data[date_str]

            for i, value in enumerate(data[value_col]):
                if value == 0:
                    data[COLOR_COL][i] = "NaN"
                else:
                    data[COLOR_COL][i] = value

                data[FAKE_DATE_COL][i] = date_str

            save_path: Path = (save_dir / date_str).with_suffix(".png")
            export_png(gp, filename=save_path)
            resize_to_even_dims(save_path, pad_bottom=0.08)

            if date == last_date:
                poster_path: Path = (
                    PNG_SAVE_ROOT_DIR /
                    (out_file_basename + "_poster")).with_suffix(".png")
                poster_path.write_bytes(save_path.read_bytes())

        make_video(save_dir, out_file_basename, 0.9)

    print(f"Did interactive {out_file_basename}")

    return (js_code, tag_code)
Beispiel #7
0
def get_current_case_data(
    df: pd.DataFrame,
    *,
    stage: Union[DiseaseStage, Literal[Select.DEFAULT]],
    count: Counting,
    x_axis: Columns.XAxis,
) -> pd.DataFrame:
    """Get the current case information

    For the given stage, count, and x_axis, return a dataframe with one row per
    location holding the start date, the doubling times for the selected day indices,
    the last recorded count of every case type, and the mortality rate

    :param df: The dataframe containing all case info
    :type df: pd.DataFrame
    :param stage: The stage whose counts are used to compute doubling times;
    `Select.DEFAULT` is treated as `DiseaseStage.CONFIRMED`
    :type stage: Union[DiseaseStage, Literal[Select.DEFAULT]]
    :param count: The count type to keep when getting current data
    :type count: Counting
    :param x_axis: Used to determine which column to sort the current case data by
    :type x_axis: Columns.XAxis
    :return: Dataframe representing the current state of affairs
    :rtype: pd.DataFrame
    """

    DiseaseStage.verify(stage, allow_select=True)
    Counting.verify(count)
    Columns.XAxis.verify(x_axis)

    # Filter in order to compute doubling time (the ratio of counts is fed to log2)
    df = df[df[Columns.CASE_COUNT] > 0]

    if stage is Select.DEFAULT:
        stage = DiseaseStage.CONFIRMED

    relevant_case_type = CaseInfo.get_info_item_for(InfoField.CASE_TYPE,
                                                    stage=stage,
                                                    count=count)

    # iloc positions at which doubling times are measured; 0 is the first row
    day_indices = [0, *ADTL_DAY_INDICES]

    def get_group_stats(g: pd.DataFrame) -> pd.Series:
        """Summarize one location's rows into a single series of current stats"""
        # Filter to the relevant case type and just the two columns
        doubling_time_group = g.loc[
            g[Columns.CASE_TYPE] == relevant_case_type,
            [Columns.DATE, Columns.CASE_COUNT],
        ]

        # Doubling times for the selected day indices (fed to iloc)
        # Keys are the column names built from the iloc positions (0, k, -j, etc),
        # values are the doubling times measured from the row at that position
        doubling_times = {}

        if doubling_time_group.empty:
            # A location can have rows for other case types but none for the relevant
            # one (e.g. confirmed cases but no deaths yet); there is no "current" row
            # to measure from, so every doubling time is undefined
            for day_idx in day_indices:
                doubling_times[form_doubling_time_colname(day_idx)] = np.nan
        else:
            # Relies on the column order [DATE, CASE_COUNT] selected above
            current_date, current_count = doubling_time_group.iloc[-1]

            for day_idx in day_indices:
                col_name = form_doubling_time_colname(day_idx)
                try:
                    then_row = doubling_time_group.iloc[day_idx]
                except IndexError:
                    # Not enough history for this location at this day index
                    doubling_times[col_name] = np.nan
                    continue

                # $ currentCount = thenCount * 2^{n_days/doublingTime} $
                # => $ doublingTime = n_days / log2(currentCount / thenCount) $
                then_date = then_row[Columns.DATE]
                then_count = then_row[Columns.CASE_COUNT]

                n_days = (current_date - then_date).total_seconds() / 86400
                count_ratio = current_count / then_count

                # NOTE(review): when the count has not changed, log2(ratio) is 0 and
                # the result is presumably non-finite (inf/NaN) -- confirm callers
                # tolerate that
                doubling_times[col_name] = n_days / np.log2(count_ratio)

        # Get last case count of each case type for current group
        # .tail(1).sum() is a trick to get the last value if it exists,
        # else 0 (remember, this is sorted by date)
        last_counts = (g.groupby(Columns.CASE_TYPE)[Columns.CASE_COUNT].apply(
            lambda h: h.tail(1).sum()).to_dict())

        data_dict = {
            START_DATE: doubling_time_group[Columns.DATE].min(),
            **doubling_times,
            **last_counts,
        }

        # The explicit index fixes the column order and fills any case type missing
        # from this group with NaN
        return pd.Series(
            data_dict,
            index=[
                START_DATE,
                *doubling_times.keys(),
                *CaseInfo.get_info_items_for(InfoField.CASE_TYPE),
            ],
        )

    if x_axis is Columns.XAxis.DAYS_SINCE_OUTBREAK:
        # Sort by the doubling time measured from the first row, ascending
        sort_col = form_doubling_time_colname(0)
        sort_ascending = True
    elif x_axis is Columns.XAxis.DATE:
        # Sort by the current count of the relevant case type, largest first
        sort_col = relevant_case_type
        sort_ascending = False
    else:
        x_axis.raise_for_unhandled_case()

    current_case_counts = (
        df.groupby(Columns.location_id_cols).apply(get_group_stats)
        # The resulting order is used to keep the plot legend in sync with the order
        # of lines on the graph (e.g. when sorting by count, the location with the
        # most current cases is first in the legend and the least is last)
        .sort_values(sort_col, ascending=sort_ascending).reset_index())

    # Mortality is always deaths over confirmed cases (for the requested count type),
    # regardless of which stage was asked for
    confirmed_col, death_col = [
        CaseInfo.get_info_item_for(InfoField.CASE_TYPE,
                                   stage=mortality_stage,
                                   count=count)
        for mortality_stage in [DiseaseStage.CONFIRMED, DiseaseStage.DEATH]
    ]
    current_case_counts[CaseTypes.MORTALITY] = (
        current_case_counts[death_col] / current_case_counts[confirmed_col])

    return current_case_counts