예제 #1
0
def test_timeline():
    df = generate_dummy_data()
    plot_timeline(df,
                  stacked=True,
                  legend=True,
                  freq="2D",
                  tick_format="%d/%M",
                  tick_step=3)
    return plt.gcf()
예제 #2
0
def test_nonstacked_timeline():
    df = generate_dummy_data()
    plot_timeline(
        df,
        stacked=False,
        freq="W",
        tick_format="Week %W %Y",
        tick_step=1,
        colors="blue",
    )
    return plt.gcf()
예제 #3
0
파일: main.py 프로젝트: lrjball/chatviz
def visualize_chat(
    df,
    title,
    colors="default",
    timeline_freq="MS",
    timeline_tick_format="%b '%y",
    timeline_tick_step=6,
    timeline_color="default",
    timeline_stacked=False,
    top_n_words=10,
    stopwords=None,
):
    """
    Creates a series of plots given a dataframe of messages.

    The plots created are:

    1. 3 donut plots showing the number of messages, words and characters used
       by each person in the chat.
    2. A bar chart showing the average time to reply in hours for each person.
    3. A timeline showing the number of messages over time, can optionally be
       split down to per person.
    4. Bar charts showing the most used words by each person.
    5. 2 radar plots showing the number of messages sent at each hour and day
       respectively for each person.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe of messages. Must have the columns
        ['date', 'name', 'text'].
    title : str
        The title for the plot.
    colors : {'default'} or list of str or dict
        The colors to be used for each person in the chat. Should be either
        'default' in which case the default color scheme is used, a list of
        colors the same length as the number of names in df['name'], or a dict
        which maps each name to a color. For more info about color options,
        see `here <https://matplotlib.org/2.0.2/api/colors_api.html>`_.
    timeline_freq: str
        The offset string for the resample frequency in the timeline plot. See `here
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_
        for more. The default is 'MS', which will generate a monthly plot.
    timeline_tick_format : str
        The format string for the x tick labels in the timeline plot, which are dates.
        The default is '%b \'%y' which gives for example `Jan '19`.
    timeline_tick_step : int
        The number of steps between the ticks on the x-axis for the timeline
        plot. By default this is 6, so every 6 bars will have an x-tick label.
    timeline_color : str
        Only applicable if timeline_stacked is False. In this case, the timeline
        plot will only be one color, which may want to be different from `colors`
        as these are per person. Default is 'default' which will just use matplotlibs
        default color.
    timeline_stacked : bool
        If True, then a stacked bar chart will be created for the timeline plot,
        with one color per person involved in the chat. If False (default),
        will just plot one colored bar with the overall count.
    top_n_words : int
        The number of top words to include in the words bar chart. Default is 10.
    stopwords : None or iterable
        If given, then these words will be removed from the words bar charts.
        If None, all words will be kept (Note: this will lead to poor results
        as 'the', 'and', 'a', 'is' etc. will be the top words. A stopword list
        is recommended).

    Returns
    -------
    plt.figure
        A matplotlib figure with all of the message plots on it.

    Examples
    --------

    This example visualizes the script from Monty Python Flying Circus
    Series 1. In just a few lines we can used chatviz to create a sophisticated
    infographic. Note: The dates have been randomly created for this example.

    .. plot:: ../examples/complete_example.py
    """
    fig = plt.figure()
    gs = fig.add_gridspec(4,
                          4,
                          height_ratios=[0.2, 0.5, 0.2, 0.2],
                          hspace=0.6,
                          wspace=0.5)

    color_dict = _build_color_dict(colors, df)

    gsdonuts = gs[0, :3].subgridspec(1, 3)
    ax_donuts = [
        fig.add_subplot(gsdonuts[0]),
        fig.add_subplot(gsdonuts[1]),
        fig.add_subplot(gsdonuts[2]),
    ]
    plot_donuts(df, ax=ax_donuts, colors=color_dict)

    ax_legend = fig.add_subplot(gs[0, 3])
    # plot_reply_times(df, ax=ax_reply, colors=color_dict)
    plot_legend(color_dict, ax=ax_legend)

    ax_timeline = fig.add_subplot(gs[1, :])

    plot_timeline(
        df,
        ax=ax_timeline,
        colors=color_dict if timeline_stacked else [timeline_color],
        freq=timeline_freq,
        tick_format=timeline_tick_format,
        tick_step=timeline_tick_step,
        stacked=timeline_stacked,
    )

    gswords = gs[2, :].subgridspec(1, len(set(df["name"])), wspace=1.3)
    ax_words_title = fig.add_subplot(gswords[:])
    ax_words_title.set_title("Most Used Words", y=1.1)
    if len(color_dict) > 1:
        ax_words_title.axis(False)
    ax_words = [
        fig.add_subplot(gswords[i]) for i in range(len(set(df["name"])))
    ]
    plot_words(df,
               ax=ax_words,
               colors=color_dict,
               top_n=top_n_words,
               stopwords=stopwords)

    gsradar = gs[3, 2:].subgridspec(1, 2)
    ax_radar_title = fig.add_subplot(gsradar[:])
    ax_radar_title.set_title("Distribution of Message Times", y=1.2)
    ax_radar_title.axis(False)
    hour_radar_ax = fig.add_subplot(gsradar[0], polar=True)
    plot_hours_radar(df, ax=hour_radar_ax, colors=colors)
    day_radar_ax = fig.add_subplot(gsradar[1], polar=True)
    plot_days_radar(df, ax=day_radar_ax, colors=color_dict)

    gsreply = gs[3, :2].subgridspec(1, 2)
    ax_reply_title = fig.add_subplot(gsreply[:])
    ax_reply_title.axis(False)
    ax_reply_title.set_title("Average Time to Reply", y=1.2)
    ax_reply = fig.add_subplot(gs[3, :2])
    plot_reply_times(df, ax=ax_reply, colors=color_dict)

    fig.suptitle(title, y=0.95)
    return fig
예제 #4
0
from chatviz.utils import load_example_chat_data
from chatviz.plotting import plot_timeline
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = [20, 10]
plt.rcParams["font.size"] = 20
plt.rcParams["axes.titlesize"] = 40
plt.rcParams["font.family"] = "Sawasdee"

df = load_example_chat_data()
palette = ["#20639B", "#3CAEA3", "#F6D55C", "#ED553B", "#173F5F"]
plot_timeline(
    df,
    stacked=True,
    legend=True,
    freq="2D",
    tick_format="%d/%m/%y",
    tick_step=3,
    colors=palette,
)
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.tight_layout()
plt.show()
예제 #5
0
from chatviz.utils import load_example_chat_data
from chatviz.plotting import plot_timeline
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = [20, 10]
plt.rcParams["font.size"] = 20
plt.rcParams["axes.titlesize"] = 40
plt.rcParams["font.family"] = "Sawasdee"

df = load_example_chat_data()
palette = ["#20639B", "#3CAEA3", "#F6D55C", "#ED553B", "#173F5F"]
plot_timeline(
    df,
    stacked=True,
    legend=True,
    freq="W",
    tick_format="Week %W, %Y",
    tick_step=1,
    colors=palette,
)
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.tight_layout()
plt.show()
예제 #6
0
from chatviz.utils import load_example_chat_data
from chatviz.plotting import plot_timeline
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = [20, 10]
plt.rcParams["font.size"] = 20
plt.rcParams["axes.titlesize"] = 40
plt.rcParams["font.family"] = "Sawasdee"

df = load_example_chat_data()
plot_timeline(
    df, stacked=False, colors="#20639B", tick_step=1, freq="2W", tick_format="%d %b '%y"
)
plt.tight_layout()
plt.show()